Skip to content

Commit

Permalink
Introduce new CGF diagnostics utility
Browse files Browse the repository at this point in the history
It currently provides two types of diagnostics:
 - Check whether accessor target matches kernel type (we had this
   before, but now it is checked during CGF submission and throws
   synchronously in the main thread).
 - Check whether all accessors (and side effects) are being copied into
   a kernel. Fewer accessors being copied than expected either means
   there are unused accessors (potential performance bug), or accessors
   are being captured by reference (dangling reference - very bad!).
  • Loading branch information
psalz committed May 24, 2023
1 parent 0a743c7 commit ff7ed02
Show file tree
Hide file tree
Showing 11 changed files with 356 additions and 29 deletions.
9 changes: 9 additions & 0 deletions include/accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "access_modes.h"
#include "buffer.h"
#include "buffer_storage.h"
#include "cgf_diagnostics.h"
#include "closure_hydrator.h"
#include "handler.h"
#include "sycl_wrappers.h"
Expand Down Expand Up @@ -341,6 +342,10 @@ class accessor<DataT, Dims, Mode, target::device> : public detail::accessor_base

#if !defined(__SYCL_DEVICE_ONLY__)
if(detail::is_embedded_hydration_id(m_device_ptr)) {
if(detail::cgf_diagnostics::is_available() && detail::cgf_diagnostics::get_instance().is_checking()) {
detail::cgf_diagnostics::get_instance().register_accessor(detail::extract_hydration_id(m_device_ptr), target::device);
}

if(detail::closure_hydrator::is_available() && detail::closure_hydrator::get_instance().is_hydrating()) {
const auto info = detail::closure_hydrator::get_instance().get_accessor_info<target::device>(detail::extract_hydration_id(m_device_ptr));
m_device_ptr = static_cast<DataT*>(info.ptr);
Expand Down Expand Up @@ -619,6 +624,10 @@ class accessor<DataT, Dims, Mode, target::host_task> : public detail::accessor_b
m_virtual_buffer_range = other.m_virtual_buffer_range;

if(detail::is_embedded_hydration_id(m_host_ptr)) {
if(detail::cgf_diagnostics::is_available() && detail::cgf_diagnostics::get_instance().is_checking()) {
detail::cgf_diagnostics::get_instance().register_accessor(detail::extract_hydration_id(m_host_ptr), target::host_task);
}

if(detail::closure_hydrator::is_available() && detail::closure_hydrator::get_instance().is_hydrating()) {
const auto info = detail::closure_hydrator::get_instance().get_accessor_info<target::host_task>(detail::extract_hydration_id(m_host_ptr));
m_host_ptr = static_cast<DataT*>(info.ptr);
Expand Down
113 changes: 113 additions & 0 deletions include/cgf_diagnostics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#pragma once

#include <cassert>
#include <cstddef>
#include <memory>
#include <optional>
#include <stdexcept>
#include <type_traits>
#include <vector>

#include <spdlog/fmt/fmt.h>

#include "task.h"

namespace celerity::detail {

/**
 * Diagnostics for common command group function (CGF) authoring mistakes.
 *
 * Two checks are currently provided:
 *  - Accessors must be created for the target matching the kernel type
 *    (device kernel vs. host task).
 *  - All accessors (and non-void side effects) must be copied into the kernel
 *    closure. Fewer copies than expected indicates either unused accessors
 *    (a potential performance bug) or capture-by-reference (a dangling
 *    reference once the CGF returns - very bad!).
 *
 * The instance is thread_local; checks run synchronously on the thread that
 * submits the CGF, so violations throw in the main thread.
 */
class cgf_diagnostics {
  public:
	/// Creates the thread-local instance. Must not be called while an instance exists.
	static void make_available() {
		assert(m_instance == nullptr);
		m_instance = std::unique_ptr<cgf_diagnostics>(new cgf_diagnostics());
	}

	/// Returns whether an instance exists on this thread (may be disabled e.g. in some tests).
	static bool is_available() { return m_instance != nullptr; }

	/// Destroys the thread-local instance (no-op if none exists).
	static void teardown() { m_instance.reset(); }

	static cgf_diagnostics& get_instance() {
		assert(m_instance != nullptr);
		return *m_instance;
	}

	cgf_diagnostics(const cgf_diagnostics&) = delete;
	cgf_diagnostics(cgf_diagnostics&&) = delete;
	// Deleted assignment operators conventionally return a reference
	// (declaring them as returning by value is a clang-tidy finding).
	cgf_diagnostics& operator=(const cgf_diagnostics&) = delete;
	cgf_diagnostics& operator=(cgf_diagnostics&&) = delete;
	~cgf_diagnostics() = default;

	/// Checks a device kernel: every accessor in @p buffer_accesses must be copied into @p kernel.
	/// Throws std::runtime_error on violation.
	template <target Tgt, typename Closure, std::enable_if_t<Tgt == target::device, int> = 0>
	void check(const Closure& kernel, const buffer_access_map& buffer_accesses) {
		static_assert(std::is_copy_constructible_v<std::decay_t<Closure>>);
		check(target::device, kernel, &buffer_accesses, 0);
	}

	/// Checks a host task: every accessor and at least @p non_void_side_effects_count side effects
	/// must be copied into @p kernel. Throws std::runtime_error on violation.
	template <target Tgt, typename Closure, std::enable_if_t<Tgt == target::host_task, int> = 0>
	void check(const Closure& kernel, const buffer_access_map& buffer_accesses, const size_t non_void_side_effects_count) {
		static_assert(std::is_copy_constructible_v<std::decay_t<Closure>>);
		check(target::host_task, kernel, &buffer_accesses, non_void_side_effects_count);
	}

	/// True while a kernel closure is being test-copied; accessor / side-effect copy
	/// constructors consult this before registering themselves.
	bool is_checking() const { return m_is_checking; }

	/// Called from an accessor's copy path during a check. Verifies the accessor's target
	/// matches the kernel type and marks the corresponding access as captured.
	void register_accessor(const hydration_id hid, const target tgt) {
		assert(m_is_checking);
		// Hydration ids are 1-based; accesses are indexed from 0.
		assert(hid - 1 < m_expected_buffer_accesses->get_num_accesses());
		if(tgt != m_expected_target) {
			throw std::runtime_error(fmt::format("Accessor {} for buffer {} has wrong target ('{}' instead of '{}').", hid - 1,
			    m_expected_buffer_accesses->get_nth_access(hid - 1).first, tgt == target::device ? "device" : "host_task",
			    m_expected_target == target::device ? "device" : "host_task"));
		}
		m_registered_buffer_accesses.at(hid - 1) = true;
	}

	/// Called from a side effect's copy constructor. Counts captures during host-task checks;
	/// using side effects in device kernels is an error.
	void register_side_effect() {
		if(!m_is_checking) return;
		if(m_expected_target != target::host_task) { throw std::runtime_error("Side effects can only be used in host tasks."); }
		m_registered_side_effect_count++;
	}

  private:
	inline static thread_local std::unique_ptr<cgf_diagnostics> m_instance; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)

	bool m_is_checking = false;
	std::optional<target> m_expected_target = std::nullopt;
	// Non-owning; only valid for the duration of a check().
	const buffer_access_map* m_expected_buffer_accesses = nullptr;
	std::vector<bool> m_registered_buffer_accesses;
	size_t m_expected_side_effects_count = 0;
	size_t m_registered_side_effect_count = 0;

	cgf_diagnostics() = default;

	/// Copies @p kernel while m_is_checking is set, so that accessor / side-effect copy
	/// constructors report back, then verifies everything expected was captured.
	template <typename Closure>
	void check(const target tgt, const Closure& kernel, const buffer_access_map* const buffer_accesses, const size_t expected_side_effects_count) {
		m_expected_target = tgt;
		m_expected_buffer_accesses = buffer_accesses;
		m_registered_buffer_accesses.clear();
		m_registered_buffer_accesses.resize(m_expected_buffer_accesses->get_num_accesses());
		m_expected_side_effects_count = expected_side_effects_count;
		m_registered_side_effect_count = 0;

		m_is_checking = true;
		try {
			[[maybe_unused]] auto copy = kernel;
		} catch(...) {
			// Restore all check state (not just m_is_checking) before propagating,
			// so a failed check leaves the instance consistent for the next one.
			m_is_checking = false;
			m_expected_target = std::nullopt;
			m_expected_buffer_accesses = nullptr;
			throw;
		}
		m_is_checking = false;
		m_expected_target = std::nullopt;

		for(size_t i = 0; i < m_expected_buffer_accesses->get_num_accesses(); ++i) {
			if(!m_registered_buffer_accesses[i]) {
				throw std::runtime_error(fmt::format("Accessor {} for buffer {} is not being copied into the kernel. This indicates a bug. Make sure "
				                                     "the accessor is captured by value and not by reference, or remove it entirely.",
				    i, m_expected_buffer_accesses->get_nth_access(i).first));
			}
		}

		if(tgt == target::host_task) {
			if(m_registered_side_effect_count < m_expected_side_effects_count) {
				throw std::runtime_error(
				    fmt::format("The number of side effects copied into the kernel is fewer ({}) than expected ({}). This may be indicative "
				                "of a bug. Make sure all side effects are captured by value and not by reference, and remove unused ones.",
				        m_registered_side_effect_count, m_expected_side_effects_count));
			}
			// TODO: We could issue a warning here when the number of registered side effects is higher than expected (which may be legitimate, due to copies).
		}
	}
};

} // namespace celerity::detail
24 changes: 19 additions & 5 deletions include/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <spdlog/fmt/fmt.h>

#include "buffer.h"
#include "cgf_diagnostics.h"
#include "closure_hydrator.h"
#include "device_queue.h"
#include "host_queue.h"
Expand Down Expand Up @@ -37,7 +38,7 @@ namespace detail {
handler make_command_group_handler(const task_id tid, const size_t num_collective_nodes);
std::unique_ptr<task> into_task(handler&& cgh);
hydration_id add_requirement(handler& cgh, const buffer_id bid, std::unique_ptr<range_mapper_base> rm);
void add_requirement(handler& cgh, const host_object_id hoid, const experimental::side_effect_order order);
void add_requirement(handler& cgh, const host_object_id hoid, const experimental::side_effect_order order, const bool is_void);
void add_reduction(handler& cgh, const reduction_info& rinfo);
void extend_lifetime(handler& cgh, std::shared_ptr<detail::lifetime_extending_state> state);

Expand Down Expand Up @@ -392,13 +393,14 @@ class handler {
friend handler detail::make_command_group_handler(const detail::task_id tid, const size_t num_collective_nodes);
friend std::unique_ptr<detail::task> detail::into_task(handler&& cgh);
friend detail::hydration_id detail::add_requirement(handler& cgh, const detail::buffer_id bid, std::unique_ptr<detail::range_mapper_base> rm);
friend void detail::add_requirement(handler& cgh, const detail::host_object_id hoid, const experimental::side_effect_order order);
friend void detail::add_requirement(handler& cgh, const detail::host_object_id hoid, const experimental::side_effect_order order, const bool is_void);
friend void detail::add_reduction(handler& cgh, const detail::reduction_info& rinfo);
friend void detail::extend_lifetime(handler& cgh, std::shared_ptr<detail::lifetime_extending_state> state);

detail::task_id m_tid;
detail::buffer_access_map m_access_map;
detail::side_effect_map m_side_effects;
size_t m_non_void_side_effects_count = 0;
detail::reduction_set m_reductions;
std::unique_ptr<detail::task> m_task = nullptr;
size_t m_num_collective_nodes;
Expand Down Expand Up @@ -442,9 +444,10 @@ class handler {
return m_next_accessor_hydration_id++;
}

void add_requirement(const detail::host_object_id hoid, const experimental::side_effect_order order) {
void add_requirement(const detail::host_object_id hoid, const experimental::side_effect_order order, const bool is_void) {
assert(m_task == nullptr);
m_side_effects.add_side_effect(hoid, order);
if(!is_void) { m_non_void_side_effects_count++; }
}

void add_reduction(const detail::reduction_info& rinfo) {
Expand Down Expand Up @@ -473,6 +476,7 @@ class handler {
// each node that reads from the reduction output buffer, initializing it to the identity value locally.
throw std::runtime_error{"The execution range of device tasks must have at least one item"};
}
// Note that cgf_diagnostics has a similar check, but we don't catch void side effects there.
if(!m_side_effects.empty()) { throw std::runtime_error{"Side effects cannot be used in device kernels"}; }
m_task =
detail::task::make_device_compute(m_tid, geometry, std::move(launcher), std::move(m_access_map), std::move(m_reductions), std::move(debug_name));
Expand All @@ -494,6 +498,10 @@ class handler {
std::index_sequence<ReductionIndices...> /* indices */, Reductions... reductions) {
static_assert(std::is_copy_constructible_v<std::decay_t<Kernel>>, "Kernel functor must be copyable"); // Required for hydration

// Check whether all accessors are being captured by value etc.
// Although the diagnostics should always be available, we currently disable them for some test cases.
if(detail::cgf_diagnostics::is_available()) { detail::cgf_diagnostics::get_instance().check<target::device>(kernel, m_access_map); }

auto fn = [=](detail::device_queue& q, const subrange<3> execution_sr, const std::vector<void*>& reduction_ptrs, const bool is_reduction_initializer) {
return q.submit([&](sycl::handler& cgh) {
constexpr int sycl_dims = std::max(1, Dims);
Expand Down Expand Up @@ -525,6 +533,12 @@ class handler {
static_assert(std::is_copy_constructible_v<std::decay_t<Kernel>>, "Kernel functor must be copyable"); // Required for hydration
static_assert(Dims >= 0);

// Check whether all accessors are being captured by value etc.
// Although the diagnostics should always be available, we currently disable them for some test cases.
if(detail::cgf_diagnostics::is_available()) {
detail::cgf_diagnostics::get_instance().check<target::host_task>(kernel, m_access_map, m_non_void_side_effects_count);
}

auto fn = [kernel, cgid, global_range](detail::host_queue& q, const subrange<3>& execution_sr) {
auto hydrated_kernel = detail::closure_hydrator::get_instance().hydrate<target::host_task>(kernel);
return q.submit(cgid, [hydrated_kernel, global_range, execution_sr](MPI_Comm comm) {
Expand Down Expand Up @@ -571,8 +585,8 @@ namespace detail {
return cgh.add_requirement(bid, std::move(rm));
}

inline void add_requirement(handler& cgh, const host_object_id hoid, const experimental::side_effect_order order) {
return cgh.add_requirement(hoid, order);
inline void add_requirement(handler& cgh, const host_object_id hoid, const experimental::side_effect_order order, const bool is_void) {
return cgh.add_requirement(hoid, order, is_void);
}

inline void add_reduction(handler& cgh, const detail::reduction_info& rinfo) { return cgh.add_reduction(rinfo); }
Expand Down
12 changes: 9 additions & 3 deletions include/side_effect.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

#include <type_traits>

#include "cgf_diagnostics.h"
#include "handler.h"
#include "host_object.h"


namespace celerity::experimental {

/**
Expand All @@ -26,6 +26,10 @@ class side_effect {
const host_object<T>& object, handler& cgh)
: side_effect(ctor_internal_tag{}, object, cgh) {}

side_effect(const side_effect& other) : m_instance(other.m_instance) {
if(detail::cgf_diagnostics::is_available()) { detail::cgf_diagnostics::get_instance().register_side_effect(); }
}

template <typename U = T>
std::enable_if_t<!std::is_void_v<U>, instance_type>& operator*() const {
return *m_instance;
Expand All @@ -40,7 +44,7 @@ class side_effect {
instance_type* m_instance;

side_effect(ctor_internal_tag /* tag */, const host_object<T>& object, handler& cgh) : m_instance{&detail::get_host_object_instance(object)} {
detail::add_requirement(cgh, detail::get_host_object_id(object), order);
detail::add_requirement(cgh, detail::get_host_object_id(object), order, false);
detail::extend_lifetime(cgh, detail::get_lifetime_extending_state(object));
}
};
Expand All @@ -52,9 +56,11 @@ class side_effect<void, Order> {
constexpr static inline side_effect_order order = Order;

explicit side_effect(const host_object<void>& object, handler& cgh) {
detail::add_requirement(cgh, detail::get_host_object_id(object), order);
detail::add_requirement(cgh, detail::get_host_object_id(object), order, true);
detail::extend_lifetime(cgh, detail::get_lifetime_extending_state(object));
}

// Note: We don't register the side effect with CGF diagnostics b/c it makes little sense to capture void side effects.
};

template <typename T>
Expand Down
6 changes: 6 additions & 0 deletions src/runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "affinity.h"
#include "buffer.h"
#include "buffer_manager.h"
#include "cgf_diagnostics.h"
#include "command_graph.h"
#include "executor.h"
#include "graph_generator.h"
Expand Down Expand Up @@ -133,6 +134,8 @@ namespace detail {
#endif
m_user_bench = std::make_unique<experimental::bench::detail::user_benchmarker>(*m_cfg, static_cast<node_id>(world_rank));

cgf_diagnostics::make_available();

m_h_queue = std::make_unique<host_queue>();
m_d_queue = std::make_unique<device_queue>();

Expand Down Expand Up @@ -178,6 +181,9 @@ namespace detail {
m_buffer_mngr.reset();
m_d_queue.reset();
m_h_queue.reset();

cgf_diagnostics::teardown();

m_user_bench.reset();

// Make sure we free all of our MPI transfers before we finalize
Expand Down
4 changes: 2 additions & 2 deletions test/buffer_manager_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace detail {
b_id = celerity::detail::get_buffer_id(b);
q.submit([&](celerity::handler& cgh) {
celerity::accessor a{b, cgh, celerity::access::all(), celerity::write_only};
cgh.parallel_for<class UKN(i)>(b.get_range(), [=](celerity::item<1> it) {});
cgh.parallel_for<class UKN(i)>(b.get_range(), [=](celerity::item<1> it) { (void)a; });
});
REQUIRE(bm.has_buffer(b_id));
}
Expand All @@ -36,7 +36,7 @@ namespace detail {
for(int i = 0; i < (new_horizon_step * 3 + 2); i++) {
q.submit([&](celerity::handler& cgh) {
celerity::accessor a{c, cgh, celerity::access::all(), celerity::write_only};
cgh.parallel_for<class UKN(i)>(c.get_range(), [=](celerity::item<1>) {});
cgh.parallel_for<class UKN(i)>(c.get_range(), [=](celerity::item<1>) { (void)a; });
});
// this sync is inside the loop because otherwise there is a race between this thread and the executor informing the TDAG
// of the executed horizons, meaning that task deletion is not guaranteed.
Expand Down
2 changes: 1 addition & 1 deletion test/print_graph_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "Buffer debug names show up in the

q.submit([&](handler& cgh) {
celerity::accessor acc{buff_a, cgh, celerity::access::all{}, celerity::write_only};
cgh.parallel_for<class UKN(print_graph_buffer_name)>(range, [=](item<1> item) {});
cgh.parallel_for<class UKN(print_graph_buffer_name)>(range, [=](item<1> item) { (void)acc; });
});

// wait for commands to be generated in the scheduler thread
Expand Down
Loading

0 comments on commit ff7ed02

Please sign in to comment.