Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect uninitialized reads and overlapping writes #224

Merged
merged 4 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Versioning](http://semver.org/spec/v2.0.0.html).
- Add new environment variables `CELERITY_HORIZON_STEP` and `CELERITY_HORIZON_MAX_PARALLELISM` to control Horizon generation (#199)
- Add new `experimental::constrain_split` API to limit how a kernel can be split (#?)
- `distr_queue::fence` and `buffer_snapshot` are now stable, subsuming the `experimental::` APIs of the same name (#225)
- Celerity now warns at runtime when a task declares reads from uninitialized buffers or writes with overlapping ranges between nodes (#224)

### Changed

Expand Down
12 changes: 6 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,18 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(ENABLE_ACC_CHECK ON)
set(DEFAULT_ENABLE_DEBUG_CHECKS ON)
else()
set(ENABLE_ACC_CHECK OFF)
set(DEFAULT_ENABLE_DEBUG_CHECKS OFF)
endif()

option(CELERITY_ACCESSOR_BOUNDARY_CHECK "Enable accessor boundary check" ${ENABLE_ACC_CHECK})
option(CELERITY_ACCESS_PATTERN_DIAGNOSTICS "Diagnose uninitialized reads and overlapping writes" ${DEFAULT_ENABLE_DEBUG_CHECKS})
option(CELERITY_ACCESSOR_BOUNDARY_CHECK "Enable accessor boundary check" ${DEFAULT_ENABLE_DEBUG_CHECKS})

if(CELERITY_ACCESSOR_BOUNDARY_CHECK)
if(CELERITY_ACCESSOR_BOUNDARY_CHECK AND NOT (CMAKE_BUILD_TYPE STREQUAL "Debug"))
message(STATUS "Accessor boundary check enabled - this will impact kernel performance")
endif()

unset(ENABLE_ACC_CHECK)

set(CELERITY_CMAKE_DIR "${PROJECT_SOURCE_DIR}/cmake")
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}" "${CELERITY_CMAKE_DIR}")
find_package(MPI 2.0 REQUIRED)
Expand Down Expand Up @@ -288,6 +287,7 @@ target_compile_definitions(celerity_runtime PUBLIC
CELERITY_FEATURE_UNNAMED_KERNELS=$<BOOL:${CELERITY_FEATURE_UNNAMED_KERNELS}>
CELERITY_DETAIL_HAS_NAMED_THREADS=$<BOOL:${CELERITY_DETAIL_HAS_NAMED_THREADS}>
CELERITY_ACCESSOR_BOUNDARY_CHECK=$<BOOL:${CELERITY_ACCESSOR_BOUNDARY_CHECK}>
CELERITY_ACCESS_PATTERN_DIAGNOSTICS=$<BOOL:${CELERITY_ACCESS_PATTERN_DIAGNOSTICS}>
fknorr marked this conversation as resolved.
Show resolved Hide resolved
)

# Collect version information from git in src/version.cc. This target is always out of date, but the timestamp
Expand Down
292 changes: 146 additions & 146 deletions ci/perf/gpuc2_bench.csv

Large diffs are not rendered by default.

294 changes: 147 additions & 147 deletions ci/perf/gpuc2_bench.md

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions cmake/celerity-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ set(CELERITY_FEATURE_SCALAR_REDUCTIONS "@CELERITY_FEATURE_SCALAR_REDUCTIONS@")
set(CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS "@CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS@")
set(CELERITY_FEATURE_LOCAL_ACCESSOR "@CELERITY_FEATURE_LOCAL_ACCESSOR@")
set(CELERITY_FEATURE_UNNAMED_KERNELS "@CELERITY_FEATURE_UNNAMED_KERNELS@")
set(CELERITY_DETAIL_HAS_NAMED_THREADS "@CELERITY_DETAIL_HAS_NAMED_THREADS@")
set(CELERITY_ACCESSOR_BOUNDARY_CHECK "@CELERITY_ACCESSOR_BOUNDARY_CHECK@")
set(CELERITY_ACCESS_PATTERN_DIAGNOSTICS "@CELERITY_ACCESS_PATTERN_DIAGNOSTICS@")

include("${CMAKE_CURRENT_LIST_DIR}/celerity-targets.cmake")

Expand Down
20 changes: 16 additions & 4 deletions include/distributed_graph_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

#include <bitset>
#include <unordered_map>
#include <variant>

#include "command_graph.h"
#include "ranges.h"
Expand Down Expand Up @@ -65,6 +64,7 @@ class distributed_graph_generator {
buffer_state(region_map<write_command_state> lw, region_map<std::bitset<max_num_nodes>> rr)
: local_last_writer(std::move(lw)), replicated_regions(std::move(rr)), pending_reduction(std::nullopt) {}

region<3> initialized_region; // for detecting uninitialized reads (only if policies.uninitialized_read != error_policy::ignore)
region_map<write_command_state> local_last_writer;
region_map<node_bitset> replicated_regions;

Expand All @@ -75,10 +75,15 @@ class distributed_graph_generator {
};

public:
distributed_graph_generator(
const size_t num_nodes, const node_id local_nid, command_graph& cdag, const task_manager& tm, detail::command_recorder* recorder);
struct policy_set {
error_policy uninitialized_read_error = error_policy::panic;
error_policy overlapping_write_error = error_policy::panic;
};

distributed_graph_generator(const size_t num_nodes, const node_id local_nid, command_graph& cdag, const task_manager& tm,
detail::command_recorder* recorder, const policy_set& policy = default_policy_set());

void add_buffer(const buffer_id bid, const int dims, const range<3>& range);
void add_buffer(const buffer_id bid, const int dims, const range<3>& range, bool host_initialized);

std::unordered_set<abstract_command*> build_task(const task& tsk);

Expand Down Expand Up @@ -116,12 +121,19 @@ class distributed_graph_generator {

void prune_commands_before(const command_id epoch);

void report_overlapping_writes(const task& tsk, const box_vector<3>& local_chunks) const;

private:
using buffer_read_map = std::unordered_map<buffer_id, region<3>>;
using side_effect_map = std::unordered_map<host_object_id, command_id>;

// default-constructs a policy_set - this must be a function because we can't use the implicit default constructor of policy_set, which has member
// initializers, within its surrounding class (Clang)
constexpr static policy_set default_policy_set() { return {}; }

size_t m_num_nodes;
node_id m_local_nid;
policy_set m_policy;
command_graph& m_cdag;
const task_manager& m_task_mngr;
std::unordered_map<buffer_id, buffer_state> m_buffer_states;
Expand Down
22 changes: 5 additions & 17 deletions include/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@
#include "types.h"
#include "workaround.h"

#if !defined(_MSC_VER)
// Required for kernel name demangling in Clang
#include <cxxabi.h>
#endif

namespace celerity {
class handler;
}
Expand Down Expand Up @@ -70,23 +65,16 @@ namespace detail {

void set_task_name(handler& cgh, const std::string& debug_name);

template <typename Name>
std::string kernel_debug_name() {
// we need to typeid a pointer, since the name is often undefined
std::string name = typeid(Name*).name();
#if !defined(_MSC_VER)
const std::unique_ptr<char, void (*)(void*)> demangled(abi::__cxa_demangle(name.c_str(), nullptr, nullptr, nullptr), std::free);
name = demangled.get();
#endif
// get rid of the pointer "*"
return name.substr(0, name.length() - 1);
}

struct unnamed_kernel {};

template <typename KernelName>
constexpr bool is_unnamed_kernel = std::is_same_v<KernelName, unnamed_kernel>;

template <typename KernelName>
std::string kernel_debug_name() {
return !is_unnamed_kernel<KernelName> ? utils::get_simplified_type_name<KernelName>() : std::string{};
}

struct simple_kernel_flavor {};
struct nd_range_kernel_flavor {};

Expand Down
5 changes: 4 additions & 1 deletion include/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ namespace detail {
*/
void notify_task_created(const task* const tsk) { notify(event_task_available{tsk}); }

void notify_buffer_registered(const buffer_id bid, const int dims, const range<3>& range) { notify(event_buffer_registered{bid, dims, range}); }
void notify_buffer_registered(const buffer_id bid, const int dims, const range<3>& range, bool host_initialized) {
notify(event_buffer_registered{bid, dims, range, host_initialized});
}

protected:
/**
Expand All @@ -53,6 +55,7 @@ namespace detail {
buffer_id bid;
int dims;
celerity::range<3> range;
bool host_initialized;
};
using event = std::variant<event_shutdown, event_task_available, event_buffer_registered>;

Expand Down
5 changes: 5 additions & 0 deletions include/task.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,5 +271,10 @@ namespace detail {
}
};

[[nodiscard]] std::string print_task_debug_label(const task& tsk, bool title_case = false);

/// Determines which overlapping regions appear between write accesses when the iteration space of `tsk` is split into `chunks`.
std::unordered_map<buffer_id, region<3>> detect_overlapping_writes(const task& tsk, const box_vector<3>& chunks);
PeterTh marked this conversation as resolved.
Show resolved Hide resolved

} // namespace detail
} // namespace celerity
11 changes: 10 additions & 1 deletion include/task_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,13 @@ namespace detail {
using buffer_writers_map = std::unordered_map<buffer_id, region_map<std::optional<task_id>>>;

public:
struct policy_set {
error_policy uninitialized_read_error = error_policy::panic;
};

constexpr inline static task_id initial_epoch_task = 0;

task_manager(size_t num_collective_nodes, host_queue* queue, detail::task_recorder* recorder);
task_manager(size_t num_collective_nodes, host_queue* queue, detail::task_recorder* recorder, const policy_set& policy = default_policy_set());

virtual ~task_manager() = default;

Expand Down Expand Up @@ -179,8 +183,13 @@ namespace detail {
size_t get_current_task_count() const { return m_task_buffer.get_current_task_count(); }

private:
// default-constructs a policy_set - this must be a function because we can't use the implicit default constructor of policy_set, which has member
// initializers, within its surrounding class (Clang)
constexpr static policy_set default_policy_set() { return {}; }

const size_t m_num_collective_nodes;
host_queue* m_queue;
policy_set m_policy;

task_ring_buffer m_task_buffer;

Expand Down
8 changes: 8 additions & 0 deletions include/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,12 @@ struct reduction_info {
};

constexpr node_id master_node_id = 0;

enum class error_policy {
ignore,
log_warning,
log_error,
panic,
};

} // namespace celerity::detail
48 changes: 45 additions & 3 deletions include/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@
#include <functional>
#include <string>
#include <type_traits>
#include <typeinfo>
#include <variant>

#include <fmt/format.h>

#include "types.h"


namespace celerity::detail::utils {

template <typename T, typename P>
Expand Down Expand Up @@ -84,10 +90,46 @@ static auto tuple_without(const std::tuple<Ts...>& tuple) {
}
}

// fiddles out the base name of a task from a full, demangled input type name
std::string simplify_task_name(const std::string& demangled_type_name);
/// Fiddles out the base name of a (possibly templated) struct or class from a full (possibly mangled) type name.
/// The input parameter should be `typeid(Struct*)`, i.e. a _pointer_ to the desired struct type.
std::string get_simplified_type_name_from_pointer(const std::type_info& pointer_type_info);

// escapes "<", ">", and "&" with their corresponding HTML escape sequences
/// Fiddles out the base name of a (possibly templated) struct or class from a full (possibly mangled) type name.
template <typename Struct>
std::string get_simplified_type_name() {
// Using a pointer will also make this function work types that have no definitions, which commonly happens for kernel name type.
return get_simplified_type_name_from_pointer(typeid(Struct*));
}

/// Escapes "<", ">", and "&" with their corresponding HTML escape sequences
std::string escape_for_dot_label(std::string str);

enum class panic_solution {
log_and_abort, ///< default
throw_logic_error, ///< enabled in unit tests to detect and recover from panics
};

/// Globally and atomically sets the behavior of `utils::panic()`.
void set_panic_solution(panic_solution solution);

/// Either throws or aborts with a message, depending on the global `panic_solution` setting.
[[noreturn]] void panic(const std::string& msg);

/// Either throws or aborts with a message, depending on the global `panic_solution` setting.
template <typename... FmtParams, std::enable_if_t<sizeof...(FmtParams) >= 2, int> = 0>
[[noreturn]] void panic(const FmtParams&... fmt_args) {
// TODO also receive a std::source_location with C++20.
panic(fmt::format(fmt_args...));
}

/// Ignores, logs, or panics on an error depending on the `error_policy`.
void report_error(const error_policy policy, const std::string& msg);

/// Ignores, logs, or panics on an error depending on the `error_policy`.
template <typename... FmtParams, std::enable_if_t<sizeof...(FmtParams) >= 2, int> = 0>
void report_error(const error_policy policy, const FmtParams&... fmt_args) {
// TODO also receive a std::source_location with C++20.
if(policy != error_policy::ignore) { report_error(policy, fmt::format(fmt_args...)); }
}

} // namespace celerity::detail::utils
Loading
Loading