Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accessor boundary check #178

Merged
merged 1 commit into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,20 @@ project(celerity_runtime VERSION ${Celerity_VERSION} LANGUAGES CXX)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(ENABLE_ACC_CHECK ON)
else()
set(ENABLE_ACC_CHECK OFF)
psalz marked this conversation as resolved.
Show resolved Hide resolved
endif()

option(CELERITY_USE_MIMALLOC "Use the mimalloc memory allocator" ON)
option(CELERITY_ACCESSOR_BOUNDARY_CHECK "Enable accessor boundary check" ${ENABLE_ACC_CHECK})

if(CELERITY_ACCESSOR_BOUNDARY_CHECK)
message(STATUS "Accessor boundary check enabled - this will impact kernel performance")
endif()

unset(ENABLE_ACC_CHECK)

set(CELERITY_CMAKE_DIR "${PROJECT_SOURCE_DIR}/cmake")
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}" "${CELERITY_CMAKE_DIR}")
Expand Down Expand Up @@ -313,6 +326,7 @@ target_compile_definitions(celerity_runtime PUBLIC
CELERITY_FEATURE_UNNAMED_KERNELS=$<BOOL:${CELERITY_FEATURE_UNNAMED_KERNELS}>
CELERITY_DETAIL_HAS_NAMED_THREADS=$<BOOL:${CELERITY_DETAIL_HAS_NAMED_THREADS}>
CELERITY_DETAIL_IS_OLD_COMPUTECPP_COMPILER=$<BOOL:${CELERITY_DETAIL_IS_OLD_COMPUTECPP_COMPILER}>
CELERITY_ACCESSOR_BOUNDARY_CHECK=$<BOOL:${CELERITY_ACCESSOR_BOUNDARY_CHECK}>
)

# Collect version information from git in src/version.cc. This target is always out of date, but the timestamp
Expand Down
37 changes: 31 additions & 6 deletions include/accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,12 +254,23 @@ class accessor<DataT, Dims, Mode, target::device> : public detail::accessor_base
}

template <access_mode M = Mode>
inline std::enable_if_t<detail::access::mode_traits::is_producer(M), DataT&> operator[](const id<Dims>& index) const {
return m_device_ptr[get_linear_offset(index)];
}

template <access_mode M = Mode>
inline std::enable_if_t<detail::access::mode_traits::is_pure_consumer(M), const DataT&> operator[](const id<Dims>& index) const {
inline std::conditional_t<detail::access::mode_traits::is_producer(M), DataT&, const DataT&> operator[](const id<Dims>& index) const {
#if CELERITY_ACCESSOR_BOUNDARY_CHECK
// We currently don't support boundary checking for accessors created using accessor_testspy::make_device_accessor,
// which does not set m_oob_indices.
if(m_oob_indices != nullptr) {
const id<Dims> all_true = detail::id_cast<Dims>(id<3>(true, true, true));
const bool is_within_bounds_lo = (index >= m_accessed_virtual_subrange.offset) == all_true;
const bool is_within_bounds_hi = (index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)) == all_true;
if((!is_within_bounds_lo || !is_within_bounds_hi)) {
for(int d = 0; d < Dims; ++d) {
sycl::atomic_ref<size_t, sycl::memory_order::relaxed, sycl::memory_scope::device>{m_oob_indices[0][d]}.fetch_min(index[d]);
sycl::atomic_ref<size_t, sycl::memory_order::relaxed, sycl::memory_scope::device>{m_oob_indices[1][d]}.fetch_max(index[d] + 1);
}
return m_oob_fallback_value;
}
}
#endif
return m_device_ptr[get_linear_offset(index)];
}

Expand Down Expand Up @@ -311,6 +322,12 @@ class accessor<DataT, Dims, Mode, target::device> : public detail::accessor_base
DataT* m_device_ptr = nullptr;
CELERITY_DETAIL_NO_UNIQUE_ADDRESS id<Dims> m_backing_buffer_offset;
CELERITY_DETAIL_NO_UNIQUE_ADDRESS range<Dims> m_backing_buffer_range = detail::zero_range;
#if CELERITY_ACCESSOR_BOUNDARY_CHECK
id<3>* m_oob_indices = nullptr;
subrange<Dims> m_accessed_virtual_subrange = {};
// This value (or a reference to it) is returned for all out-of-bounds accesses.
mutable DataT m_oob_fallback_value = DataT{};
#endif

template <typename Functor>
accessor(const ctor_internal_tag /* tag */, const buffer<DataT, Dims>& buff, handler& cgh, const Functor& rmfn) {
Expand Down Expand Up @@ -340,6 +357,10 @@ class accessor<DataT, Dims, Mode, target::device> : public detail::accessor_base
m_device_ptr = other.m_device_ptr;
m_backing_buffer_offset = other.m_backing_buffer_offset;
m_backing_buffer_range = other.m_backing_buffer_range;
#if CELERITY_ACCESSOR_BOUNDARY_CHECK
m_oob_indices = other.m_oob_indices;
m_accessed_virtual_subrange = other.m_accessed_virtual_subrange;
#endif

#if !defined(__SYCL_DEVICE_ONLY__)
if(detail::is_embedded_hydration_id(m_device_ptr)) {
Expand All @@ -352,6 +373,10 @@ class accessor<DataT, Dims, Mode, target::device> : public detail::accessor_base
m_device_ptr = static_cast<DataT*>(info.ptr);
m_backing_buffer_offset = detail::id_cast<Dims>(info.backing_buffer_offset);
m_backing_buffer_range = detail::range_cast<Dims>(info.backing_buffer_range);
#if CELERITY_ACCESSOR_BOUNDARY_CHECK
m_oob_indices = info.out_of_bounds_indices;
m_accessed_virtual_subrange = detail::subrange_cast<Dims>(info.accessed_virtual_subrange);
#endif
}
}
#endif
Expand Down
4 changes: 3 additions & 1 deletion include/buffer_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ namespace detail {
using host_buffer_factory = std::function<std::unique_ptr<buffer_storage>(const range<3>&)>;

struct buffer_info {
int dimensions = -1;
celerity::range<3> range = {1, 1, 1};
size_t element_size = 0;
bool is_host_initialized;
Expand Down Expand Up @@ -139,7 +140,8 @@ namespace detail {
return std::make_unique<device_buffer_storage<DataT, Dims>>(range_cast<Dims>(r), q);
};
auto host_factory = [](const celerity::range<3>& r) { return std::make_unique<host_buffer_storage<DataT, Dims>>(range_cast<Dims>(r)); };
m_buffer_infos.emplace(bid, buffer_info{range, sizeof(DataT), is_host_initialized, {}, std::move(device_factory), std::move(host_factory)});
m_buffer_infos.emplace(
bid, buffer_info{Dims, range, sizeof(DataT), is_host_initialized, {}, std::move(device_factory), std::move(host_factory)});
m_newest_data_location.emplace(bid, region_map<data_location>(range, data_location::nowhere));

#if defined(CELERITY_DETAIL_ENABLE_DEBUG)
Expand Down
4 changes: 4 additions & 0 deletions include/closure_hydrator.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ class closure_hydrator {
range<3> backing_buffer_range;
id<3> backing_buffer_offset;
subrange<3> accessed_virtual_subrange;

#if CELERITY_ACCESSOR_BOUNDARY_CHECK
id<3>* out_of_bounds_indices = nullptr;
#endif
};

closure_hydrator(const closure_hydrator&) = delete;
Expand Down
4 changes: 4 additions & 0 deletions include/worker_job.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ namespace detail {
cl::sycl::event m_event;
bool m_submitted = false;

#if CELERITY_ACCESSOR_BOUNDARY_CHECK
std::vector<id<3>*> m_oob_indices_per_accessor;
#endif

bool execute(const command_pkg& pkg) override;
std::string get_description(const command_pkg& pkg) override;
};
Expand Down
32 changes: 31 additions & 1 deletion src/worker_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,19 @@ namespace detail {
const auto [bid, mode] = access_map.get_nth_access(i);
const auto sr = grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()));
const auto info = m_buffer_mngr.access_device_buffer(bid, mode, sr);

#if CELERITY_ACCESSOR_BOUNDARY_CHECK
auto* const oob_idx = sycl::malloc_shared<id<3>>(2, m_queue.get_sycl_queue());
assert(oob_idx != nullptr);
constexpr size_t size_t_max = std::numeric_limits<size_t>::max();
const auto buffer_dims = m_buffer_mngr.get_buffer_info(bid).dimensions;
oob_idx[0] = id<3>{size_t_max, buffer_dims > 1 ? size_t_max : 0, buffer_dims == 3 ? size_t_max : 0};
oob_idx[1] = id<3>{1, 1, 1};
m_oob_indices_per_accessor.push_back(oob_idx);
accessor_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr, oob_idx});
#else
accessor_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr});
#endif
}

for(size_t i = 0; i < reductions.size(); ++i) {
Expand All @@ -233,9 +245,27 @@ namespace detail {
const auto status = m_event.get_info<cl::sycl::info::event::command_execution_status>();
if(status == cl::sycl::info::event_command_status::complete) {
m_buffer_mngr.unlock(pkg.cid);

const auto data = std::get<execution_data>(pkg.data);
auto tsk = m_task_mngr.get_task(data.tid);

#if CELERITY_ACCESSOR_BOUNDARY_CHECK
for(size_t i = 0; i < m_oob_indices_per_accessor.size(); ++i) {
const id<3>& oob_min = m_oob_indices_per_accessor[i][0];
const id<3>& oob_max = m_oob_indices_per_accessor[i][1];

if(oob_max != id<3>{1, 1, 1}) {
const auto& access_map = tsk->get_buffer_access_map();
const auto acc_sr =
grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()));
const auto oob_sr = subrange<3>(oob_min, range_cast<3>(oob_max - oob_min));
CELERITY_ERROR("Out-of-bounds access in kernel '{}' detected: Accessor {} for buffer {} attempted to access indices between {} which are "
"outside of mapped subrange {}",
tsk->get_debug_name(), i, access_map.get_nth_access(i).first, oob_sr, acc_sr);
}
sycl::free(m_oob_indices_per_accessor[i], m_queue.get_sycl_queue());
}
#endif

for(const auto& reduction : tsk->get_reductions()) {
const auto element_size = m_buffer_mngr.get_buffer_info(reduction.bid).element_size;
auto operand = make_uninitialized_payload<std::byte>(element_size);
Expand Down
38 changes: 38 additions & 0 deletions test/accessor_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,9 @@ namespace detail {

TEST_CASE("0-dimensional accessors are pointer-sized", "[accessor]") {
if(!CELERITY_DETAIL_HAS_NO_UNIQUE_ADDRESS) SKIP("[[no_unique_address]] not available on this compiler");
#if CELERITY_ACCESSOR_BOUNDARY_CHECK
SKIP("no accessor size guarantees when CELERITY_ACCESSOR_BOUNDARY_CHECK=1.");
#endif

// these checks are not static_asserts because they depend on an (optional) compiler layout optimization
CHECK(sizeof(accessor<int, 0, access_mode::read, target::device>) == sizeof(int*));
Expand Down Expand Up @@ -641,5 +644,40 @@ namespace detail {
sycl::free(result, q.get_sycl_queue());
}

template <int>
class oob_fixture : public test_utils::runtime_fixture {};

template <int>
class acc_out_of_bounds_kernel {};

TEMPLATE_TEST_CASE_METHOD_SIG(oob_fixture, "accessor reports out-of-bounds accesses", "[accessor][oob]", ((int Dims), Dims), 1, 2, 3) {
psalz marked this conversation as resolved.
Show resolved Hide resolved
#if !CELERITY_ACCESSOR_BOUNDARY_CHECK
SKIP("CELERITY_ACCESSOR_BOUNDARY_CHECK=0");
#endif

buffer<int, Dims> buff(range_cast<Dims>(range<3>{10, 20, 30}));
const auto accessible_sr = subrange_cast<Dims>(subrange<3>{{5, 10, 15}, {1, 2, 3}});
const auto oob_idx_lo = id_cast<Dims>(id<3>{1, 2, 3});
const auto oob_idx_hi = id_cast<Dims>(id<3>{7, 13, 25});
distr_queue q;

celerity::test_utils::log_capture lc;

q.submit([&](handler& cgh) {
accessor acc(buff, cgh, celerity::access::fixed(accessible_sr), celerity::write_only, celerity::no_init);
cgh.parallel_for<acc_out_of_bounds_kernel<Dims>>(range<Dims>(unit_range), [=](item<Dims>) {
acc[oob_idx_lo] = 0;
acc[oob_idx_hi] = 0;
});
});
q.slow_full_sync();

const auto attempted_sr = subrange<3>{id_cast<3>(oob_idx_lo), range_cast<3>(oob_idx_hi - oob_idx_lo + id_cast<Dims>(range<Dims>(unit_range)))};
const auto error_message = fmt::format("Out-of-bounds access in kernel 'acc_out_of_bounds_kernel<{}>' detected: Accessor 0 for buffer 0 attempted to "
"access indices between {} which are outside of mapped subrange {}",
Dims, attempted_sr, subrange_cast<3>(accessible_sr));
CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring(error_message));
}

} // namespace detail
} // namespace celerity