From d8c333260a5fb266ce12c57e6c37a533d6b12e7c Mon Sep 17 00:00:00 2001 From: Gabriel Mitterrutzner Date: Thu, 7 Sep 2023 09:27:43 +0200 Subject: [PATCH] Add out-of-bounds check for host side accessors --- include/accessor.h | 34 ++++++++++++++++ include/worker_job.h | 4 ++ src/worker_job.cc | 55 ++++++++++++++++++++++---- test/accessor_tests.cc | 88 ++++++++++++++++++++++++++++++++++++------ 4 files changed, 162 insertions(+), 19 deletions(-) diff --git a/include/accessor.h b/include/accessor.h index f15ae059f..42d1dffee 100644 --- a/include/accessor.h +++ b/include/accessor.h @@ -406,6 +406,22 @@ class accessor : public detail::accessor_b template inline std::enable_if_t operator[](const id& index) const { +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + if(m_oob_indices != nullptr) { + const bool is_within_bounds_lo = all_true(index >= m_accessed_virtual_subrange.offset); + const bool is_within_bounds_hi = all_true(index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)); + + if((!is_within_bounds_lo || !is_within_bounds_hi)) { + std::lock_guard guard(m_oob_mutex); + for(int d = 0; d < Dims; ++d) { + m_oob_indices[0][d] = std::min(m_oob_indices[0][d], index[d]); + m_oob_indices[1][d] = std::max(m_oob_indices[1][d], index[d] + 1); + } + return m_oob_fallback_value; + } + } +#endif + return m_host_ptr[get_linear_offset(index)]; } @@ -521,6 +537,16 @@ class accessor : public detail::accessor_b // m_host_ptr must be defined *last* for it to overlap with the sequence of range and id members in the 0-dimensional case CELERITY_DETAIL_NO_UNIQUE_ADDRESS DataT* m_host_ptr = nullptr; +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + id<3>* m_oob_indices = nullptr; + // This mutex has to be inline static, since accessors are copyable making the mutex otherwise useless. + // It is a workaround until atomic_ref() can be used on m_oob_indices in c++20. + inline static std::mutex m_oob_mutex; + + // This value (or a reference to it) is returned for all out-of-bounds accesses. + mutable DataT m_oob_fallback_value = DataT{}; +#endif + template accessor(ctor_internal_tag /* tag */, const buffer& buff, handler& cgh, const Functor& rmfn) : m_virtual_buffer_range(buff.get_range()) { using range_mapper = detail::range_mapper>; // decay function type to function pointer @@ -547,6 +573,10 @@ class accessor : public detail::accessor_b m_backing_buffer_range = other.m_backing_buffer_range; m_virtual_buffer_range = other.m_virtual_buffer_range; +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + m_oob_indices = other.m_oob_indices; +#endif + if(detail::is_embedded_hydration_id(m_host_ptr)) { if(detail::cgf_diagnostics::is_available() && detail::cgf_diagnostics::get_instance().is_checking()) { detail::cgf_diagnostics::get_instance().register_accessor(detail::extract_hydration_id(m_host_ptr), target::host_task); @@ -558,6 +588,10 @@ class accessor : public detail::accessor_b m_backing_buffer_offset = detail::id_cast(info.backing_buffer_offset); m_backing_buffer_range = detail::range_cast(info.backing_buffer_range); m_accessed_virtual_subrange = detail::subrange_cast(info.accessed_virtual_subrange); + +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + m_oob_indices = info.out_of_bounds_indices; +#endif } } } diff --git a/include/worker_job.h b/include/worker_job.h index e49875eb1..acd8a18e3 100644 --- a/include/worker_job.h +++ b/include/worker_job.h @@ -156,6 +156,10 @@ namespace detail { std::future m_future; bool m_submitted = false; +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + std::vector>> m_oob_indices_per_accessor; +#endif + bool execute(const command_pkg& pkg) override; std::string get_description(const command_pkg& pkg) override; }; diff --git a/src/worker_job.cc b/src/worker_job.cc index 7ab74fb9f..5a1351649 100644 --- a/src/worker_job.cc +++ b/src/worker_job.cc @@ -160,7 +160,21 @@ namespace detail { const auto [bid, mode] = access_map.get_nth_access(i); const auto sr = access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); const auto info = m_buffer_mngr.access_host_buffer(bid, mode, sr); + +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + // oob_indices[0] contains the lower bound oob indices + // oob_indices[1] contains the upper bound oob indices + std::vector> oob_indices{2}; + constexpr size_t size_t_max = std::numeric_limits::max(); + const auto buffer_dims = m_buffer_mngr.get_buffer_info(bid).dimensions; + oob_indices[0] = id<3>{size_t_max, buffer_dims > 1 ? size_t_max : 0, buffer_dims == 3 ? size_t_max : 0}; + oob_indices[1] = id<3>{0, 0, 0}; + access_infos.push_back( + closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr, oob_indices.data()}); + m_oob_indices_per_accessor.push_back(std::move(oob_indices)); +#else access_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr}); +#endif } closure_hydrator::get_instance().arm(target::host_task, std::move(access_infos)); @@ -175,6 +189,27 @@ namespace detail { if(m_future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { m_buffer_mngr.unlock(pkg.cid); +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + const auto data = std::get(pkg.data); + auto tsk = m_task_mngr.get_task(data.tid); + + for(size_t i = 0; i < m_oob_indices_per_accessor.size(); ++i) { + const id<3>& oob_min = m_oob_indices_per_accessor[i][0]; + const id<3>& oob_max = m_oob_indices_per_accessor[i][1]; + + if(oob_max != id<3>{0, 0, 0}) { + const auto& access_map = tsk->get_buffer_access_map(); + const auto acc_sr = access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); + const auto oob_sr = subrange<3>(oob_min, range_cast<3>(oob_max - oob_min)); + const auto buffer_id = access_map.get_nth_access(i).first; + const auto buffer_name = m_buffer_mngr.get_debug_name(buffer_id); + CELERITY_ERROR("Out-of-bounds access in host task detected: Accessor {} for buffer {} attempted to access indices between {} which are " + "outside of mapped subrange {}", + i, (buffer_name.empty() ? fmt::format("{}", buffer_id) : buffer_name), oob_sr, acc_sr); + } + } +#endif + auto info = m_future.get(); CELERITY_TRACE("Delta time submit -> start: {}us, start -> end: {}us", std::chrono::duration_cast(info.start_time - info.submit_time).count(), @@ -217,14 +252,16 @@ namespace detail { try { const auto info = m_buffer_mngr.access_device_buffer(bid, mode, sr); #if CELERITY_ACCESSOR_BOUNDARY_CHECK - auto* const oob_idx = sycl::malloc_host>(2, m_queue.get_sycl_queue()); - assert(oob_idx != nullptr); + // oob_indices[0] contains the lower bound oob indices + // oob_indices[1] contains the upper bound oob indices + auto* const oob_indices = sycl::malloc_host>(2, m_queue.get_sycl_queue()); + assert(oob_indices != nullptr); constexpr size_t size_t_max = std::numeric_limits::max(); const auto buffer_dims = m_buffer_mngr.get_buffer_info(bid).dimensions; - oob_idx[0] = id<3>{size_t_max, buffer_dims > 1 ? size_t_max : 0, buffer_dims == 3 ? size_t_max : 0}; - oob_idx[1] = id<3>{1, 1, 1}; - m_oob_indices_per_accessor.push_back(oob_idx); - accessor_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr, oob_idx}); + oob_indices[0] = id<3>{size_t_max, buffer_dims > 1 ? size_t_max : 0, buffer_dims == 3 ? size_t_max : 0}; + oob_indices[1] = id<3>{0, 0, 0}; + m_oob_indices_per_accessor.push_back(oob_indices); + accessor_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr, oob_indices}); #else accessor_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr}); #endif @@ -259,13 +296,15 @@ namespace detail { const id<3>& oob_min = m_oob_indices_per_accessor[i][0]; const id<3>& oob_max = m_oob_indices_per_accessor[i][1]; - if(oob_max != id<3>{1, 1, 1}) { + if(oob_max != id<3>{0, 0, 0}) { const auto& access_map = tsk->get_buffer_access_map(); const auto acc_sr = access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); const auto oob_sr = subrange<3>(oob_min, range_cast<3>(oob_max - oob_min)); + const auto buffer_id = access_map.get_nth_access(i).first; + const auto buffer_name = m_buffer_mngr.get_debug_name(buffer_id); CELERITY_ERROR("Out-of-bounds access in kernel '{}' detected: Accessor {} for buffer {} attempted to access indices between {} which are " "outside of mapped subrange {}", - tsk->get_debug_name(), i, access_map.get_nth_access(i).first, oob_sr, acc_sr); + tsk->get_debug_name(), i, (buffer_name.empty() ? fmt::format("{}", buffer_id) : buffer_name), oob_sr, acc_sr); } sycl::free(m_oob_indices_per_accessor[i], m_queue.get_sycl_queue()); } diff --git a/test/accessor_tests.cc b/test/accessor_tests.cc index 1c4ed1df8..e49552e7b 100644 --- a/test/accessor_tests.cc +++ b/test/accessor_tests.cc @@ -650,16 +650,20 @@ namespace detail { template class acc_out_of_bounds_kernel {}; - TEMPLATE_TEST_CASE_METHOD_SIG(oob_fixture, "accessor reports out-of-bounds accesses", "[accessor][oob]", ((int Dims), Dims), 1, 2, 3) { + TEMPLATE_TEST_CASE_METHOD_SIG(oob_fixture, "device accessor reports out-of-bounds accesses", "[accessor][oob]", ((int Dims), Dims), 1, 2, 3) { #if !CELERITY_ACCESSOR_BOUNDARY_CHECK SKIP("CELERITY_ACCESSOR_BOUNDARY_CHECK=0"); #endif - buffer buff(test_utils::truncate_range({10, 20, 30})); + buffer unnamed_buff(test_utils::truncate_range({10, 20, 30})); + buffer named_buff(test_utils::truncate_range({10, 20, 30})); const auto accessible_sr = test_utils::truncate_subrange({{5, 10, 15}, {1, 2, 3}}); const auto oob_idx_lo = test_utils::truncate_id({1, 2, 3}); const auto oob_idx_hi = test_utils::truncate_id({7, 13, 25}); + const auto buffer_name = "oob"; - // we need to be careful about the orderign of the construction and destruction + celerity::debug::set_buffer_name(named_buff, buffer_name); + + // we need to be careful about the ordering of the construction and destruction // of the Celerity queue and the log capturing utility here std::unique_ptr lc; { @@ -668,21 +672,83 @@ namespace detail { lc = std::make_unique(); q.submit([&](handler& cgh) { - accessor acc(buff, cgh, celerity::access::fixed(accessible_sr), celerity::write_only, celerity::no_init); + accessor unnamed_acc(unnamed_buff, cgh, celerity::access::fixed(accessible_sr), celerity::write_only, celerity::no_init); + accessor named_acc(named_buff, cgh, celerity::access::fixed(accessible_sr), celerity::write_only, celerity::no_init); + cgh.parallel_for>(range(ones), [=](item) { - acc[oob_idx_lo] = 0; - acc[oob_idx_hi] = 0; + unnamed_acc[oob_idx_lo] = 0; + unnamed_acc[oob_idx_hi] = 0; + + named_acc[oob_idx_lo] = 0; + named_acc[oob_idx_hi] = 0; }); }); q.slow_full_sync(); } - const auto attempted_sr = subrange<3>{id_cast<3>(oob_idx_lo), range_cast<3>(oob_idx_hi - oob_idx_lo + id_cast(range(ones)))}; - const auto error_message = fmt::format("Out-of-bounds access in kernel 'celerity::detail::acc_out_of_bounds_kernel<{}>' detected: Accessor 0 for " - "buffer 0 attempted to access indices between {} which are outside of mapped subrange {}", - Dims, attempted_sr, subrange_cast<3>(accessible_sr)); - CHECK_THAT(lc->get_log(), Catch::Matchers::ContainsSubstring(error_message)); + const auto attempted_sr = + subrange<3>{id_cast<3>(oob_idx_lo), range_cast<3>(oob_idx_hi - oob_idx_lo + id_cast(range(ones))) - range_cast<3>(range(zeros))}; + const auto unnamed_error_message = + fmt::format("Out-of-bounds access in kernel 'celerity::detail::acc_out_of_bounds_kernel<{}>' detected: Accessor 0 for buffer 0 attempted to " + "access indices between {} which are outside of mapped subrange {}", + Dims, attempted_sr, subrange_cast<3>(accessible_sr)); + CHECK_THAT(lc->get_log(), Catch::Matchers::ContainsSubstring(unnamed_error_message)); + + const auto named_error_message = + fmt::format("Out-of-bounds access in kernel 'celerity::detail::acc_out_of_bounds_kernel<{}>' detected: Accessor 1 for buffer {} attempted to " + "access indices between {} which are outside of mapped subrange {}", + Dims, buffer_name, attempted_sr, subrange_cast<3>(accessible_sr)); + CHECK_THAT(lc->get_log(), Catch::Matchers::ContainsSubstring(named_error_message)); } + TEMPLATE_TEST_CASE_METHOD_SIG(oob_fixture, "host accessor reports out-of-bounds accesses", "[accessor][oob]", ((int Dims), Dims), 1, 2, 3) { +#if !CELERITY_ACCESSOR_BOUNDARY_CHECK + SKIP("CELERITY_ACCESSOR_BOUNDARY_CHECK=0"); +#endif + buffer unnamed_buff(test_utils::truncate_range({10, 20, 30})); + buffer named_buff(test_utils::truncate_range({10, 20, 30})); + const auto accessible_sr = test_utils::truncate_subrange({{5, 10, 15}, {1, 2, 3}}); + const auto oob_idx_lo = test_utils::truncate_id({1, 2, 3}); + const auto oob_idx_hi = test_utils::truncate_id({7, 13, 25}); + const auto buffer_name = "oob"; + + celerity::debug::set_buffer_name(named_buff, buffer_name); + + // we need to be careful about the ordering of the construction and destruction + // of the Celerity queue and the log capturing utility here + std::unique_ptr lc; + { + distr_queue q; + + lc = std::make_unique(); + + q.submit([&](handler& cgh) { + accessor unnamed_acc(unnamed_buff, cgh, celerity::access::fixed(accessible_sr), celerity::write_only_host_task, celerity::no_init); + accessor nambed_acc(named_buff, cgh, celerity::access::fixed(accessible_sr), celerity::write_only_host_task, celerity::no_init); + + cgh.host_task(range(ones), [=](partition) { + unnamed_acc[oob_idx_lo] = 0; + unnamed_acc[oob_idx_hi] = 0; + + nambed_acc[oob_idx_lo] = 0; + nambed_acc[oob_idx_hi] = 0; + }); + }); + + q.slow_full_sync(); + } + + const auto attempted_sr = + subrange<3>{id_cast<3>(oob_idx_lo), range_cast<3>(oob_idx_hi - oob_idx_lo + id_cast(range(ones))) - range_cast<3>(range(zeros))}; + const auto unnamed_error_message = fmt::format("Out-of-bounds access in host task detected: Accessor 0 for buffer 0 attempted to " + "access indices between {} which are outside of mapped subrange {}", + attempted_sr, subrange_cast<3>(accessible_sr)); + CHECK_THAT(lc->get_log(), Catch::Matchers::ContainsSubstring(unnamed_error_message)); + + const auto named_error_message = fmt::format("Out-of-bounds access in host task detected: Accessor 1 for buffer {} attempted to " + "access indices between {} which are outside of mapped subrange {}", + buffer_name, attempted_sr, subrange_cast<3>(accessible_sr)); + CHECK_THAT(lc->get_log(), Catch::Matchers::ContainsSubstring(named_error_message)); + } } // namespace detail } // namespace celerity