diff --git a/CMakeLists.txt b/CMakeLists.txt index cd3c94b39..c231770ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,20 @@ project(celerity_runtime VERSION ${Celerity_VERSION} LANGUAGES CXX) set_property(GLOBAL PROPERTY USE_FOLDERS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +if (CMAKE_BUILD_TYPE STREQUAL "Debug") + set(ENABLE_ACC_CHECK ON) +else() + set(ENABLE_ACC_CHECK OFF) +endif() + option(CELERITY_USE_MIMALLOC "Use the mimalloc memory allocator" ON) +option(CELERITY_ACCESSOR_BOUNDARY_CHECK "Enable accessor boundary check" ${ENABLE_ACC_CHECK}) + +if(CELERITY_ACCESSOR_BOUNDARY_CHECK) + message(STATUS "Accessor boundary check enabled - this will impact kernel performance") +endif() + +unset(ENABLE_ACC_CHECK) set(CELERITY_CMAKE_DIR "${PROJECT_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}" "${CELERITY_CMAKE_DIR}") @@ -313,6 +326,7 @@ target_compile_definitions(celerity_runtime PUBLIC CELERITY_FEATURE_UNNAMED_KERNELS=$ CELERITY_DETAIL_HAS_NAMED_THREADS=$ CELERITY_DETAIL_IS_OLD_COMPUTECPP_COMPILER=$ + CELERITY_ACCESSOR_BOUNDARY_CHECK=$ ) # Collect version information from git in src/version.cc. This target is always out of date, but the timestamp diff --git a/include/accessor.h b/include/accessor.h index 61c03e866..22cc1afef 100644 --- a/include/accessor.h +++ b/include/accessor.h @@ -254,12 +254,23 @@ class accessor : public detail::accessor_base } template - inline std::enable_if_t operator[](const id& index) const { - return m_device_ptr[get_linear_offset(index)]; - } - - template - inline std::enable_if_t operator[](const id& index) const { + inline std::conditional_t operator[](const id& index) const { +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + // We currently don't support boundary checking for accessors created using accessor_testspy::make_device_accessor, + // which does not set m_oob_indices. + if(m_oob_indices != nullptr) { + const id all_true = detail::id_cast(id<3>(true, true, true)); + const bool is_within_bounds_lo = (index >= m_accessed_virtual_subrange.offset) == all_true; + const bool is_within_bounds_hi = (index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)) == all_true; + if((!is_within_bounds_lo || !is_within_bounds_hi)) { + for(int d = 0; d < Dims; ++d) { + sycl::atomic_ref{m_oob_indices[0][d]}.fetch_min(index[d]); + sycl::atomic_ref{m_oob_indices[1][d]}.fetch_max(index[d] + 1); + } + return m_oob_fallback_value; + } + } +#endif return m_device_ptr[get_linear_offset(index)]; } @@ -311,6 +322,12 @@ class accessor : public detail::accessor_base DataT* m_device_ptr = nullptr; CELERITY_DETAIL_NO_UNIQUE_ADDRESS id m_backing_buffer_offset; CELERITY_DETAIL_NO_UNIQUE_ADDRESS range m_backing_buffer_range = detail::zero_range; +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + id<3>* m_oob_indices = nullptr; + subrange m_accessed_virtual_subrange = {}; + // This value (or a reference to it) is returned for all out-of-bounds accesses. + mutable DataT m_oob_fallback_value = DataT{}; +#endif template accessor(const ctor_internal_tag /* tag */, const buffer& buff, handler& cgh, const Functor& rmfn) { @@ -340,6 +357,10 @@ class accessor : public detail::accessor_base m_device_ptr = other.m_device_ptr; m_backing_buffer_offset = other.m_backing_buffer_offset; m_backing_buffer_range = other.m_backing_buffer_range; +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + m_oob_indices = other.m_oob_indices; + m_accessed_virtual_subrange = other.m_accessed_virtual_subrange; +#endif #if !defined(__SYCL_DEVICE_ONLY__) if(detail::is_embedded_hydration_id(m_device_ptr)) { @@ -352,6 +373,10 @@ class accessor : public detail::accessor_base m_device_ptr = static_cast(info.ptr); m_backing_buffer_offset = detail::id_cast(info.backing_buffer_offset); m_backing_buffer_range = detail::range_cast(info.backing_buffer_range); +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + m_oob_indices = info.out_of_bounds_indices; + m_accessed_virtual_subrange = detail::subrange_cast(info.accessed_virtual_subrange); +#endif } } #endif diff --git a/include/buffer_manager.h b/include/buffer_manager.h index 3882d1e25..cb4de0b37 100644 --- a/include/buffer_manager.h +++ b/include/buffer_manager.h @@ -92,6 +92,7 @@ namespace detail { using host_buffer_factory = std::function(const range<3>&)>; struct buffer_info { + int dimensions = -1; celerity::range<3> range = {1, 1, 1}; size_t element_size = 0; bool is_host_initialized; @@ -139,7 +140,8 @@ namespace detail { return std::make_unique>(range_cast(r), q); }; auto host_factory = [](const celerity::range<3>& r) { return std::make_unique>(range_cast(r)); }; - m_buffer_infos.emplace(bid, buffer_info{range, sizeof(DataT), is_host_initialized, {}, std::move(device_factory), std::move(host_factory)}); + m_buffer_infos.emplace( + bid, buffer_info{Dims, range, sizeof(DataT), is_host_initialized, {}, std::move(device_factory), std::move(host_factory)}); m_newest_data_location.emplace(bid, region_map(range, data_location::nowhere)); #if defined(CELERITY_DETAIL_ENABLE_DEBUG) diff --git a/include/closure_hydrator.h b/include/closure_hydrator.h index 0dbbe9160..90ca5b1cf 100644 --- a/include/closure_hydrator.h +++ b/include/closure_hydrator.h @@ -62,6 +62,10 @@ class closure_hydrator { range<3> backing_buffer_range; id<3> backing_buffer_offset; subrange<3> accessed_virtual_subrange; + +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + id<3>* out_of_bounds_indices = nullptr; +#endif }; closure_hydrator(const closure_hydrator&) = delete; diff --git a/include/worker_job.h b/include/worker_job.h index 5966c87aa..e49875eb1 100644 --- a/include/worker_job.h +++ b/include/worker_job.h @@ -180,6 +180,10 @@ namespace detail { cl::sycl::event m_event; bool m_submitted = false; +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + std::vector*> m_oob_indices_per_accessor; +#endif + bool execute(const command_pkg& pkg) override; std::string get_description(const command_pkg& pkg) override; }; diff --git a/src/worker_job.cc b/src/worker_job.cc index a768af2d6..ad6d5c043 100644 --- a/src/worker_job.cc +++ b/src/worker_job.cc @@ -213,7 +213,19 @@ namespace detail { const auto [bid, mode] = access_map.get_nth_access(i); const auto sr = grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size())); const auto info = m_buffer_mngr.access_device_buffer(bid, mode, sr); + +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + auto* const oob_idx = sycl::malloc_shared>(2, m_queue.get_sycl_queue()); + assert(oob_idx != nullptr); + constexpr size_t size_t_max = std::numeric_limits::max(); + const auto buffer_dims = m_buffer_mngr.get_buffer_info(bid).dimensions; + oob_idx[0] = id<3>{size_t_max, buffer_dims > 1 ? size_t_max : 0, buffer_dims == 3 ? size_t_max : 0}; + oob_idx[1] = id<3>{1, 1, 1}; + m_oob_indices_per_accessor.push_back(oob_idx); + accessor_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr, oob_idx}); +#else accessor_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr}); +#endif } for(size_t i = 0; i < reductions.size(); ++i) { @@ -233,9 +245,27 @@ namespace detail { const auto status = m_event.get_info(); if(status == cl::sycl::info::event_command_status::complete) { m_buffer_mngr.unlock(pkg.cid); - const auto data = std::get(pkg.data); auto tsk = m_task_mngr.get_task(data.tid); + +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + for(size_t i = 0; i < m_oob_indices_per_accessor.size(); ++i) { + const id<3>& oob_min = m_oob_indices_per_accessor[i][0]; + const id<3>& oob_max = m_oob_indices_per_accessor[i][1]; + + if(oob_max != id<3>{1, 1, 1}) { + const auto& access_map = tsk->get_buffer_access_map(); + const auto acc_sr = + grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size())); + const auto oob_sr = subrange<3>(oob_min, range_cast<3>(oob_max - oob_min)); + CELERITY_ERROR("Out-of-bounds access in kernel '{}' detected: Accessor {} for buffer {} attempted to access indices between {} which are " + "outside of mapped subrange {}", + tsk->get_debug_name(), i, access_map.get_nth_access(i).first, oob_sr, acc_sr); + } + sycl::free(m_oob_indices_per_accessor[i], m_queue.get_sycl_queue()); + } +#endif + for(const auto& reduction : tsk->get_reductions()) { const auto element_size = m_buffer_mngr.get_buffer_info(reduction.bid).element_size; auto operand = make_uninitialized_payload(element_size); diff --git a/test/accessor_tests.cc b/test/accessor_tests.cc index 288eda9fa..f273526e7 100644 --- a/test/accessor_tests.cc +++ b/test/accessor_tests.cc @@ -534,6 +534,9 @@ namespace detail { TEST_CASE("0-dimensional accessors are pointer-sized", "[accessor]") { if(!CELERITY_DETAIL_HAS_NO_UNIQUE_ADDRESS) SKIP("[[no_unique_address]] not available on this compiler"); +#if CELERITY_ACCESSOR_BOUNDARY_CHECK + SKIP("no accessor size guarantees when CELERITY_ACCESSOR_BOUNDARY_CHECK=1."); +#endif // these checks are not static_asserts because they depend on an (optional) compiler layout optimization CHECK(sizeof(accessor) == sizeof(int*)); @@ -641,5 +644,40 @@ namespace detail { sycl::free(result, q.get_sycl_queue()); } + template + class oob_fixture : public test_utils::runtime_fixture {}; + + template + class acc_out_of_bounds_kernel {}; + + TEMPLATE_TEST_CASE_METHOD_SIG(oob_fixture, "accessor reports out-of-bounds accesses", "[accessor][oob]", ((int Dims), Dims), 1, 2, 3) { +#if !CELERITY_ACCESSOR_BOUNDARY_CHECK + SKIP("CELERITY_ACCESSOR_BOUNDARY_CHECK=0"); +#endif + + buffer buff(range_cast(range<3>{10, 20, 30})); + const auto accessible_sr = subrange_cast(subrange<3>{{5, 10, 15}, {1, 2, 3}}); + const auto oob_idx_lo = id_cast(id<3>{1, 2, 3}); + const auto oob_idx_hi = id_cast(id<3>{7, 13, 25}); + distr_queue q; + + celerity::test_utils::log_capture lc; + + q.submit([&](handler& cgh) { + accessor acc(buff, cgh, celerity::access::fixed(accessible_sr), celerity::write_only, celerity::no_init); + cgh.parallel_for>(range(unit_range), [=](item) { + acc[oob_idx_lo] = 0; + acc[oob_idx_hi] = 0; + }); + }); + q.slow_full_sync(); + + const auto attempted_sr = subrange<3>{id_cast<3>(oob_idx_lo), range_cast<3>(oob_idx_hi - oob_idx_lo + id_cast(range(unit_range)))}; + const auto error_message = fmt::format("Out-of-bounds access in kernel 'acc_out_of_bounds_kernel<{}>' detected: Accessor 0 for buffer 0 attempted to " + "access indices between {} which are outside of mapped subrange {}", + Dims, attempted_sr, subrange_cast<3>(accessible_sr)); + CHECK_THAT(lc.get_log(), Catch::Matchers::ContainsSubstring(error_message)); + } + } // namespace detail } // namespace celerity