diff --git a/CMakeLists.txt b/CMakeLists.txt index d5183d7d7..42484a2ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,11 +126,7 @@ else() set(CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS OFF) endif() -if(CELERITY_SYCL_IMPL STREQUAL hipSYCL OR CELERITY_SYCL_IMPL STREQUAL "DPC++") - set(CELERITY_FEATURE_LOCAL_ACCESSOR ON) -else() - set(CELERITY_FEATURE_LOCAL_ACCESSOR OFF) -endif() +set(CELERITY_FEATURE_LOCAL_ACCESSOR ON) if(NOT CELERITY_SYCL_IMPL STREQUAL ComputeCpp) set(CELERITY_FEATURE_UNNAMED_KERNELS ON) diff --git a/examples/matmul/matmul.cc b/examples/matmul/matmul.cc index 78c9f7574..56c20dc83 100644 --- a/examples/matmul/matmul.cc +++ b/examples/matmul/matmul.cc @@ -19,8 +19,6 @@ void multiply(celerity::distr_queue queue, celerity::buffer mat_a, celerit celerity::accessor b{mat_b, cgh, celerity::access::slice<2>(0), celerity::read_only}; celerity::accessor c{mat_c, cgh, celerity::access::one_to_one{}, celerity::write_only, celerity::no_init}; -#if CELERITY_FEATURE_LOCAL_ACCESSOR - // Use local-memory tiling to avoid waiting on global memory too often const size_t GROUP_SIZE = 8; celerity::local_accessor scratch_a{{GROUP_SIZE, GROUP_SIZE}, cgh}; @@ -43,20 +41,6 @@ void multiply(celerity::distr_queue queue, celerity::buffer mat_a, celerit } c[item.get_global_id()] = sum; }); - -#else - - cgh.parallel_for(celerity::range<2>(MAT_SIZE, MAT_SIZE), [=](celerity::item<2> item) { - T sum{}; - for(size_t k = 0; k < MAT_SIZE; ++k) { - const auto a_ik = a[{item[0], k}]; - const auto b_kj = b[{k, item[1]}]; - sum += a_ik * b_kj; - } - c[item] = sum; - }); - -#endif }); } diff --git a/include/accessor.h b/include/accessor.h index 0c3c35f96..5af73b01c 100644 --- a/include/accessor.h +++ b/include/accessor.h @@ -62,6 +62,13 @@ namespace detail { return *hack_make_invisible_nullptr(); } +#if WORKAROUND_COMPUTECPP + class hack_null_sycl_handler: public sycl::handler { + public: + hack_null_sycl_handler(): sycl::handler(nullptr) {} + }; +#endif + } // namespace detail /** @@ -550,16 +557,16 @@ class accessor : public detail::accessor_b template class local_accessor { -#if !CELERITY_FEATURE_LOCAL_ACCESSOR - static_assert(detail::constexpr_false, "Your SYCL implementation cannot support celerity::local_accessor"); -#else private: -#if WORKAROUND_DPCPP +#if WORKAROUND_DPCPP || WORKAROUND(COMPUTECPP, 2, 6) using sycl_accessor = cl::sycl::accessor; #else using sycl_accessor = cl::sycl::local_accessor; #endif + template + using subscript_type = decltype(std::declval()[std::declval()]); + public: using value_type = DataT; using reference = DataT&; @@ -567,21 +574,13 @@ class local_accessor { using size_type = size_t; local_accessor() -#if WORKAROUND_DPCPP - : sycl_acc(allocation_size, detail::hack_make_invisible_null_reference()), -#else - : sycl_acc(), -#endif + : sycl_acc{make_dangling_sycl_accessor()}, allocation_size(detail::zero_range) { } #if !defined(__SYCL_DEVICE_ONLY__) && !defined(SYCL_DEVICE_ONLY) local_accessor(const range& allocation_size, handler& cgh) -#if WORKAROUND_DPCPP - : sycl_acc(allocation_size, detail::hack_make_invisible_null_reference()), -#else - : sycl_acc(), -#endif + : sycl_acc{make_dangling_sycl_accessor()}, allocation_size(allocation_size) { if(!detail::is_prepass_handler(cgh)) { auto& device_handler = dynamic_cast(cgh); @@ -612,7 +611,7 @@ class local_accessor { std::add_pointer_t get_pointer() const noexcept { return sycl_acc.get_pointer(); } template - inline decltype(auto) operator[](const Index& index) const { + inline subscript_type operator[](const Index& index) const { return sycl_acc[index]; } @@ -621,8 +620,19 @@ class local_accessor { range allocation_size; cl::sycl::handler* const* eventual_sycl_cgh = nullptr; - cl::sycl::handler* sycl_cgh() const { return eventual_sycl_cgh != nullptr ? *eventual_sycl_cgh : nullptr; } + static sycl_accessor make_dangling_sycl_accessor() + { +#if WORKAROUND_DPCPP + return sycl_accessor{detail::zero_range, detail::hack_make_invisible_null_reference()}; +#elif WORKAROUND_COMPUTECPP + detail::hack_null_sycl_handler null_cgh; + return sycl_accessor{detail::zero_range, null_cgh}; +#else + return sycl_accessor{}; #endif + } + + cl::sycl::handler* sycl_cgh() const { return eventual_sycl_cgh != nullptr ? *eventual_sycl_cgh : nullptr; } }; diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc index 5779e0b1b..6391ec64b 100644 --- a/test/runtime_tests.cc +++ b/test/runtime_tests.cc @@ -2195,7 +2195,6 @@ namespace detail { CHECK_THROWS_WITH((celerity::nd_range<3>{{256, 256, 256}, {2, 1, 0}}), "global_range is not divisible by local_range"); } -#if CELERITY_FEATURE_LOCAL_ACCESSOR TEST_CASE("nd_range kernels support local memory", "[handler]") { distr_queue q; buffer out{64}; @@ -2219,7 +2218,6 @@ namespace detail { }); }); } -#endif #if CELERITY_FEATURE_SIMPLE_SCALAR_REDUCTIONS