-
Notifications
You must be signed in to change notification settings - Fork 113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Specify access mode in __result_and_scratch_storage
methods
#1909
base: main
Are you sure you want to change the base?
Changes from all commits
cc03d10
153419e
badca77
8be1ad9
b4876d1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -319,6 +319,8 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name | |
// 1. Local scan on each workgroup | ||
auto __submit_event = __exec.queue().submit([&](sycl::handler& __cgh) { | ||
oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer | ||
// TODO what is correct access mode here for __get_scratch_acc call? | ||
// Is default sycl::access_mode::read_write is ok? | ||
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh); | ||
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); | ||
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT | ||
|
@@ -340,6 +342,8 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name | |
auto __iters_per_single_wg = oneapi::dpl::__internal::__dpl_ceiling_div(__n_groups, __wgroup_size); | ||
__submit_event = __exec.queue().submit([&](sycl::handler& __cgh) { | ||
__cgh.depends_on(__submit_event); | ||
// TODO what is correct access mode here for __get_scratch_acc call? | ||
// Is default sycl::access_mode::read_write is ok? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think read_write. |
||
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh); | ||
__dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); | ||
#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT | ||
|
@@ -362,7 +366,11 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name | |
auto __final_event = __exec.queue().submit([&](sycl::handler& __cgh) { | ||
__cgh.depends_on(__submit_event); | ||
oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer | ||
// TODO what is correct access mode here for __get_scratch_acc call? | ||
// Is default sycl::access_mode::read_write is ok? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i believe read. |
||
auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh); | ||
// TODO what is right access mode here? | ||
// sycl::access_mode::read_write is used by default. | ||
Comment on lines
+372
to
+373
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i believe write |
||
auto __res_acc = __result_and_scratch.__get_result_acc(__cgh); | ||
__cgh.parallel_for<_PropagateScanName...>(sycl::range<1>(__n_groups * __size_per_wg), [=](auto __item) { | ||
auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc); | ||
|
@@ -579,7 +587,7 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W | |
// predicate on each element of the input range. The second half stores the index of the output | ||
// range to copy elements of the input range. | ||
auto __lacc = __dpl_sycl::__local_accessor<_ValueType>(sycl::range<1>{__elems_per_wg * 2}, __hdl); | ||
auto __res_acc = __result.__get_result_acc(__hdl); | ||
auto __res_acc = __result.__get_result_acc<sycl::access_mode::write>(__hdl); | ||
|
||
__hdl.parallel_for<_ScanKernelName...>( | ||
sycl::nd_range<1>(_WGSize, _WGSize), [=](sycl::nd_item<1> __self_item) { | ||
|
@@ -1466,7 +1474,7 @@ __parallel_find_or_impl_one_wg(oneapi::dpl::__internal::__device_backend_tag, _E | |
// main parallel_for | ||
auto __event = __exec.queue().submit([&](sycl::handler& __cgh) { | ||
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); | ||
auto __result_acc = __result_storage.__get_result_acc(__cgh); | ||
auto __result_acc = __result_storage.__get_result_acc<sycl::access_mode::write>(__cgh); | ||
|
||
__cgh.parallel_for<KernelName>( | ||
sycl::nd_range</*dim=*/1>(sycl::range</*dim=*/1>(__wgroup_size), sycl::range</*dim=*/1>(__wgroup_size)), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -141,7 +141,7 @@ struct __parallel_transform_reduce_small_submitter<_Tp, _Commutative, _VecSize, | |
|
||
sycl::event __reduce_event = __exec.queue().submit([&, __n](sycl::handler& __cgh) { | ||
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); // get an access to data under SYCL buffer | ||
auto __res_acc = __scratch_container.__get_result_acc(__cgh); | ||
auto __res_acc = __scratch_container.__get_result_acc<sycl::access_mode::write>(__cgh); | ||
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size); | ||
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh); | ||
__cgh.parallel_for<_Name...>( | ||
|
@@ -208,7 +208,7 @@ struct __parallel_transform_reduce_device_kernel_submitter<_Tp, _Commutative, _V | |
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); // get an access to data under SYCL buffer | ||
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size); | ||
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh); | ||
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh); | ||
auto __temp_acc = __scratch_container.__get_scratch_acc<sycl::access_mode::write>(__cgh); | ||
__cgh.parallel_for<_KernelName...>( | ||
sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), sycl::range<1>(__work_group_size)), | ||
[=](sycl::nd_item<1> __item_id) { | ||
|
@@ -253,8 +253,10 @@ struct __parallel_transform_reduce_work_group_kernel_submitter<_Tp, _Commutative | |
__reduce_event = __exec.queue().submit([&, __n](sycl::handler& __cgh) { | ||
__cgh.depends_on(__reduce_event); | ||
|
||
// TODO what is correct access mode here for __get_scratch_acc call? | ||
// Is default sycl::access_mode::read_write is ok? | ||
Comment on lines
+256
to
+257
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is read access. |
||
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh); | ||
auto __res_acc = __scratch_container.__get_result_acc(__cgh); | ||
auto __res_acc = __scratch_container.__get_result_acc<sycl::access_mode::write>(__cgh); | ||
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__work_group_size), __cgh); | ||
|
||
__cgh.parallel_for<_KernelName...>( | ||
|
@@ -358,7 +360,11 @@ struct __parallel_transform_reduce_impl | |
__reduce_event = __exec.queue().submit([&, __is_first, __offset_1, __offset_2, __n, | ||
__n_groups](sycl::handler& __cgh) { | ||
__cgh.depends_on(__reduce_event); | ||
// TODO what is correct access mode here for __get_scratch_acc call? | ||
// Is default sycl::access_mode::read_write is ok? | ||
Comment on lines
+363
to
+364
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i do think read write is appropriate here. |
||
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh); | ||
// TODO what is correct access mode here for __get_result_acc call? | ||
// Is default sycl::access_mode::read_write is ok? | ||
Comment on lines
+366
to
+367
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this is write. |
||
auto __res_acc = __scratch_container.__get_result_acc(__cgh); | ||
|
||
// get an access to data under SYCL buffer | ||
|
@@ -377,7 +383,8 @@ struct __parallel_transform_reduce_impl | |
[=](sycl::nd_item<1> __item_id) { | ||
auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc); | ||
auto __res_ptr = | ||
__result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__res_acc, 2 * __n_groups); | ||
__result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr<sycl::access_mode::write>( | ||
__res_acc, 2 * __n_groups); | ||
Comment on lines
+386
to
+387
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i dont think we need an access mode here. |
||
auto __local_idx = __item_id.get_local_id(0); | ||
auto __group_idx = __item_id.get_group(0); | ||
// 1. Initialization (transform part). Fill local memory | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -298,10 +298,11 @@ struct __parallel_reduce_then_scan_reduce_submitter<__sub_group_size, __max_inpu | |
__dpl_sycl::__local_accessor<_InitValueType> __sub_group_partials(__num_sub_groups_local, __cgh); | ||
__cgh.depends_on(__prior_event); | ||
oneapi::dpl::__ranges::__require_access(__cgh, __in_rng); | ||
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh); | ||
auto __temp_acc = __scratch_container.__get_scratch_acc<sycl::access_mode::write>(__cgh); | ||
__cgh.parallel_for<_KernelName...>( | ||
__nd_range, [=, *this](sycl::nd_item<1> __ndi) [[sycl::reqd_sub_group_size(__sub_group_size)]] { | ||
_InitValueType* __temp_ptr = _TmpStorageAcc::__get_usm_or_buffer_accessor_ptr(__temp_acc); | ||
_InitValueType* __temp_ptr = | ||
_TmpStorageAcc::__get_usm_or_buffer_accessor_ptr<sycl::access_mode::write>(__temp_acc); | ||
Comment on lines
+304
to
+305
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need an access mode here. |
||
std::size_t __group_id = __ndi.get_group(0); | ||
__dpl_sycl::__sub_group __sub_group = __ndi.get_sub_group(); | ||
std::uint32_t __sub_group_id = __sub_group.get_group_linear_id(); | ||
|
@@ -453,8 +454,8 @@ struct __parallel_reduce_then_scan_scan_submitter< | |
__dpl_sycl::__local_accessor<_InitValueType> __sub_group_partials(__num_sub_groups_local + 1, __cgh); | ||
__cgh.depends_on(__prior_event); | ||
oneapi::dpl::__ranges::__require_access(__cgh, __in_rng, __out_rng); | ||
auto __temp_acc = __scratch_container.__get_scratch_acc(__cgh); | ||
auto __res_acc = __scratch_container.__get_result_acc(__cgh); | ||
auto __temp_acc = __scratch_container.__get_scratch_acc<sycl::access_mode::read_write>(__cgh); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this can be just read access. |
||
auto __res_acc = __scratch_container.__get_result_acc<sycl::access_mode::write>(__cgh); | ||
|
||
__cgh.parallel_for<_KernelName...>( | ||
__nd_range, [=, *this] (sycl::nd_item<1> __ndi) [[sycl::reqd_sub_group_size(__sub_group_size)]] { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -478,12 +478,11 @@ using __repacked_tuple_t = typename __repacked_tuple<T>::type; | |
template <typename _ContainerOrIterable> | ||
using __value_t = typename __internal::__memobj_traits<_ContainerOrIterable>::value_type; | ||
|
||
template <typename _T> | ||
template <typename _T, sycl::access_mode _AccessMode = sycl::access_mode::read_write> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we gain anything from this default template argument. All the usages specify explicitly at this level. |
||
struct __usm_or_buffer_accessor | ||
{ | ||
private: | ||
using __accessor_t = sycl::accessor<_T, 1, sycl::access::mode::read_write, __dpl_sycl::__target_device, | ||
sycl::access::placeholder::false_t>; | ||
using __accessor_t = sycl::accessor<_T, 1, _AccessMode, __dpl_sycl::__target_device, sycl::access::placeholder::false_t>; | ||
__accessor_t __acc; | ||
_T* __ptr = nullptr; | ||
bool __usm = false; | ||
|
@@ -492,11 +491,12 @@ struct __usm_or_buffer_accessor | |
public: | ||
// Buffer accessor | ||
__usm_or_buffer_accessor(sycl::handler& __cgh, sycl::buffer<_T, 1>* __sycl_buf) | ||
: __acc(sycl::accessor(*__sycl_buf, __cgh, sycl::read_write, __dpl_sycl::__no_init{})) | ||
: __acc(sycl::accessor(*__sycl_buf, __cgh, __get_access_mode_tag(), __dpl_sycl::__no_init{})) | ||
{ | ||
} | ||
__usm_or_buffer_accessor(sycl::handler& __cgh, sycl::buffer<_T, 1>* __sycl_buf, size_t __acc_offset) | ||
: __acc(sycl::accessor(*__sycl_buf, __cgh, sycl::read_write, __dpl_sycl::__no_init{})), __offset(__acc_offset) | ||
: __acc(sycl::accessor(*__sycl_buf, __cgh, __get_access_mode_tag(), __dpl_sycl::__no_init{})), | ||
__offset(__acc_offset) | ||
{ | ||
} | ||
|
||
|
@@ -512,6 +512,23 @@ struct __usm_or_buffer_accessor | |
{ | ||
return __usm ? __ptr + __offset : &__acc[__offset]; | ||
} | ||
|
||
private: | ||
|
||
static auto __get_access_mode_tag() | ||
{ | ||
if constexpr (_AccessMode == sycl::access::mode::read) | ||
return sycl::read; | ||
|
||
else if constexpr (_AccessMode == sycl::access::mode::write) | ||
return sycl::write; | ||
|
||
else if constexpr (_AccessMode == sycl::access::mode::read_write) | ||
return sycl::read_write; | ||
|
||
else | ||
static_assert(false, "Unknown _AccessMode state"); | ||
} | ||
Comment on lines
+518
to
+531
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be a general utility function outside of this structure (with a template arg)? It's useful in the structure but it seems to have general applicability. I'm a little surprised this utility doesn't exist in the specification itself actually. Also the function itself should be able to be |
||
}; | ||
|
||
template <typename _ExecutionPolicy, typename _T> | ||
|
@@ -611,27 +628,29 @@ struct __result_and_scratch_storage | |
#endif | ||
} | ||
|
||
template <sycl::access_mode _AccessMode = sycl::access_mode::read_write> | ||
auto | ||
__get_result_acc(sycl::handler& __cgh) const | ||
{ | ||
#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT | ||
if (__use_USM_host && __supports_USM_device) | ||
return __usm_or_buffer_accessor<_T>(__cgh, __result_buf.get()); | ||
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __result_buf.get()); | ||
else if (__supports_USM_device) | ||
return __usm_or_buffer_accessor<_T>(__cgh, __scratch_buf.get(), __scratch_n); | ||
return __usm_or_buffer_accessor<_T>(__cgh, __sycl_buf.get(), __scratch_n); | ||
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __scratch_buf.get(), __scratch_n); | ||
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __sycl_buf.get(), __scratch_n); | ||
#else | ||
return sycl::accessor(*__sycl_buf.get(), __cgh, sycl::read_write, __dpl_sycl::__no_init{}); | ||
#endif | ||
} | ||
|
||
template <sycl::access_mode _AccessMode = sycl::access_mode::read_write> | ||
auto | ||
__get_scratch_acc(sycl::handler& __cgh) const | ||
{ | ||
#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT | ||
if (__use_USM_host || __supports_USM_device) | ||
return __usm_or_buffer_accessor<_T>(__cgh, __scratch_buf.get()); | ||
return __usm_or_buffer_accessor<_T>(__cgh, __sycl_buf.get()); | ||
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __scratch_buf.get()); | ||
return __usm_or_buffer_accessor<_T, _AccessMode>(__cgh, __sycl_buf.get()); | ||
#else | ||
return sycl::accessor(*__sycl_buf.get(), __cgh, sycl::read_write, __dpl_sycl::__no_init{}); | ||
#endif | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@danhoeflinger do you know the correct template param state here and in other TODO's ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i think this can be write