
Commit c2e9295
Change some uses of ::std
akukanov committed Oct 1, 2024
1 parent 1af1e4e commit c2e9295
Showing 3 changed files with 30 additions and 30 deletions.
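The change is behavior-preserving: ::std::name is looked up from the global namespace, while std::name is resolved relative to the enclosing scope, so the two forms diverge only if some enclosing namespace declares its own name std. A minimal sketch of the distinction (the nested std namespace below is hypothetical, not something oneDPL declares):

#include <cstddef>

namespace lib
{
namespace std // hypothetical namespace shadowing the global std inside lib
{
struct tag
{
};
} // namespace std

// Unqualified lookup inside lib now finds lib::std first:
// std::size_t n1 = 0; // error: size_t is not a member of lib::std
::std::size_t n2 = 0;  // OK: ::std always names the global std
} // namespace lib

Assuming nothing in these headers re-declares std (the premise of the commit), dropping the leading :: leaves lookup unchanged and only shortens the code.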
14 changes: 7 additions & 7 deletions include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h
@@ -1691,14 +1691,14 @@ __pattern_hetero_set_op(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _F
_ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2,
_OutputIterator __result, _Compare __comp, _IsOpDifference)
{
- typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _Size1;
- typedef typename ::std::iterator_traits<_ForwardIterator2>::difference_type _Size2;
+ typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _Size1;
+ typedef typename std::iterator_traits<_ForwardIterator2>::difference_type _Size2;

const _Size1 __n1 = __last1 - __first1;
const _Size2 __n2 = __last2 - __first2;

//Algo is based on the recommended approach of set_intersection algo for GPU: binary search + scan (copying by mask).
- using _ReduceOp = ::std::plus<_Size1>;
+ using _ReduceOp = std::plus<_Size1>;
using _Assigner = unseq_backend::__scan_assigner;
using _NoAssign = unseq_backend::__scan_no_assign;
using _MaskAssigner = unseq_backend::__mask_assigner<2>;
@@ -1708,7 +1708,7 @@ __pattern_hetero_set_op(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _F
_ReduceOp __reduce_op;
_Assigner __assign_op;
_DataAcc __get_data_op;
- unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ ::std::true_type, 2>
+ unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ std::true_type, 2>
__copy_by_mask_op;
unseq_backend::__brick_set_op<_ExecutionPolicy, _Compare, _Size1, _Size2, _IsOpDifference> __create_mask_op{
__comp, __n1, __n2};
@@ -1728,18 +1728,18 @@ __pattern_hetero_set_op(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _F

auto __result_size =
__par_backend_hetero::__parallel_transform_scan_base(
- _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
+ _BackendTag{}, std::forward<_ExecutionPolicy>(__exec),
oneapi::dpl::__ranges::make_zip_view(
__buf1.all_view(), __buf2.all_view(),
oneapi::dpl::__ranges::all_view<int32_t, __par_backend_hetero::access_mode::read_write>(
__mask_buf.get_buffer())),
__buf3.all_view(), _InitType{},
// local scan
- unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _Assigner,
+ unseq_backend::__scan</*inclusive*/ std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _Assigner,
_MaskAssigner, decltype(__create_mask_op), _InitType>{
__reduce_op, __get_data_op, __assign_op, _MaskAssigner{}, __create_mask_op},
// scan between groups
- unseq_backend::__scan</*inclusive=*/::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _NoAssign,
+ unseq_backend::__scan</*inclusive=*/std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _NoAssign,
_Assigner, _DataAcc, _InitType>{__reduce_op, __get_data_op, _NoAssign{}, __assign_op,
__get_data_op},
// global scan
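The hunk above only rewires qualifiers inside the mask-and-scan machinery referenced by the set-op comment. As a rough serial model of the copy-by-mask step (hypothetical names, not oneDPL's internal API): compute a 0/1 mask per input element, run an inclusive scan over the mask with std::plus to obtain output offsets, then copy every flagged element to offset - 1.

#include <algorithm>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Serial model of the GPU pattern: mask + inclusive scan + copy by mask.
template <typename T, typename Pred>
std::vector<T> copy_by_mask(const std::vector<T>& in, Pred pred)
{
    std::vector<int> mask(in.size());
    std::transform(in.begin(), in.end(), mask.begin(),
                   [&](const T& x) { return pred(x) ? 1 : 0; });

    std::vector<int> offsets(in.size());
    std::inclusive_scan(mask.begin(), mask.end(), offsets.begin(), std::plus<int>{});

    std::vector<T> out(offsets.empty() ? 0 : offsets.back());
    for (std::size_t i = 0; i < in.size(); ++i)
        if (mask[i])
            out[offsets[i] - 1] = in[i]; // inclusive scan: final slot is offset - 1
    return out;
}

On the device the single scan is split into the local, between-groups, and global phases labelled in the hunk, and for set operations the mask itself comes from the binary search of one sorted range in the other that the comment mentions.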
22 changes: 11 additions & 11 deletions include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h
@@ -927,13 +927,13 @@ __pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&&

oneapi::dpl::__internal::__ranges::__pattern_walk_n(
__tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_keys_wrapper>(__exec), __copy_range,
- ::std::forward<_Range1>(__keys), ::std::forward<_Range3>(__out_keys));
+ std::forward<_Range1>(__keys), std::forward<_Range3>(__out_keys));

oneapi::dpl::__internal::__ranges::__pattern_walk_n(
__tag,
oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_values_wrapper>(
- ::std::forward<_ExecutionPolicy>(__exec)),
- __copy_range, ::std::forward<_Range2>(__values), ::std::forward<_Range4>(__out_values));
+ std::forward<_ExecutionPolicy>(__exec)),
+ __copy_range, std::forward<_Range2>(__values), std::forward<_Range4>(__out_values));

return 1;
}
@@ -963,7 +963,7 @@ __pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&&
experimental::ranges::views::all_write(__idx));

// use work group size adjusted to shared local memory as the maximum segment size.
- ::std::size_t __wgroup_size =
+ std::size_t __wgroup_size =
oneapi::dpl::__internal::__slm_adjusted_work_group_size(__exec, sizeof(__key_type) + sizeof(__val_type));

// element is copied if it is the 0th element (marks beginning of first segment), is in an index
@@ -975,11 +975,11 @@ __pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&&
[__binary_pred, __wgroup_size](const auto& __a) {
// The size of key range for the (i-1) view is one less, so for the 0th index we do not check the keys
// for (i-1), but we still need to get its key value as it is the start of a segment
- const auto index = ::std::get<0>(__a);
+ const auto index = std::get<0>(__a);
if (index == 0)
return true;
return index % __wgroup_size == 0 // segment size
- || !__binary_pred(::std::get<1>(__a), ::std::get<2>(__a)); // key comparison
+ || !__binary_pred(std::get<1>(__a), std::get<2>(__a)); // key comparison
},
unseq_backend::__brick_assign_key_position{});

@@ -989,7 +989,7 @@ __pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&&
unseq_backend::__brick_reduce_idx<_BinaryOperator, decltype(__n)>(__binary_op, __n), __intermediate_result_end,
oneapi::dpl::__ranges::take_view_simple(experimental::ranges::views::all_read(__idx),
__intermediate_result_end),
- ::std::forward<_Range2>(__values), experimental::ranges::views::all_write(__tmp_out_values))
+ std::forward<_Range2>(__values), experimental::ranges::views::all_write(__tmp_out_values))
.wait();

// Round 2: final reduction to get result for each segment of equal adjacent keys
@@ -1018,22 +1018,22 @@ __pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&&
[__binary_pred](const auto& __a) {
// The size of key range for the (i-1) view is one less, so for the 0th index we do not check the keys
// for (i-1), but we still need to get its key value as it is the start of a segment
- if (::std::get<0>(__a) == 0)
+ if (std::get<0>(__a) == 0)
return true;
- return !__binary_pred(::std::get<1>(__a), ::std::get<2>(__a)); // keys comparison
+ return !__binary_pred(std::get<1>(__a), std::get<2>(__a)); // keys comparison
},
unseq_backend::__brick_assign_key_position{});

//reduce by segment
oneapi::dpl::__par_backend_hetero::__parallel_for(
_BackendTag{},
oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce2_wrapper>(
- ::std::forward<_ExecutionPolicy>(__exec)),
+ std::forward<_ExecutionPolicy>(__exec)),
unseq_backend::__brick_reduce_idx<_BinaryOperator, decltype(__intermediate_result_end)>(
__binary_op, __intermediate_result_end),
__result_end,
oneapi::dpl::__ranges::take_view_simple(experimental::ranges::views::all_read(__idx), __result_end),
- experimental::ranges::views::all_read(__tmp_out_values), ::std::forward<_Range4>(__out_values))
+ experimental::ranges::views::all_read(__tmp_out_values), std::forward<_Range4>(__out_values))
.__deferrable_wait();

return __result_end;
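The predicates rewritten in this file mark segment starts for the two reduction rounds: round 1 also forces an artificial boundary every __wgroup_size elements so each work group reduces a bounded chunk, and round 2 merges the partial results using only the key comparison. A serial sketch of the round-1 flagging rule (hypothetical helper, assuming the same semantics as the lambda above):

#include <cstddef>
#include <vector>

// An element starts a segment if it is the first element, falls on a
// work-group boundary, or its key differs from the previous key.
template <typename Key, typename BinaryPred>
std::vector<int> segment_start_flags(const std::vector<Key>& keys,
                                     std::size_t wgroup_size, BinaryPred pred)
{
    std::vector<int> flags(keys.size(), 0);
    for (std::size_t i = 0; i < keys.size(); ++i)
        flags[i] = (i == 0) || (i % wgroup_size == 0)
                   || !pred(keys[i - 1], keys[i]);
    return flags;
}

Round 2 applies the same rule with the wgroup_size term dropped, matching the second lambda in the hunk.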
24 changes: 12 additions & 12 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
@@ -751,7 +751,7 @@ __parallel_transform_scan_base(oneapi::dpl::__internal::__device_backend_tag, _E
oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__scan_propagate_kernel<_CustomName>>;

return __parallel_scan_submitter<_CustomName, _PropagateKernel>()(
- ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng),
+ std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng), std::forward<_Range2>(__out_rng),
__init, __local_scan, __group_scan, __global_scan);
}

@@ -761,7 +761,7 @@ __group_scan_fits_in_slm(const sycl::queue& __queue, std::size_t __n, std::size_
std::size_t __single_group_upper_limit)
{
// Pessimistically only use half of the memory to take into account memory used by compiled kernel
- const ::std::size_t __max_slm_size =
+ const std::size_t __max_slm_size =
__queue.get_device().template get_info<sycl::info::device::local_mem_size>() / 2;
const auto __req_slm_size = sizeof(_Type) * __n_uniform;

@@ -906,7 +906,7 @@ template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typenam
typename _BinaryOperation, typename _Inclusive>
auto
__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
- _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op,
+ _Range1&& __in_rng, _Range2&& __out_rng, std::size_t __n, _UnaryOperation __unary_op,
_InitType __init, _BinaryOperation __binary_op, _Inclusive)
{
using _Type = typename _InitType::__value_type;
@@ -1047,7 +1047,7 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag
_InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op,
_CopyByMaskOp __copy_by_mask_op)
{
- using _ReduceOp = ::std::plus<_Size>;
+ using _ReduceOp = std::plus<_Size>;
using _Assigner = unseq_backend::__scan_assigner;
using _NoAssign = unseq_backend::__scan_no_assign;
using _MaskAssigner = unseq_backend::__mask_assigner<1>;
@@ -1063,17 +1063,17 @@
oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n);

return __parallel_transform_scan_base(
- __backend_tag, ::std::forward<_ExecutionPolicy>(__exec),
+ __backend_tag, std::forward<_ExecutionPolicy>(__exec),
oneapi::dpl::__ranges::zip_view(
__in_rng, oneapi::dpl::__ranges::all_view<int32_t, __par_backend_hetero::access_mode::read_write>(
__mask_buf.get_buffer())),
- ::std::forward<_OutRng>(__out_rng), _InitType{},
+ std::forward<_OutRng>(__out_rng), _InitType{},
// local scan
- unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _Assigner,
+ unseq_backend::__scan</*inclusive*/ std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _Assigner,
_MaskAssigner, _CreateMaskOp, _InitType>{__reduce_op, __get_data_op, __assign_op,
__add_mask_op, __create_mask_op},
// scan between groups
- unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _NoAssign,
+ unseq_backend::__scan</*inclusive*/ std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _NoAssign,
_Assigner, _DataAcc, _InitType>{__reduce_op, __get_data_op, _NoAssign{}, __assign_op,
__get_data_op},
// global scan
@@ -1154,16 +1154,16 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>;

// Next power of 2 greater than or equal to __n
- auto __n_uniform = ::oneapi::dpl::__internal::__dpl_bit_ceil(static_cast<::std::make_unsigned_t<_Size>>(__n));
+ auto __n_uniform = ::oneapi::dpl::__internal::__dpl_bit_ceil(static_cast<std::make_unsigned_t<_Size>>(__n));

// Pessimistically only use half of the memory to take into account memory used by compiled kernel
- const ::std::size_t __max_slm_size =
+ const std::size_t __max_slm_size =
__exec.queue().get_device().template get_info<sycl::info::device::local_mem_size>() / 2;

// The kernel stores n integers for the predicate and another n integers for the offsets
- const auto __req_slm_size = sizeof(::std::uint16_t) * __n_uniform * 2;
+ const auto __req_slm_size = sizeof(std::uint16_t) * __n_uniform * 2;

- constexpr ::std::uint16_t __single_group_upper_limit = 2048;
+ constexpr std::uint16_t __single_group_upper_limit = 2048;

std::size_t __max_wg_size = oneapi::dpl::__internal::__max_work_group_size(__exec);

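The copy_if hunk gates a single-work-group kernel on shared local memory: __n is rounded up to a power of two, half of the device's local memory is budgeted for data (pessimistic, per the comment, to leave room for the compiled kernel), and the kernel needs two std::uint16_t words per rounded element. A standalone sketch of that decision, with C++20 std::bit_ceil standing in for oneDPL's __dpl_bit_ceil; how this combines with the work-group-size check is not visible in the diff and is assumed here:

#include <bit>
#include <cstddef>
#include <cstdint>

// Fit n predicate flags plus n offsets, both std::uint16_t, into half of
// the device's reported local memory, and respect the element-count cap.
bool single_group_copy_if_fits(std::size_t n, std::size_t local_mem_size)
{
    constexpr std::size_t single_group_upper_limit = 2048;

    const std::size_t n_uniform = std::bit_ceil(n);  // next power of two >= n
    const std::size_t max_slm = local_mem_size / 2;  // pessimistic halving
    const std::size_t req_slm = sizeof(std::uint16_t) * n_uniform * 2;

    return n <= single_group_upper_limit && req_slm <= max_slm;
}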
