Skip to content

Commit

Permalink
If the iters per work item is 1, then only compile the basic pfor kernel
Browse files Browse the repository at this point in the history
Signed-off-by: Matthew Michel <matthew.michel@intel.com>
  • Loading branch information
mmichel11 committed Sep 24, 2024
1 parent 075c030 commit a8c965e
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,16 +352,25 @@ __parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&&

using __small_submitter = __parallel_for_small_submitter<_ForKernelSmall>;
using __large_submitter = __parallel_for_large_submitter<_ForKernelLarge, _Ranges...>;
// Compile two kernels: one for small-to-medium inputs and a second for large. This avoids runtime checks within a single
// kernel that worsen performance for small cases.
if (__count < __large_submitter::__estimate_best_start_size(__exec))
// Compile two kernels: one for small-to-medium inputs and a second for large. This avoids runtime checks within a
// single kernel that worsen performance for small cases. If the large submitter processes only one iteration per
// work item, then only compile the basic kernel as the two versions are effectively the same.
if constexpr (__large_submitter::__iters_per_work_item > 1)
{
return __small_submitter()(std::forward<_ExecutionPolicy>(__exec), __brick, __count,
std::forward<_Ranges>(__rngs)...);
if (__count < __large_submitter::__estimate_best_start_size(__exec))
{
return __small_submitter()(std::forward<_ExecutionPolicy>(__exec), __brick, __count,
std::forward<_Ranges>(__rngs)...);
}
else
{
return __large_submitter()(std::forward<_ExecutionPolicy>(__exec), __brick, __count,
std::forward<_Ranges>(__rngs)...);
}
}
else
{
return __large_submitter()(std::forward<_ExecutionPolicy>(__exec), __brick, __count,
return __small_submitter()(std::forward<_ExecutionPolicy>(__exec), __brick, __count,
std::forward<_Ranges>(__rngs)...);
}
}
Expand Down

0 comments on commit a8c965e

Please sign in to comment.