-
Notifications
You must be signed in to change notification settings - Fork 734
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYCL][Fusion] Take auxiliary resources from fused command groups
`KernelFusion/Reduction/reduction.cpp` was `XFAIL`ed in #1254 to avoid CI errors. This patch fixes that error by assigning auxiliary resources from each fused command group to the placeholder fusion event. This event will not be marked as completed after the fused reductions finish execution either if fusion is completed , cancelled or aborted. Test is updated to check every algorithm. This way we cover both cases (fusion taking and not taking place), as some algorithms will fail to fuse due to the nature of the command groups being launched (incompatible ND-ranges or incompatible command-group kinds). Signed-off-by: Victor Perez <victor.perez@codeplay.com>
- Loading branch information
1 parent
4a510b6
commit 67cf773
Showing
5 changed files
with
93 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,59 +1,94 @@ | ||
// RUN: %{build} -fsycl-embed-ir -o %t.out | ||
// RUN: %{run} %t.out | ||
// XFAIL: cpu | ||
|
||
// Test fusion works with reductions. | ||
// Test fusion works with reductions. Some algorithms will lead to fusion being | ||
// cancelled in some devices. These should work properly anyway. | ||
|
||
#include <sycl/sycl.hpp> | ||
#include <utility> | ||
|
||
#include "../helpers.hpp" | ||
#include "sycl/detail/reduction_forward.hpp" | ||
|
||
using namespace sycl; | ||
|
||
template <typename BinaryOperation> class ReductionTest; | ||
constexpr inline size_t globalSize = 512; | ||
|
||
int main() { | ||
constexpr size_t dataSize = 512; | ||
template <typename BinaryOperation> class ReductionTest; | ||
|
||
int sumRes = -1; | ||
int maxRes = -1; | ||
template <detail::reduction::strategy Strategy> void test(nd_range<1> ndr) { | ||
std::array<int, globalSize> data; | ||
int sumRes = 0; | ||
int maxRes = 0; | ||
|
||
{ | ||
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}}; | ||
|
||
buffer<int> dataBuf{dataSize}; | ||
buffer<int> dataBuf{data}; | ||
buffer<int> sumBuf{&sumRes, 1}; | ||
buffer<int> maxBuf{&maxRes, 1}; | ||
|
||
ext::codeplay::experimental::fusion_wrapper fw{q}; | ||
fw.start_fusion(); | ||
|
||
fw.start_fusion(); | ||
iota(q, dataBuf, 0); | ||
|
||
q.submit([&](handler &cgh) { | ||
accessor in(dataBuf, cgh, read_only); | ||
auto sumRed = reduction(sumBuf, cgh, plus<>{}, | ||
property::reduction::initialize_to_identity{}); | ||
detail::reduction_parallel_for<detail::auto_name, Strategy>( | ||
cgh, ndr, ext::oneapi::experimental::empty_properties_t{}, sumRed, | ||
[=](nd_item<1> Item, auto &Red) { | ||
Red.combine(in[Item.get_global_id()]); | ||
}); | ||
}); | ||
|
||
q.submit([&](handler &cgh) { | ||
accessor in(dataBuf, cgh, read_only); | ||
auto maxRed = reduction(maxBuf, cgh, maximum<>{}, | ||
property::reduction::initialize_to_identity{}); | ||
cgh.parallel_for(dataSize, sumRed, maxRed, | ||
[=](id<1> i, auto &sum, auto &max) { | ||
sum.combine(in[i]); | ||
max.combine(in[i]); | ||
}); | ||
detail::reduction_parallel_for<detail::auto_name, Strategy>( | ||
cgh, ndr, ext::oneapi::experimental::empty_properties_t{}, maxRed, | ||
[=](nd_item<1> Item, auto &Red) { | ||
Red.combine(in[Item.get_global_id()]); | ||
}); | ||
}); | ||
|
||
complete_fusion_with_check( | ||
fw, ext::codeplay::experimental::property::no_barriers{}); | ||
fw.complete_fusion(ext::codeplay::experimental::property::no_barriers{}); | ||
} | ||
|
||
constexpr int expectedMax = dataSize - 1; | ||
constexpr int expectedSum = dataSize * expectedMax / 2; | ||
constexpr int expectedMax = globalSize - 1; | ||
constexpr int expectedSum = globalSize * expectedMax / 2; | ||
|
||
std::cerr << sumRes << "\n"; | ||
assert(sumRes == expectedSum); | ||
assert(maxRes == expectedMax); | ||
} | ||
|
||
assert(maxRes == expectedMax && "Unexpected max value"); | ||
assert(sumRes == expectedSum && "Unexpected sum value"); | ||
template <detail::reduction::strategy... strategies> | ||
void test_strategies( | ||
std::integer_sequence<detail::reduction::strategy, strategies...>, | ||
size_t localSize) { | ||
((test<strategies>({globalSize, localSize})), ...); | ||
} | ||
|
||
return 0; | ||
int main() { | ||
constexpr std::array<std::size_t, 3> localSizes{ | ||
globalSize /*Test single work-group*/, | ||
globalSize / 32 /*Test middle-sized work-group*/, | ||
1 /*Test single item work-groups*/}; | ||
for (size_t localSize : localSizes) { | ||
test_strategies( | ||
std::integer_sequence< | ||
detail::reduction::strategy, | ||
detail::reduction::strategy::group_reduce_and_last_wg_detection, | ||
detail::reduction::strategy::local_atomic_and_atomic_cross_wg, | ||
detail::reduction::strategy::range_basic, | ||
detail::reduction::strategy::group_reduce_and_atomic_cross_wg, | ||
detail::reduction::strategy::local_mem_tree_and_atomic_cross_wg, | ||
detail::reduction::strategy::group_reduce_and_multiple_kernels, | ||
detail::reduction::strategy::basic, | ||
detail::reduction::strategy::multi>{}, | ||
localSize); | ||
} | ||
} |