Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][Graph] in-order queue barrier fix #13193

Merged
merged 3 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1703,11 +1703,15 @@ passed an invalid event.
The new handler methods, and queue shortcuts, defined by
link:../supported/sycl_ext_oneapi_enqueue_barrier.asciidoc[sycl_ext_oneapi_enqueue_barrier]
can only be used in graph nodes created using the Record & Replay API, as
barriers rely on events to enforce dependencies. A synchronous exception will be
thrown with error code `invalid` if a user tries to add them to a graph using
the Explicit API. Empty nodes created with the `node::depends_on_all_leaves`
property can be used instead of barriers when a user is building a graph with
the explicit API.
barriers rely on events to enforce dependencies. For barriers with an empty
wait list parameter, the semantics are that the barrier node being added to
will depend on all the existing graph leaf nodes, not only the leaf nodes
that were added from the queue being recorded.

A synchronous exception will be thrown with error code `invalid` if a user
tries to add them to a graph using the Explicit API. Empty nodes created with
the `node::depends_on_all_leaves` property can be used instead of barriers when
a user is building a graph with the explicit API.

==== sycl_ext_oneapi_memcpy2d

Expand Down
21 changes: 10 additions & 11 deletions sycl/source/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,14 +207,13 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) {

static event
getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
// The last command recorded in the graph is not tracked by the queue but by
// the graph itself. We must therefore search for the last node/event in the
// This function should not be called when a queue is recording to a graph,
// as a graph can record from multiple queues and we cannot guarantee the
// last node added by an in-order queue will be the last node added to the
// graph.
if (auto Graph = QueueImpl->getCommandGraph()) {
auto LastEvent =
Graph->getEventForNode(Graph->getLastInorderNode(QueueImpl));
return sycl::detail::createSyclObjFromImpl<event>(LastEvent);
}
assert(!QueueImpl->getCommandGraph() &&
"Should not be called in on graph recording.");

auto LastEvent = QueueImpl->getLastEvent();
if (QueueImpl->MDiscardEvents) {
std::cout << "Discard event enabled" << std::endl;
Expand All @@ -241,7 +240,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
/// \return a SYCL event object, which corresponds to the queue the command
/// group is being enqueued on.
event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
if (is_in_order())
if (is_in_order() && !impl->getCommandGraph())
return getBarrierEventForInorderQueueHelper(impl);

return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
Expand All @@ -260,10 +259,10 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
const detail::code_location &CodeLoc) {
bool AllEventsEmptyOrNop = std::all_of(
begin(WaitList), end(WaitList), [&](const event &Event) -> bool {
return !detail::getSyclObjImpl(Event)->isContextInitialized() ||
detail::getSyclObjImpl(Event)->isNOP();
auto EventImpl = detail::getSyclObjImpl(Event);
return !EventImpl->isContextInitialized() || EventImpl->isNOP();
});
if (is_in_order() && AllEventsEmptyOrNop)
if (is_in_order() && !impl->getCommandGraph() && AllEventsEmptyOrNop)
return getBarrierEventForInorderQueueHelper(impl);

return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
Expand Down
45 changes: 45 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/barrier_multi_queue.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
//

#include "../graph_common.hpp"

int main() {
queue Queue1{{sycl::property::queue::in_order()}};
queue Queue2{Queue1.get_context(),
Queue1.get_device(),
{sycl::property::queue::in_order()}};

int *PtrA = malloc_device<int>(Size, Queue1);
int *PtrB = malloc_device<int>(Size, Queue1);

exp_ext::command_graph Graph{Queue1};
Graph.begin_recording({Queue1, Queue2});

auto EventA = Queue1.submit([&](handler &CGH) {
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it; });
});

Queue2.ext_oneapi_submit_barrier({EventA});

auto EventB = Queue2.copy(PtrA, PtrB, Size);
Graph.end_recording();

auto ExecGraph = Graph.finalize();
Queue1.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });

std::array<int, Size> Output;
Queue1.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();

for (int i = 0; i < Size; i++) {
assert(Output[i] == i);
}

free(PtrA, Queue1);
free(PtrB, Queue1);
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,7 @@ int main() {

{
// Test cast 4 - graph.
sycl::queue GQueue{
{sycl::property::queue::in_order{},
sycl::ext::intel::property::queue::no_immediate_command_list{}}};
sycl::queue GQueue{sycl::property::queue::in_order{}};

if (GQueue.get_device().has(sycl::aspect::ext_oneapi_graph)) {
std::cout << "Test 4" << std::endl;
Expand All @@ -84,7 +82,6 @@ int main() {
cgh.single_task<class kernel3>([=]() { *Res += 9; });
});
auto Barrier = GQueue.ext_oneapi_submit_barrier();
assert(Barrier == BeforeBarrierEvent);
GQueue.submit([&](sycl::handler &cgh) {
cgh.single_task<class kernel4>([=]() { *Res *= 2; });
});
Expand Down
Loading
Loading