Skip to content

Commit

Permalink
[SYCL][Graph] Permit empty & barrier nodes in WGU (#14236)
Browse files Browse the repository at this point in the history
In order to enable the minimum viable GROMACS use case for the Whole
Graph Update feature, allow graphs to contain empty nodes and barrier
nodes during update.

See discussion thread
#13253 (comment) on
SYCL-Graph spec PR for publicizing the availability of the Whole Graph
Update feature.
  • Loading branch information
EwanC authored Jun 25, 2024
1 parent 99635a0 commit 93fef86
Show file tree
Hide file tree
Showing 5 changed files with 317 additions and 11 deletions.
30 changes: 20 additions & 10 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,6 @@ graph_impl::add(const std::shared_ptr<graph_impl> &Impl,
(void)Args;
sycl::handler Handler{Impl};
CGF(Handler);
Handler.finalize();

if (Handler.MCGType == sycl::detail::CG::Barrier) {
throw sycl::exception(
Expand All @@ -381,6 +380,8 @@ graph_impl::add(const std::shared_ptr<graph_impl> &Impl,
"SYCL Graph Explicit API. Please use empty nodes instead.");
}

Handler.finalize();

node_type NodeType =
Handler.MImpl->MUserFacingNodeType !=
ext::oneapi::experimental::node_type::empty
Expand Down Expand Up @@ -1236,18 +1237,22 @@ void exec_graph_impl::update(
sycl::make_error_code(errc::invalid),
"Node passed to update() is not part of the graph.");
}
if (Node->MCGType != sycl::detail::CG::Kernel) {
throw sycl::exception(errc::invalid, "Cannot update non-kernel nodes");
}

if (Node->MCommandGroup->getRequirements().size() == 0) {
continue;
if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel ||
Node->MCGType == sycl::detail::CG::Barrier)) {
throw sycl::exception(errc::invalid,
"Unsupported node type for update. Only kernel, "
"barrier and empty nodes are supported.");
}
NeedScheduledUpdate = true;

UpdateRequirements.insert(UpdateRequirements.end(),
Node->MCommandGroup->getRequirements().begin(),
Node->MCommandGroup->getRequirements().end());
if (const auto &CG = Node->MCommandGroup;
CG && CG->getRequirements().size() != 0) {
NeedScheduledUpdate = true;

UpdateRequirements.insert(UpdateRequirements.end(),
Node->MCommandGroup->getRequirements().begin(),
Node->MCommandGroup->getRequirements().end());
}
}

// Clean up any execution events which have finished so we don't pass them to
Expand Down Expand Up @@ -1290,6 +1295,11 @@ void exec_graph_impl::update(
}

void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
// Kernel node update is the only command type supported in UR for update.
// Updating any other types of nodes, e.g. empty & barrier nodes is a no-op.
if (Node->MCGType != sycl::detail::CG::Kernel) {
return;
}
auto ContextImpl = sycl::detail::getSyclObjImpl(MContext);
const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin();
auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice());
Expand Down
121 changes: 121 additions & 0 deletions sycl/test-e2e/Graph/Update/whole_update_barrier_node.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

// Tests that whole graph update works when a graph contains a barrier node.

#include "../graph_common.hpp"

// Queue submissions that can be recorded to a graph, with a barrier node
// separating initialization and computation kernel nodes
template <class T>
void RecordGraph(queue &Queue, size_t Size, T *Input1, T *Input2, T *Output) {
Queue.submit([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Input1[i] += i;
}
});
});

Queue.submit([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Input2[i] += i;
}
});
});

Queue.ext_oneapi_submit_barrier();

Queue.submit([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Output[i] = Input1[i] * Input2[i];
}
});
});
}

int main() {
queue Queue{};

using T = int;

// USM allocations for GraphA
T *InputA1 = malloc_device<T>(Size, Queue);
T *InputA2 = malloc_device<T>(Size, Queue);
T *OutputA = malloc_device<T>(Size, Queue);

// Initialize USM allocations
T Pattern1 = 0xA;
T Pattern2 = 0x42;
T PatternZero = 0;

Queue.fill(InputA1, Pattern1, Size);
Queue.fill(InputA2, Pattern2, Size);
Queue.fill(OutputA, PatternZero, Size);
Queue.wait();

// Define GraphA
exp_ext::command_graph GraphA{Queue};
GraphA.begin_recording(Queue);
RecordGraph(Queue, Size, InputA1, InputA2, OutputA);
GraphA.end_recording();

// Finalize, run, and validate GraphA
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
Queue.ext_oneapi_graph(GraphExecA).wait();

std::vector<T> HostOutput(Size);
Queue.copy(OutputA, HostOutput.data(), Size).wait();

for (int i = 0; i < Size; i++) {
T Ref = (Pattern1 + i) * (Pattern2 + i);
assert(check_value(i, Ref, HostOutput[i], "OutputA"));
}

// Create GraphB which will be used to update GraphA
exp_ext::command_graph GraphB{Queue};

// USM allocations for GraphB
T *InputB1 = malloc_device<T>(Size, Queue);
T *InputB2 = malloc_device<T>(Size, Queue);
T *OutputB = malloc_device<T>(Size, Queue);

// Initialize GraphB allocations
Pattern1 = -42;
Pattern2 = 0xF;

Queue.fill(InputB1, Pattern1, Size);
Queue.fill(InputB2, Pattern2, Size);
Queue.fill(OutputB, PatternZero, Size);
Queue.wait();

// Create GraphB
GraphB.begin_recording(Queue);
RecordGraph(Queue, Size, InputB1, InputB2, OutputB);
GraphB.end_recording();

// Update executable GraphA with GraphB, run, and validate
GraphExecA.update(GraphB);
Queue.ext_oneapi_graph(GraphExecA).wait();

Queue.copy(OutputB, HostOutput.data(), Size).wait();
for (int i = 0; i < Size; i++) {
T Ref = (Pattern1 + i) * (Pattern2 + i);
assert(check_value(i, Ref, HostOutput[i], "OutputB"));
}

free(InputA1, Queue);
free(InputA2, Queue);
free(OutputA, Queue);

free(InputB1, Queue);
free(InputB2, Queue);
free(OutputB, Queue);
return 0;
}
123 changes: 123 additions & 0 deletions sycl/test-e2e/Graph/Update/whole_update_empty_node.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
// Extra run to check for immediate-command-list in Level Zero
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

// Tests that whole graph update works when a graph contain an empty node.

#include "../graph_common.hpp"

// Creates a graph with an empty node separating initialization and computation
// kernel nodes
template <class T>
void CreateGraph(
exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
size_t Size, T *Input1, T *Input2, T *Output) {
Graph.add([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Input1[i] += i;
}
});
});

Graph.add([&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Input2[i] += i;
}
});
});

auto EmptyNodeA =
Graph.add({exp_ext::property::node::depends_on_all_leaves()});

Graph.add(
[&](handler &CGH) {
CGH.single_task([=]() {
for (int i = 0; i < Size; i++) {
Output[i] = Input1[i] * Input2[i];
}
});
},
{exp_ext::property::node::depends_on(EmptyNodeA)});
}

int main() {
queue Queue{};

using T = int;

// USM allocations for GraphA
T *InputA1 = malloc_device<T>(Size, Queue);
T *InputA2 = malloc_device<T>(Size, Queue);
T *OutputA = malloc_device<T>(Size, Queue);

// Initialize USM allocations
T Pattern1 = 0xA;
T Pattern2 = 0x42;
T PatternZero = 0;

Queue.fill(InputA1, Pattern1, Size);
Queue.fill(InputA2, Pattern2, Size);
Queue.fill(OutputA, PatternZero, Size);
Queue.wait();

// Construct GraphA
exp_ext::command_graph GraphA{Queue};
CreateGraph(GraphA, Size, InputA1, InputA2, OutputA);

// Finalize, run, and validate GraphA
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
Queue.ext_oneapi_graph(GraphExecA).wait();

std::vector<T> HostOutput(Size);
Queue.copy(OutputA, HostOutput.data(), Size).wait();

for (int i = 0; i < Size; i++) {
T Ref = (Pattern1 + i) * (Pattern2 + i);
assert(check_value(i, Ref, HostOutput[i], "OutputA"));
}

// Create GraphB which will be used to update GraphA
exp_ext::command_graph GraphB{Queue};

// USM allocations for GraphB
T *InputB1 = malloc_device<T>(Size, Queue);
T *InputB2 = malloc_device<T>(Size, Queue);
T *OutputB = malloc_device<T>(Size, Queue);

// Initialize GraphB
Pattern1 = -42;
Pattern2 = 0xF;

Queue.fill(InputB1, Pattern1, Size);
Queue.fill(InputB2, Pattern2, Size);
Queue.fill(OutputB, PatternZero, Size);
Queue.wait();

// Construct GraphB
CreateGraph(GraphB, Size, InputB1, InputB2, OutputB);

// Update executable GraphA with GraphB, run, and validate
GraphExecA.update(GraphB);
Queue.ext_oneapi_graph(GraphExecA).wait();

Queue.copy(OutputB, HostOutput.data(), Size).wait();

for (int i = 0; i < Size; i++) {
T Ref = (Pattern1 + i) * (Pattern2 + i);
assert(check_value(i, Ref, HostOutput[i], "OutputB"));
}

free(InputA1, Queue);
free(InputA2, Queue);
free(OutputA, Queue);

free(InputB1, Queue);
free(InputB2, Queue);
free(OutputB, Queue);
return 0;
}
12 changes: 11 additions & 1 deletion sycl/unittests/Extensions/CommandGraph/Exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,15 +216,25 @@ void addImagesCopies(experimental::detail::modifiable_command_graph &G,
} // anonymous namespace

TEST_F(CommandGraphTest, ExplicitBarrierException) {

bool Success = true;
std::error_code ExceptionCode = make_error_code(sycl::errc::success);
try {
auto Barrier =
Graph.add([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); });
} catch (exception &Exception) {
ExceptionCode = Exception.code();
std::string ErrorStr =
"The sycl_ext_oneapi_enqueue_barrier feature is "
"not available with SYCL Graph Explicit API. Please use empty nodes "
"instead.";
std::cout << Exception.what() << std::endl;
std::cout << ErrorStr << std::endl;
ASSERT_FALSE(std::string(Exception.what()).find(ErrorStr) ==
std::string::npos);
Success = false;
}
ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
ASSERT_EQ(Success, false);
}

TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) {
Expand Down
42 changes: 42 additions & 0 deletions sycl/unittests/Extensions/CommandGraph/Update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) {
cgh.host_task([]() {});
}));

ASSERT_ANY_THROW(auto NodeBarreriTask = Graph.add([&](sycl::handler &cgh) {
cgh.set_arg(0, DynamicParam);
cgh.ext_oneapi_barrier();
}));

Graph.begin_recording(Queue);
ASSERT_ANY_THROW(auto NodeBarrierTask = Graph.add([&](sycl::handler &cgh) {
cgh.set_arg(0, DynamicParam);
cgh.ext_oneapi_barrier();
}));
Graph.end_recording(Queue);

auto NodeEmpty = Graph.add();

experimental::command_graph Subgraph(Queue.get_context(), Dev);
Expand Down Expand Up @@ -375,3 +387,33 @@ TEST_F(WholeGraphUpdateTest, MissingUpdatableProperty) {
auto GraphExec = Graph.finalize();
EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception);
}

TEST_F(WholeGraphUpdateTest, EmptyNode) {
// Test that updating a graph that has an empty node is not an error
auto NodeEmpty = Graph.add();
auto UpdateNodeEmpty = UpdateGraph.add();

auto NodeKernel = Graph.add(EmptyKernel);
auto UpdateNodeKernel = UpdateGraph.add(EmptyKernel);

auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
GraphExec.update(UpdateGraph);
}

TEST_F(WholeGraphUpdateTest, BarrierNode) {
// Test that updating a graph that has a barrier node is not an error
Graph.begin_recording(Queue);
auto NodeKernel = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier({NodeKernel});
Graph.end_recording(Queue);

UpdateGraph.begin_recording(Queue);
auto UpdateNodeKernel = Queue.submit(
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
Queue.ext_oneapi_submit_barrier({UpdateNodeKernel});
UpdateGraph.end_recording(Queue);

auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
GraphExec.update(UpdateGraph);
}

0 comments on commit 93fef86

Please sign in to comment.