diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc index 70898ecf61a10..59ec73be714a3 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc @@ -709,6 +709,32 @@ optimize such partial barriers. _{endnote}_] |==== +==== Command Graph + +The functions in this section are only available if the +link:./sycl_ext_oneapi_graph.asciidoc[ + sycl_ext_oneapi_graph] extension is supported. + +|==== +a| +[frame=all,grid=none] +!==== +a! +[source,c++] +---- +namespace sycl::ext::oneapi::experimental { + +template +void submit(command_graph g, CommandGroupFunc&& cgf); + +} +---- +!==== +_Effects_: Submits a command-group function object (as defined by the SYCL +specification) to the `command_graph` for deferred execution. Subsequent calls +will create a linear chain of nodes with implicit edges. +|==== + == Issues diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index ad78970d82709..743499dc3de59 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -103,6 +104,12 @@ void submit(queue Q, CommandGroupFunc &&CGF, Q, std::forward(CGF), CodeLoc); } +template +void submit(command_graph G, CommandGroupFunc &&CGF) { + G.add(std::forward(CGF), + {property::node::depends_on_all_leaves()}); +} + template event submit_with_event(queue Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc = diff --git a/sycl/test-e2e/Graph/Explicit/ext_oneapi_enqueue_functions.cpp b/sycl/test-e2e/Graph/Explicit/ext_oneapi_enqueue_functions.cpp new file mode 100644 index 0000000000000..5a7e01504597d --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/ext_oneapi_enqueue_functions.cpp @@ -0,0 +1,10 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/ext_oneapi_enqueue_functions.cpp" diff --git a/sycl/test-e2e/Graph/Inputs/ext_oneapi_enqueue_functions.cpp b/sycl/test-e2e/Graph/Inputs/ext_oneapi_enqueue_functions.cpp new file mode 100644 index 0000000000000..1fcd535a33a70 --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/ext_oneapi_enqueue_functions.cpp @@ -0,0 +1,99 @@ +// Tests the enqueue free function kernel shortcuts. + +#include "../graph_common.hpp" +#include +#include + +template +void run_kernels_usm_in_order(queue Q, const size_t Size, T *DataA, T *DataB, + T *DataC, std::vector &Output, T Pattern) { + exp_ext::fill(Q, DataA, Pattern, Size); + + exp_ext::single_task(Q, [=]() { + for (size_t i = 0; i < Size; ++i) { + DataB[i] = i; + } + }); + + exp_ext::parallel_for(Q, sycl::range<1>{Size}, [=](sycl::item<1> Item) { + DataC[Item] = DataA[Item] * DataB[Item]; + }); + + exp_ext::copy(Q, DataC, Output.data(), Size); +} + +template +void add_kernels_usm_in_order( + exp_ext::command_graph G, + const size_t Size, T *DataA, T *DataB, T *DataC, std::vector &Output, + T Pattern) { + exp_ext::submit(G, + [&](sycl::handler &CGH) { CGH.fill(DataA, Pattern, Size); }); + + exp_ext::submit(G, [&](sycl::handler &CGH) { + CGH.single_task([=]() { + for (size_t i = 0; i < Size; ++i) { + DataB[i] = i; + } + }); + }); + + exp_ext::submit(G, [&](sycl::handler &CGH) { + CGH.parallel_for(sycl::range<1>{Size}, [=](sycl::item<1> Item) { + DataC[Item] = DataA[Item] * DataB[Item]; + }); + }); + + exp_ext::submit( + G, [&](sycl::handler &CGH) { CGH.copy(DataC, Output.data(), Size); }); +} + +template +void add_nodes_in_order( + exp_ext::command_graph Graph, queue Queue, + const size_t Size, T *DataA, T *DataB, T *DataC, std::vector &Output, + T Pattern) { +#if defined(GRAPH_E2E_EXPLICIT) + add_kernels_usm_in_order(Graph, Size, DataA, DataB, DataC, Output, Pattern); +#elif defined(GRAPH_E2E_RECORD_REPLAY) + Graph.begin_recording(Queue); + run_kernels_usm_in_order(Queue, Size, DataA, DataB, DataC, Output, Pattern); + Graph.end_recording(Queue); +#else + assert(0 && "Error: Cannot use add_nodes without selecting an API"); +#endif +} + +int main() { + queue InOrderQueue{property::queue::in_order{}}; + + using T = int; + T Pattern = 42; + + T *PtrA = malloc_device(Size, InOrderQueue); + T *PtrB = malloc_device(Size, InOrderQueue); + T *PtrC = malloc_device(Size, InOrderQueue); + + std::vector Output(Size); + + exp_ext::command_graph Graph{InOrderQueue}; + + add_nodes_in_order(Graph, InOrderQueue, Size, PtrA, PtrB, PtrC, Output, + Pattern); + + auto GraphExec = Graph.finalize(); + + InOrderQueue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + InOrderQueue.wait_and_throw(); + + free(PtrA, InOrderQueue); + free(PtrB, InOrderQueue); + free(PtrC, InOrderQueue); + + for (size_t i = 0; i < Size; i++) { + T Ref = Pattern * i; + assert(Output[i] == Ref); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp index ef3b440790c6b..abb2f18e9eae9 100644 --- a/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/ext_oneapi_enqueue_functions.cpp @@ -5,55 +5,6 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// Tests the enqueue free function kernel shortcuts. +#define GRAPH_E2E_RECORD_REPLAY -#include "../graph_common.hpp" -#include -#include - -int main() { - queue InOrderQueue{property::queue::in_order{}}; - - using T = int; - - T *PtrA = malloc_device(Size, InOrderQueue); - T *PtrB = malloc_device(Size, InOrderQueue); - T *PtrC = malloc_device(Size, InOrderQueue); - - exp_ext::command_graph Graph{InOrderQueue}; - Graph.begin_recording(InOrderQueue); - - T Pattern = 42; - exp_ext::fill(InOrderQueue, PtrA, Pattern, Size); - - exp_ext::single_task(InOrderQueue, [=]() { - for (size_t i = 0; i < Size; ++i) { - PtrB[i] = i; - } - }); - - exp_ext::parallel_for( - InOrderQueue, sycl::range<1>{Size}, - [=](sycl::item<1> Item) { PtrC[Item] = PtrA[Item] * PtrB[Item]; }); - - std::vector Output(Size); - exp_ext::copy(InOrderQueue, PtrC, Output.data(), Size); - - Graph.end_recording(); - - auto GraphExec = Graph.finalize(); - - InOrderQueue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); - InOrderQueue.wait_and_throw(); - - free(PtrA, InOrderQueue); - free(PtrB, InOrderQueue); - free(PtrC, InOrderQueue); - - for (size_t i = 0; i < Size; i++) { - T Ref = Pattern * i; - assert(Output[i] == Ref); - } - - return 0; -} +#include "../Inputs/ext_oneapi_enqueue_functions.cpp"