diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index cd36b740d939d..32b34e76f2def 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -1661,6 +1661,8 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { throwIfUsedByGraph(); preScreenAccessor(PropertyList); + detail::associateBufferWithGraph(CommandGroupHandler, + detail::getSyclObjImpl(BufferRef)); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), @@ -1699,6 +1701,8 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { throwIfUsedByGraph(); preScreenAccessor(PropertyList); + detail::associateBufferWithGraph(CommandGroupHandler, + detail::getSyclObjImpl(BufferRef)); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), @@ -1834,6 +1838,8 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { throwIfUsedByGraph(); preScreenAccessor(PropertyList); + detail::associateBufferWithGraph(CommandGroupHandler, + detail::getSyclObjImpl(BufferRef)); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), @@ -1871,6 +1877,8 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : throwIfUsedByGraph(); preScreenAccessor(PropertyList); initHostAcc(); + detail::associateBufferWithGraph(CommandGroupHandler, + detail::getSyclObjImpl(BufferRef)); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), @@ -2172,6 +2180,8 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : PI_ERROR_INVALID_VALUE); initHostAcc(); + detail::associateBufferWithGraph(CommandGroupHandler, + detail::getSyclObjImpl(BufferRef)); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), @@ -2216,6 +2226,8 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : PI_ERROR_INVALID_VALUE); initHostAcc(); + detail::associateBufferWithGraph(CommandGroupHandler, + detail::getSyclObjImpl(BufferRef)); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), diff --git a/sycl/include/sycl/detail/handler_proxy.hpp b/sycl/include/sycl/detail/handler_proxy.hpp index f717d0e72a1bd..e6b9c255c24f7 100644 --- a/sycl/include/sycl/detail/handler_proxy.hpp +++ b/sycl/include/sycl/detail/handler_proxy.hpp @@ -11,6 +11,8 @@ #include // for image_target, target #include // for __SYCL_EXPORT +#include + namespace sycl { inline namespace _V1 { @@ -21,6 +23,7 @@ namespace detail { class AccessorBaseHost; class UnsampledImageAccessorBaseHost; class SampledImageAccessorBaseHost; +class buffer_impl; #ifdef __SYCL_DEVICE_ONLY__ // In device compilation accessor isn't inherited from host base classes, so @@ -35,6 +38,9 @@ __SYCL_EXPORT void associateWithHandler(handler &, UnsampledImageAccessorBaseHost *, image_target); __SYCL_EXPORT void associateWithHandler(handler &, SampledImageAccessorBaseHost *, image_target); + +__SYCL_EXPORT void associateBufferWithGraph(handler &, + std::shared_ptr); #endif } // namespace detail } // namespace _V1 diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index f0c6b2150e404..da9505644d786 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -622,6 +622,7 @@ class __SYCL_EXPORT handler { image_target AccTarget); void associateWithHandler(detail::SampledImageAccessorBaseHost *AccBase, image_target AccTarget); + void associateBufferWithGraph(std::shared_ptr Buffer); #endif // Recursively calls itself until arguments pack is fully processed. @@ -3384,6 +3385,9 @@ class __SYCL_EXPORT handler { handler &, detail::UnsampledImageAccessorBaseHost *, image_target); friend void detail::associateWithHandler( handler &, detail::SampledImageAccessorBaseHost *, image_target); + friend void + detail::associateBufferWithGraph(handler &, + std::shared_ptr); #endif friend class ::MockHandler; diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index f1fcb6b09751f..90e32bb3c0589 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -255,6 +256,12 @@ class graph_impl { const sycl::property_list &PropList = {}) : MContext(SyclContext), MDevice(SyclDevice), MRecordingQueues(), MEventsMap(), MInorderQueueMap() { + // Check if buffer lifetime extension has been enabled by env var + const char *ExtendBufferLifetimes = + std::getenv("SYCL_GRAPH_EXTEND_BUFFER_LIFETIMES"); + MExtendBufferLifetimes = + ExtendBufferLifetimes && (std::stoi(ExtendBufferLifetimes) != 0); + if (PropList.has_property()) { MSkipCycleChecks = true; } @@ -544,6 +551,14 @@ class graph_impl { /// @return vector of events associated to exit nodes. std::vector getExitNodesEvents(); + void associateBufferWithGraph( + const std::shared_ptr &bufferImpl) { + // If this is false this call is a no-op. + if (!MExtendBufferLifetimes) + return; + MAssociatedBuffers.push_back(bufferImpl); + } + private: /// Iterate over the graph depth-first and run \p NodeFunc on each node. /// @param NodeFunc A function which receives as input a node in the graph to @@ -622,6 +637,15 @@ class graph_impl { /// This list is mainly used by barrier nodes which must be considered /// as predecessors for all nodes subsequently added to the graph. std::vector> MExtraDependencies; + + /// List of buffers which are associated with this graph, i.e. accessors + /// to these buffers are used in one or more nodes of the graph. + /// Buffers are stored here to extend their lifetime for the duration + /// of the graph. + std::vector> MAssociatedBuffers; + + /// Controls whether buffer lifetimes are extended by the graph. + bool MExtendBufferLifetimes = false; }; /// Class representing the implementation of command_graph. diff --git a/sycl/source/detail/handler_proxy.cpp b/sycl/source/detail/handler_proxy.cpp index 05ea058cc5d3b..8bc6ba4f338c4 100644 --- a/sycl/source/detail/handler_proxy.cpp +++ b/sycl/source/detail/handler_proxy.cpp @@ -29,6 +29,11 @@ void associateWithHandler(handler &CGH, SampledImageAccessorBaseHost *Acc, CGH.associateWithHandler(Acc, Target); } +void associateBufferWithGraph(handler &CGH, + std::shared_ptr Buffer) { + CGH.associateBufferWithGraph(Buffer); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index a54c7b0e20283..f762c3e01811b 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -574,6 +574,13 @@ void handler::associateWithHandler( static_cast(AccTarget)); } +void handler::associateBufferWithGraph( + std::shared_ptr Buffer) { + if (auto graph = getCommandGraph(); graph) { + graph->associateBufferWithGraph(Buffer); + } +} + static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, size_t &IndexShift, int Size, bool IsKernelCreatedFromSource, diff --git a/sycl/test-e2e/Graph/Explicit/buffer_lifetime.cpp b/sycl/test-e2e/Graph/Explicit/buffer_lifetime.cpp new file mode 100644 index 0000000000000..b230ce1991db3 --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/buffer_lifetime.cpp @@ -0,0 +1,11 @@ +// REQUIRES: level_zero, gpu +// RUN: %{build} -o %t.out +// RUN: env SYCL_GRAPH_EXTEND_BUFFER_LIFETIMES=1 %{run} %t.out +// Extra run to check for leaks in Level Zero using ZE_DEBUG +// RUN: %if ext_oneapi_level_zero %{env SYCL_GRAPH_EXTEND_BUFFER_LIFETIMES=1 env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %} +// +// CHECK-NOT: LEAK + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/buffer_lifetime.cpp" diff --git a/sycl/test-e2e/Graph/Inputs/buffer_lifetime.cpp b/sycl/test-e2e/Graph/Inputs/buffer_lifetime.cpp new file mode 100644 index 0000000000000..265d65ffedd01 --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/buffer_lifetime.cpp @@ -0,0 +1,56 @@ +// Tests that extending buffer lifetimes with handler::get_access() +// works correctly. + +#include "../graph_common.hpp" + +int main() { + + queue Queue{{sycl::ext::intel::property::queue::no_immediate_command_list{}}}; + + using T = int; + + std::vector DataA(Size), DataB(Size), DataC(Size), Result(Size); + + std::iota(DataA.begin(), DataA.end(), 1); + std::iota(DataB.begin(), DataB.end(), 10); + std::iota(DataC.begin(), DataC.end(), 1000); + + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + { + // Create a buffer in temporary scope to test lifetime extension + buffer BufferA{DataA.data(), range<1>{DataA.size()}}; + BufferA.set_write_back(false); + buffer BufferB{DataB.data(), range<1>{DataB.size()}}; + BufferB.set_write_back(false); + buffer BufferC{DataC.data(), range<1>{DataC.size()}}; + BufferC.set_write_back(false); + + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccC = BufferC.get_access(CGH); + CGH.parallel_for(range<1>{Size}, + [=](id<1> idx) { AccC[idx] += AccA[idx] + AccB[idx]; }); + }); + + add_node(Graph, Queue, [&](handler &CGH) { + auto AccC = BufferC.get_access(CGH); + CGH.copy(AccC, Result.data()); + }); + } + + auto ExecGraph = Graph.finalize(); + + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + T Expected = DataA[i] + DataB[i] + DataC[i]; + assert(check_value(i, Expected, Result[i], "Result")); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_lifetime.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_lifetime.cpp new file mode 100644 index 0000000000000..85501ad775a61 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_lifetime.cpp @@ -0,0 +1,11 @@ +// REQUIRES: level_zero, gpu +// RUN: %{build} -o %t.out +// RUN: env SYCL_GRAPH_EXTEND_BUFFER_LIFETIMES=1 %{run} %t.out +// Extra run to check for leaks in Level Zero using ZE_DEBUG +// RUN: %if ext_oneapi_level_zero %{env SYCL_GRAPH_EXTEND_BUFFER_LIFETIMES=1 env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %} +// +// CHECK-NOT: LEAK + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/buffer_lifetime.cpp"