Skip to content

Commit

Permalink
[SYCL][Fusion] Avoid deadlock on events with no command group
Browse files Browse the repository at this point in the history
`sycl::queue` shortcut functions, e.g., `sycl::queue::memcpy`, may not
create a command group, so events coming from those will not have a
command group attached. Avoid deadlock when going through event list
in fusion mode.

Signed-off-by: Victor Perez <victor.perez@codeplay.com>
  • Loading branch information
victor-eds committed Feb 7, 2024
1 parent c320f3b commit 62bd64e
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 1 deletion.
1 change: 1 addition & 0 deletions sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,7 @@ Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG(
for (auto Ev = Events.begin(); Ev != Events.end();) {
auto *EvDepCmd = static_cast<Command *>((*Ev)->getCommand());
if (!EvDepCmd) {
++Ev;
continue;
}
// Handle event dependencies on any commands part of another active
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// RUN: %{build} -fsycl-embed-ir -o %t.out
// RUN: %{run} %t.out
// UNSUPPORTED: hip || cuda

// COM: When ran on HIP and CUDA, this algorithm launches 'memcpy' commands
// leading to an infinite loop due to a bug in kernel fusion.
Expand Down
99 changes: 99 additions & 0 deletions sycl/test-e2e/KernelFusion/queue-shortcut-functions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// RUN: %{build} -fsycl-embed-ir -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 \
// RUN: | FileCheck %s --implicit-check-not=ERROR

// Test fusion with queue shortcut functions being involved.

#include <sycl/sycl.hpp>

using namespace sycl;

template <int FusionStartPoint, int KernelNum> class Kernel;

template <int FusionStartPoint> void test() {
static_assert(0 <= FusionStartPoint && FusionStartPoint < 3,
"Invalid fusion start point");

constexpr size_t size = 1024;
constexpr float value = 10;
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
std::array<float, size> h;
h.fill(0);
auto *ptr0 = sycl::malloc_device<float>(size, q);
auto *ptr1 = sycl::malloc_device<float>(size, q);

{
range<1> r{size};

ext::codeplay::experimental::fusion_wrapper fw{q};

if constexpr (FusionStartPoint == 0) {
fw.start_fusion();
}

// ptr0(x) = value
auto e0 = q.parallel_for<Kernel<FusionStartPoint, 0>>(
r, [=](sycl::id<1> i) { ptr0[i] = value; });
// ptr1(x) = value / 2
auto e1 = q.parallel_for<Kernel<FusionStartPoint, 1>>(
r, [=](sycl::id<1> i) { ptr1[i] = value / 2; });

if constexpr (FusionStartPoint == 1) {
fw.start_fusion();
}

// ptr0(x) = value / 2 if x < size / 2 else value
auto e2 = q.memcpy(ptr0, ptr1, sizeof(float) * size / 2, {e0, e1});

if constexpr (FusionStartPoint == 2) {
fw.start_fusion();
}

// ptr0(x) = value / 2 + 1 if x < size / 2 else value + 1
auto e3 = q.parallel_for<Kernel<FusionStartPoint, 2>>(
r, e2, [=](sycl::id<1> i) { ptr0[i]++; });

fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});

// Copyback
q.memcpy(h.data(), ptr0, sizeof(float) * size, e3).wait();
}

sycl::free(ptr0, q);
sycl::free(ptr1, q);

assert(std::all_of(h.begin(), h.begin() + size / 2,
[=](float f) { return f == value / 2 + 1; }) &&
"ERROR");
assert(std::all_of(h.begin() + size / 2, h.end(),
[=](float f) { return f == value + 1; }) &&
"ERROR");
}

int main() {
std::cerr << "FusionStartPoint = 0:\n";
// COM: memcpy leads to a CG being created as it depends on CGs not producing
// a PI event (coming from the CGs to be fused), so not safe to bypass. Fusion
// should be cancelled as a dependency with an event to be fused is found.

// CHECK: FusionStartPoint = 0:
// CHECK-NEXT: WARNING: Not fusing 'copy usm' command group. Can only fuse device kernel command groups.
// CHECK-NEXT: WARNING: Aborting fusion because synchronization with one of the kernels in the fusion list was requested
test<0>();

std::cerr << "FusionStartPoint = 1:\n";
// COM: memcpy does not create CG, memory manager handles the operation
// instead. As no dependency with a CG to be fused is found, events are issued
// as usual and fusion takes place.

// CHECK-NEXT: FusionStartPoint = 1:
// CHECK-NEXT: JIT DEBUG: Compiling new kernel, no suitable cached kernel found
test<1>();

std::cerr << "FusionStartPoint = 2:\n";
// COM: Same as above.

// CHECK-NEXT: FusionStartPoint = 2:
// CHECK-NEXT: JIT DEBUG: Compiling new kernel, no suitable cached kernel found
test<2>();
}

0 comments on commit 62bd64e

Please sign in to comment.