Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ESIMD] Enable -fsycl-esimd-force-stateless-mem by default #9452

Merged
4 changes: 2 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -3170,10 +3170,10 @@ def fsycl_device_only : Flag<["-"], "fsycl-device-only">, Flags<[CoreOption]>,
def fsycl_embed_ir : Flag<["-"], "fsycl-embed-ir">, Flags<[CoreOption]>,
HelpText<"Embed LLVM IR for runtime kernel fusion">;
defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-mem",
LangOpts<"SYCLESIMDForceStatelessMem">, DefaultFalse,
LangOpts<"SYCLESIMDForceStatelessMem">, DefaultTrue,
PosFlag<SetTrue, [], "Enforce using stateless memory accesses. "
"Convert stateful accesses via SYCL accessors to stateless within ESIMD kernels. "
"Disabled by default. (experimental)">,
"Enabled by default. (experimental)">,
NegFlag<SetFalse, [], "Do not enforce using stateless memory accesses. (experimental)">,
BothFlags<[CC1Option, CoreOption], "">>;

Expand Down
17 changes: 11 additions & 6 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5294,6 +5294,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_sycl_unnamed_lambda, true))
CmdArgs.push_back("-fno-sycl-unnamed-lambda");

if (!Args.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
options::OPT_fno_sycl_esimd_force_stateless_mem, true))
CmdArgs.push_back("-fno-sycl-esimd-force-stateless-mem");

// Add the Unique ID prefix
StringRef UniqueID = D.getSYCLUniqueID(Input.getBaseInput());
if (!UniqueID.empty())
Expand Down Expand Up @@ -5390,9 +5394,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
for (auto &Macro : D.getSYCLTargetMacroArgs())
CmdArgs.push_back(Args.MakeArgString(Macro));
}
if (Args.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
options::OPT_fno_sycl_esimd_force_stateless_mem, false))
CmdArgs.push_back("-fsycl-esimd-force-stateless-mem");
if (IsSYCL &&
v-klochkov marked this conversation as resolved.
Show resolved Hide resolved
!Args.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
options::OPT_fno_sycl_esimd_force_stateless_mem, true))
CmdArgs.push_back("-fno-sycl-esimd-force-stateless-mem");
v-klochkov marked this conversation as resolved.
Show resolved Hide resolved

const auto DeviceTraitsMacrosArgs = D.getDeviceTraitsMacrosArgs();
for (const auto &Arg : DeviceTraitsMacrosArgs) {
Expand Down Expand Up @@ -10065,9 +10070,9 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
addArgs(CmdArgs, TCArgs, {"-device-globals"});

// Make ESIMD accessors use stateless memory accesses.
if (TCArgs.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
options::OPT_fno_sycl_esimd_force_stateless_mem, false))
addArgs(CmdArgs, TCArgs, {"-lower-esimd-force-stateless-mem"});
if (TCArgs.hasFlag(options::OPT_fno_sycl_esimd_force_stateless_mem,
options::OPT_fsycl_esimd_force_stateless_mem, false))
addArgs(CmdArgs, TCArgs, {"-lower-esimd-force-stateless-mem=false"});

// Add output file table file option
assert(Output.isFilename() && "output must be a filename");
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
if (LangOpts.SYCLUnnamedLambda)
Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__");

if (LangOpts.SYCLESIMDForceStatelessMem)
// Stateless memory may be enforced only for SYCL device or host.
if ((LangOpts.SYCLIsDevice || LangOpts.SYCLIsHost) &&
LangOpts.SYCLESIMDForceStatelessMem)
Builder.defineMacro("__ESIMD_FORCE_STATELESS_MEM");

// OpenCL definitions.
Expand Down
19 changes: 11 additions & 8 deletions clang/test/Driver/sycl-esimd-force-stateless-mem.cpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@

/// Verify that the driver option is translated to corresponding options
/// to host/device compilation and sycl-post-link.
// RUN: %clang -### -fsycl -fsycl-esimd-force-stateless-mem \
// RUN: %s 2>&1 | FileCheck -check-prefix=CHECK-PASS-TO-COMPS %s
// CHECK-PASS-TO-COMPS: clang{{.*}} "-fsycl-esimd-force-stateless-mem"
// CHECK-PASS-TO-COMPS: sycl-post-link{{.*}} "-lower-esimd-force-stateless-mem"
// CHECK-PASS-TO-COMPS: clang{{.*}} "-fsycl-is-host" {{.*}}"-fsycl-esimd-force-stateless-mem"
"

/// Verify that stateless memory accesses mapping is not enforced by default
// Case1: Check that the enforcing is turned on by default.
// Actually, only sycl-post-link gets an additional flag in this case.
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck -check-prefix=CHECK-DEFAULT %s
// CHECK-DEFAULT-NOT: clang{{.*}} "-fsycl-esimd-force-stateless-mem"
// CHECK-DEFAULT-NOT: clang{{.*}} "sycl-esimd-force-stateless-mem"
// CHECK-DEFAULT-NOT: sycl-post-link{{.*}} "-lower-esimd-force-stateless-mem"
// CHECK-DEFAULT-NOT: clang{{.*}} "-fsycl-is-host" {{.*}}"sycl-esimd-force-stateless-mem"

// Case2: Check that -fno-sycl-esimd-force-stateless-mem is handled correctly -
// i.e. sycl-post-link gets nothing and clang gets corresponding -fno... option.
// RUN: %clang -### -fsycl -fno-sycl-esimd-force-stateless-mem %s 2>&1 | FileCheck -check-prefix=CHECK-NEG %s
// CHECK-NEG: clang{{.*}} "-fno-sycl-esimd-force-stateless-mem"
// CHECK-NEG: sycl-post-link{{.*}} "-lower-esimd-force-stateless-mem=false"
// CHECK-NEG-NOT: clang{{.*}} "-fsycl-is-host" {{.*}}"sycl-esimd-force-stateless-mem"
8 changes: 5 additions & 3 deletions clang/test/Preprocessor/sycl-esimd-force-stateless-mem.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
/// This test checks that the macro __ESIMD_FORCE_STATELESS_MEM is automatically
/// defined only if the option -fsycl-esimd-force-stateless-mem is used.
/// defined by default with -fsycl, -fsycl-is-device or -fsycl-is-host.
v-klochkov marked this conversation as resolved.
Show resolved Hide resolved

// RUN: %clang_cc1 %s -fsycl-is-device -fsycl-esimd-force-stateless-mem -E -dM | FileCheck --check-prefix=CHECK-OPT %s
// RUN: %clang_cc1 %s -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-OPT %s
// RUN: %clang_cc1 %s -fsycl-is-host -E -dM | FileCheck --check-prefix=CHECK-OPT %s

// RUN: %clang_cc1 %s -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
// RUN: %clang_cc1 %s -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
// RUN: %clang_cc1 %s -fsycl-is-host -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
// RUN: %clang_cc1 %s -fsycl-is-device -fno-sycl-esimd-force-stateless-mem -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
// RUN: %clang_cc1 %s -fsycl-is-host -fno-sycl-esimd-force-stateless-mem -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s

// CHECK-OPT:#define __ESIMD_FORCE_STATELESS_MEM 1
// CHECK-NOOPT-NOT:#define __ESIMD_FORCE_STATELESS_MEM 1
2 changes: 1 addition & 1 deletion llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ using namespace llvm::esimd;
cl::opt<bool> ForceStatelessMem(
"lower-esimd-force-stateless-mem", llvm::cl::Optional, llvm::cl::Hidden,
llvm::cl::desc("Use stateless API for accessor based API."),
llvm::cl::init(false));
llvm::cl::init(true));

namespace {
SmallPtrSet<Type *, 4> collectGenXVolatileTypes(Module &);
Expand Down
17 changes: 12 additions & 5 deletions llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s
; RUN: opt -passes=LowerESIMD -lower-esimd-force-stateless-mem=false -S < %s | FileCheck --check-prefix=CHECK-NOFORCE --check-prefix=CHECK %s
; RUN: opt -passes=LowerESIMD -lower-esimd-force-stateless-mem=true -S < %s | FileCheck --check-prefix=CHECK-FORCE --check-prefix=CHECK %s

; This test checks that LowerESIMD pass correctly interpretes the
; 'kernel_arg_accessor_ptr' metadata. Particularly, that it generates additional
Expand Down Expand Up @@ -30,8 +31,14 @@ define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, ptr addrspac

; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" }
; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]}
; CHECK: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}

; CHECK-NOFORCE: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
; CHECK-FORCE: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_KINDS]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}

; CHECK-NOFORCE: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
; CHECK-FORCE: ![[ARG_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
; CHECK-NOFORCE: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}

; CHECK-NOFORCE: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}
; CHECK-FORCE: ![[ARG_DESCS]] = !{!"", !"svmptr_t", !"svmptr_t", !"", !"svmptr_t"}

21 changes: 17 additions & 4 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,9 +620,15 @@ scatter(AccessorTy acc, simd<Toffset, N> offsets, simd<T, N> vals,
/// @return The loaded value.
///
template <typename T, typename AccessorTy>
__ESIMD_API T scalar_load(AccessorTy acc, uint32_t offset) {
__ESIMD_API T scalar_load(AccessorTy acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
uint64_t offset
#else
uint32_t offset
#endif
) {
const simd<T, 1> Res =
gather<T, 1, AccessorTy>(acc, simd<uint32_t, 1>(offset));
gather<T, 1, AccessorTy>(acc, simd<decltype(offset), 1>(offset));
return Res[0];
}

Expand All @@ -634,8 +640,15 @@ __ESIMD_API T scalar_load(AccessorTy acc, uint32_t offset) {
/// @param val The stored value.
///
template <typename T, typename AccessorTy>
__ESIMD_API void scalar_store(AccessorTy acc, uint32_t offset, T val) {
scatter<T, 1, AccessorTy>(acc, simd<uint32_t, 1>(offset), simd<T, 1>(val));
__ESIMD_API void scalar_store(AccessorTy acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
uint64_t offset,
#else
uint32_t offset,
#endif
T val) {
scatter<T, 1, AccessorTy>(acc, simd<decltype(offset), 1>(offset),
simd<T, 1>(val));
}

/// @anchor usm_gather_rgba
Expand Down
33 changes: 16 additions & 17 deletions sycl/test-e2e/ESIMD/api/simd_view_copy_move_assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,34 +24,34 @@ using namespace sycl::ext::intel::esimd;
template <unsigned VL, class T, class F>
bool test(queue q, std::string str, F funcUnderTest) {
std::cout << "Testing " << str << ", VL = " << VL << " ...\n";
T A[VL];
T B[VL];
size_t Size = 4 * VL;
T A[Size];
T B[Size];
constexpr unsigned HalfVL = VL > 1 ? (VL / 2) : 1;

// The expected result gets the first half of values from B,
int gold[VL];
for (int i = 0; i < VL; ++i) {
int gold[Size];
for (int i = 0; i < Size; ++i) {
A[i] = -i - 1;
B[i] = i + 1;
gold[i] = ((VL > 1) && (i < HalfVL)) ? B[i] : A[i];
gold[i] = ((VL > 1) && ((i % VL) < HalfVL)) ? B[i] : A[i];
}

try {
buffer<T, 1> bufA(A, range<1>(VL));
buffer<T, 1> bufB(B, range<1>(VL));
range<1> glob_range{1};
buffer<T, 1> BufA(A, range<1>(Size));
buffer<T, 1> BufB(B, range<1>(Size));

q.submit([&](handler &cgh) {
auto PA = bufA.template get_access<access::mode::read_write>(cgh);
auto PB = bufB.template get_access<access::mode::read>(cgh);
cgh.parallel_for(glob_range, [=](id<1> i) SYCL_ESIMD_KERNEL {
auto PA = BufA.template get_access<access::mode::read_write>(cgh);
auto PB = BufB.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for(range<1>{Size / VL}, [=](id<1> i) SYCL_ESIMD_KERNEL {
using namespace sycl::ext::intel::esimd;
unsigned int offset = i * VL * sizeof(T);
simd<T, VL> va;
simd<T, VL> vb;
if constexpr (VL == 1) {
va[0] = scalar_load<T>(PA, 0);
vb[0] = scalar_load<T>(PB, 0);
va[0] = scalar_load<T>(PA, offset);
vb[0] = scalar_load<T>(PB, offset);
} else {
va.copy_from(PA, offset);
vb.copy_from(PB, offset);
Expand All @@ -62,7 +62,7 @@ bool test(queue q, std::string str, F funcUnderTest) {
funcUnderTest(va_view, vb_view);

if constexpr (VL == 1) {
scalar_store(PB, 0, (T)va[0]);
scalar_store(PB, offset, (T)va[0]);
} else {
va.copy_to(PA, offset);
}
Expand All @@ -74,7 +74,7 @@ bool test(queue q, std::string str, F funcUnderTest) {
}

int err_cnt = 0;
for (unsigned i = 0; i < VL; ++i) {
for (unsigned i = 0; i < Size; ++i) {
if (A[i] != gold[i]) {
err_cnt++;
std::cout << "failed at index " << i << ": " << A[i] << " != " << gold[i]
Expand Down Expand Up @@ -157,8 +157,7 @@ template <class T> bool testT(queue &q) {
int main(void) {
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
auto dev = q.get_device();
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
<< "\n";
esimd_test::printTestLabel(q);
bool passed = true;
passed &= testT<char>(q);
passed &= testT<float>(q);
Expand Down
4 changes: 2 additions & 2 deletions sycl/test/esimd/intrins_trans.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL

// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=true -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS

// Checks ESIMD intrinsic translation with opaque pointers.
Expand Down
4 changes: 2 additions & 2 deletions sycl/test/esimd/lsc.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: %clangxx -O0 -fsycl -fno-sycl-esimd-force-stateless-mem -fsycl-device-only -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL

// RUN: %clangxx -O0 -fsycl -fsycl-esimd-force-stateless-mem -fsycl-device-only -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS

// Checks ESIMD intrinsic translation.
Expand Down
Loading