diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 80633de59c951..8fae433713432 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1343,7 +1343,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (LangOpts.SYCLUnnamedLambda) Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__"); - if (LangOpts.SYCLESIMDForceStatelessMem) + // Stateless memory may be enforced only for SYCL device or host. + if ((LangOpts.SYCLIsDevice || LangOpts.SYCLIsHost) && + LangOpts.SYCLESIMDForceStatelessMem) Builder.defineMacro("__ESIMD_FORCE_STATELESS_MEM"); // OpenCL definitions. diff --git a/llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll b/llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll index c53a19f1af99f..650bd09cb451a 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll @@ -1,4 +1,5 @@ -; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s +; RUN: opt -passes=LowerESIMD -lower-esimd-force-stateless-mem=false -S < %s | FileCheck --check-prefix=CHECK-NOFORCE --check-prefix=CHECK %s +; RUN: opt -passes=LowerESIMD -lower-esimd-force-stateless-mem=true -S < %s | FileCheck --check-prefix=CHECK-FORCE --check-prefix=CHECK %s ; This test checks that LowerESIMD pass correctly interpretes the ; 'kernel_arg_accessor_ptr' metadata. Particularly, that it generates additional @@ -30,8 +31,14 @@ define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, ptr addrspac ; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" } ; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]} -; CHECK: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0} -; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0} -; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0} -; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"} + +; CHECK-NOFORCE: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0} +; CHECK-FORCE: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_KINDS]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0} + +; CHECK-NOFORCE: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0} +; CHECK-FORCE: ![[ARG_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0} +; CHECK-NOFORCE: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0} + +; CHECK-NOFORCE: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"} +; CHECK-FORCE: ![[ARG_DESCS]] = !{!"", !"svmptr_t", !"svmptr_t", !"", !"svmptr_t"} diff --git a/sycl/include/sycl/ext/intel/esimd/memory.hpp b/sycl/include/sycl/ext/intel/esimd/memory.hpp index 07b9961b8d68a..fdf70937cf28a 100644 --- a/sycl/include/sycl/ext/intel/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/esimd/memory.hpp @@ -620,9 +620,15 @@ scatter(AccessorTy acc, simd offsets, simd vals, /// @return The loaded value. /// template -__ESIMD_API T scalar_load(AccessorTy acc, uint32_t offset) { +__ESIMD_API T scalar_load(AccessorTy acc, +#ifdef __ESIMD_FORCE_STATELESS_MEM + uint64_t offset +#else + uint32_t offset +#endif +) { const simd Res = - gather(acc, simd(offset)); + gather(acc, simd(offset)); return Res[0]; } @@ -634,8 +640,15 @@ __ESIMD_API T scalar_load(AccessorTy acc, uint32_t offset) { /// @param val The stored value. /// template -__ESIMD_API void scalar_store(AccessorTy acc, uint32_t offset, T val) { - scatter(acc, simd(offset), simd(val)); +__ESIMD_API void scalar_store(AccessorTy acc, +#ifdef __ESIMD_FORCE_STATELESS_MEM + uint64_t offset, +#else + uint32_t offset, +#endif + T val) { + scatter(acc, simd(offset), + simd(val)); } /// @anchor usm_gather_rgba diff --git a/sycl/test-e2e/ESIMD/api/simd_view_copy_move_assign.cpp b/sycl/test-e2e/ESIMD/api/simd_view_copy_move_assign.cpp index c80ec19b397b2..c02cca27430a3 100644 --- a/sycl/test-e2e/ESIMD/api/simd_view_copy_move_assign.cpp +++ b/sycl/test-e2e/ESIMD/api/simd_view_copy_move_assign.cpp @@ -24,34 +24,34 @@ using namespace sycl::ext::intel::esimd; template bool test(queue q, std::string str, F funcUnderTest) { std::cout << "Testing " << str << ", VL = " << VL << " ...\n"; - T A[VL]; - T B[VL]; + size_t Size = 4 * VL; + T A[Size]; + T B[Size]; constexpr unsigned HalfVL = VL > 1 ? (VL / 2) : 1; // The expected result gets the first half of values from B, - int gold[VL]; - for (int i = 0; i < VL; ++i) { + int gold[Size]; + for (int i = 0; i < Size; ++i) { A[i] = -i - 1; B[i] = i + 1; - gold[i] = ((VL > 1) && (i < HalfVL)) ? B[i] : A[i]; + gold[i] = ((VL > 1) && ((i % VL) < HalfVL)) ? B[i] : A[i]; } try { - buffer bufA(A, range<1>(VL)); - buffer bufB(B, range<1>(VL)); - range<1> glob_range{1}; + buffer BufA(A, range<1>(Size)); + buffer BufB(B, range<1>(Size)); q.submit([&](handler &cgh) { - auto PA = bufA.template get_access(cgh); - auto PB = bufB.template get_access(cgh); - cgh.parallel_for(glob_range, [=](id<1> i) SYCL_ESIMD_KERNEL { + auto PA = BufA.template get_access(cgh); + auto PB = BufB.template get_access(cgh); + cgh.parallel_for(range<1>{Size / VL}, [=](id<1> i) SYCL_ESIMD_KERNEL { using namespace sycl::ext::intel::esimd; unsigned int offset = i * VL * sizeof(T); simd va; simd vb; if constexpr (VL == 1) { - va[0] = scalar_load(PA, 0); - vb[0] = scalar_load(PB, 0); + va[0] = scalar_load(PA, offset); + vb[0] = scalar_load(PB, offset); } else { va.copy_from(PA, offset); vb.copy_from(PB, offset); @@ -62,7 +62,7 @@ bool test(queue q, std::string str, F funcUnderTest) { funcUnderTest(va_view, vb_view); if constexpr (VL == 1) { - scalar_store(PB, 0, (T)va[0]); + scalar_store(PB, offset, (T)va[0]); } else { va.copy_to(PA, offset); } @@ -74,7 +74,7 @@ bool test(queue q, std::string str, F funcUnderTest) { } int err_cnt = 0; - for (unsigned i = 0; i < VL; ++i) { + for (unsigned i = 0; i < Size; ++i) { if (A[i] != gold[i]) { err_cnt++; std::cout << "failed at index " << i << ": " << A[i] << " != " << gold[i] @@ -157,8 +157,7 @@ template bool testT(queue &q) { int main(void) { queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() - << "\n"; + esimd_test::printTestLabel(q); bool passed = true; passed &= testT(q); passed &= testT(q); diff --git a/sycl/test/esimd/intrins_trans.cpp b/sycl/test/esimd/intrins_trans.cpp index b761aec084645..89e26b04db1b2 100644 --- a/sycl/test/esimd/intrins_trans.cpp +++ b/sycl/test/esimd/intrins_trans.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table +// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table +// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=true -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD intrinsic translation with opaque pointers.