Skip to content

Commit

Permalink
[ESIMD] Prepare for enabling of enforcing of stateless memory mode (#…
Browse files Browse the repository at this point in the history
…10931)

* Support 64-bit offset if scalar_load() and scalar_store()
* Fix simd_view_copy_move_assign.cpp test
* Fix the test intrins_trans.cpp

This patch is only the preliminary step to final enabling of
force-stateless-memory mode that should be finalized here:
#9452

Signed-off-by: Vyacheslav N Klochkov <vyacheslav.n.klochkov@intel.com>
  • Loading branch information
v-klochkov authored Sep 6, 2023
1 parent cb34ea9 commit 0da723f
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 29 deletions.
4 changes: 3 additions & 1 deletion clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1343,7 +1343,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
if (LangOpts.SYCLUnnamedLambda)
Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__");

if (LangOpts.SYCLESIMDForceStatelessMem)
// Stateless memory may be enforced only for SYCL device or host.
if ((LangOpts.SYCLIsDevice || LangOpts.SYCLIsHost) &&
LangOpts.SYCLESIMDForceStatelessMem)
Builder.defineMacro("__ESIMD_FORCE_STATELESS_MEM");

// OpenCL definitions.
Expand Down
17 changes: 12 additions & 5 deletions llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s
; RUN: opt -passes=LowerESIMD -lower-esimd-force-stateless-mem=false -S < %s | FileCheck --check-prefix=CHECK-NOFORCE --check-prefix=CHECK %s
; RUN: opt -passes=LowerESIMD -lower-esimd-force-stateless-mem=true -S < %s | FileCheck --check-prefix=CHECK-FORCE --check-prefix=CHECK %s

; This test checks that LowerESIMD pass correctly interpretes the
; 'kernel_arg_accessor_ptr' metadata. Particularly, that it generates additional
Expand Down Expand Up @@ -30,8 +31,14 @@ define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, ptr addrspac

; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" }
; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]}
; CHECK: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}

; CHECK-NOFORCE: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
; CHECK-FORCE: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_KINDS]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}

; CHECK-NOFORCE: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
; CHECK-FORCE: ![[ARG_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
; CHECK-NOFORCE: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}

; CHECK-NOFORCE: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}
; CHECK-FORCE: ![[ARG_DESCS]] = !{!"", !"svmptr_t", !"svmptr_t", !"", !"svmptr_t"}

21 changes: 17 additions & 4 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,9 +620,15 @@ scatter(AccessorTy acc, simd<Toffset, N> offsets, simd<T, N> vals,
/// @return The loaded value.
///
template <typename T, typename AccessorTy>
__ESIMD_API T scalar_load(AccessorTy acc, uint32_t offset) {
__ESIMD_API T scalar_load(AccessorTy acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
uint64_t offset
#else
uint32_t offset
#endif
) {
const simd<T, 1> Res =
gather<T, 1, AccessorTy>(acc, simd<uint32_t, 1>(offset));
gather<T, 1, AccessorTy>(acc, simd<decltype(offset), 1>(offset));
return Res[0];
}

Expand All @@ -634,8 +640,15 @@ __ESIMD_API T scalar_load(AccessorTy acc, uint32_t offset) {
/// @param val The stored value.
///
template <typename T, typename AccessorTy>
__ESIMD_API void scalar_store(AccessorTy acc, uint32_t offset, T val) {
scatter<T, 1, AccessorTy>(acc, simd<uint32_t, 1>(offset), simd<T, 1>(val));
__ESIMD_API void scalar_store(AccessorTy acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
uint64_t offset,
#else
uint32_t offset,
#endif
T val) {
scatter<T, 1, AccessorTy>(acc, simd<decltype(offset), 1>(offset),
simd<T, 1>(val));
}

/// @anchor usm_gather_rgba
Expand Down
33 changes: 16 additions & 17 deletions sycl/test-e2e/ESIMD/api/simd_view_copy_move_assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,34 +24,34 @@ using namespace sycl::ext::intel::esimd;
template <unsigned VL, class T, class F>
bool test(queue q, std::string str, F funcUnderTest) {
std::cout << "Testing " << str << ", VL = " << VL << " ...\n";
T A[VL];
T B[VL];
size_t Size = 4 * VL;
T A[Size];
T B[Size];
constexpr unsigned HalfVL = VL > 1 ? (VL / 2) : 1;

// The expected result gets the first half of values from B,
int gold[VL];
for (int i = 0; i < VL; ++i) {
int gold[Size];
for (int i = 0; i < Size; ++i) {
A[i] = -i - 1;
B[i] = i + 1;
gold[i] = ((VL > 1) && (i < HalfVL)) ? B[i] : A[i];
gold[i] = ((VL > 1) && ((i % VL) < HalfVL)) ? B[i] : A[i];
}

try {
buffer<T, 1> bufA(A, range<1>(VL));
buffer<T, 1> bufB(B, range<1>(VL));
range<1> glob_range{1};
buffer<T, 1> BufA(A, range<1>(Size));
buffer<T, 1> BufB(B, range<1>(Size));

q.submit([&](handler &cgh) {
auto PA = bufA.template get_access<access::mode::read_write>(cgh);
auto PB = bufB.template get_access<access::mode::read>(cgh);
cgh.parallel_for(glob_range, [=](id<1> i) SYCL_ESIMD_KERNEL {
auto PA = BufA.template get_access<access::mode::read_write>(cgh);
auto PB = BufB.template get_access<access::mode::read_write>(cgh);
cgh.parallel_for(range<1>{Size / VL}, [=](id<1> i) SYCL_ESIMD_KERNEL {
using namespace sycl::ext::intel::esimd;
unsigned int offset = i * VL * sizeof(T);
simd<T, VL> va;
simd<T, VL> vb;
if constexpr (VL == 1) {
va[0] = scalar_load<T>(PA, 0);
vb[0] = scalar_load<T>(PB, 0);
va[0] = scalar_load<T>(PA, offset);
vb[0] = scalar_load<T>(PB, offset);
} else {
va.copy_from(PA, offset);
vb.copy_from(PB, offset);
Expand All @@ -62,7 +62,7 @@ bool test(queue q, std::string str, F funcUnderTest) {
funcUnderTest(va_view, vb_view);

if constexpr (VL == 1) {
scalar_store(PB, 0, (T)va[0]);
scalar_store(PB, offset, (T)va[0]);
} else {
va.copy_to(PA, offset);
}
Expand All @@ -74,7 +74,7 @@ bool test(queue q, std::string str, F funcUnderTest) {
}

int err_cnt = 0;
for (unsigned i = 0; i < VL; ++i) {
for (unsigned i = 0; i < Size; ++i) {
if (A[i] != gold[i]) {
err_cnt++;
std::cout << "failed at index " << i << ": " << A[i] << " != " << gold[i]
Expand Down Expand Up @@ -157,8 +157,7 @@ template <class T> bool testT(queue &q) {
int main(void) {
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
auto dev = q.get_device();
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
<< "\n";
esimd_test::printTestLabel(q);
bool passed = true;
passed &= testT<char>(q);
passed &= testT<float>(q);
Expand Down
4 changes: 2 additions & 2 deletions sycl/test/esimd/intrins_trans.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL

// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=true -O0 -S %t -o %t.table
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS

// Checks ESIMD intrinsic translation with opaque pointers.
Expand Down

0 comments on commit 0da723f

Please sign in to comment.