From 149c63f55114a36209fec76b679248e2bd7288e3 Mon Sep 17 00:00:00 2001 From: sergei <57672082+s-kanaev@users.noreply.github.com> Date: Thu, 1 Oct 2020 00:18:04 +0300 Subject: [PATCH 1/4] [NFC][SYCL] Fix comment. (#2541) Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 7406f82331bf..4f7db7d0f4dc 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -493,10 +493,10 @@ class ExecCGCommand : public Command { detail::CG &getCG() const { return *MCommandGroup; } - // MEmptyCmd one is only employed if this command refers to host-task. - // MEmptyCmd due to unreliable mechanism of lookup for single EmptyCommand - // amongst users of host-task-representing command. This unreliability roots - // in cleanup process. + // MEmptyCmd is only employed if this command refers to host-task. + // The mechanism of lookup for single EmptyCommand amongst users of + // host-task-representing command is unreliable. This unreliability roots in + // the cleanup process. EmptyCommand *MEmptyCmd = nullptr; private: From eac16d8aa7177969ea063976c78081d0efd2f93b Mon Sep 17 00:00:00 2001 From: Aleksander Fadeev <60697485+fadeeval@users.noreply.github.com> Date: Thu, 1 Oct 2020 00:19:01 +0300 Subject: [PATCH 2/4] [SYCL] Change adress space for global variables (#2534) GlobalVariables shouldn't have private address space. This PR change usage of private address space to global for global variables. Private address space maps to Function StorageClass in llvm-spirv translator, but global declarations shouldn't have Function Storage Class due to SPIRV spec (https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html). Signed-off-by: Aleksander Fadeev --- clang/lib/CodeGen/CodeGenModule.cpp | 5 ++- clang/test/CodeGenSYCL/address-space-new.cpp | 10 ++--- .../CodeGenSYCL/address-space-of-returns.cpp | 2 +- .../CodeGenSYCL/static-var-address-space.cpp | 29 ++++++++++++++ clang/test/CodeGenSYCL/unique-stable-name.cpp | 38 +++++++++---------- 5 files changed, 58 insertions(+), 26 deletions(-) create mode 100644 clang/test/CodeGenSYCL/static-var-address-space.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a8da0b910092..3df6120840cc 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4012,7 +4012,10 @@ LangAS CodeGenModule::getStringLiteralAddressSpace() const { // const char *getLiteral() n{ // return "AB"; // } - return LangAS::opencl_private; + // Use global address space to avoid illegal casts from constant to generic. + // Private address space is not used here because in SPIR-V global values + // cannot have private address space. + return LangAS::opencl_global; if (auto AS = getTarget().getConstantAddressSpace()) return AS.getValue(); return LangAS::Default; diff --git a/clang/test/CodeGenSYCL/address-space-new.cpp b/clang/test/CodeGenSYCL/address-space-new.cpp index 1caf5d49dd20..788d7e0e2f13 100644 --- a/clang/test/CodeGenSYCL/address-space-new.cpp +++ b/clang/test/CodeGenSYCL/address-space-new.cpp @@ -29,7 +29,7 @@ void test() { (void)bars; // CHECK: @_ZZ4testvE4bars = internal addrspace(1) constant <{ [21 x i32], [235 x i32] }> <{ [21 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20], [235 x i32] zeroinitializer }>, align 4 - // CHECK: @[[STR:[.a-zA-Z0-9_]+]] = private unnamed_addr constant [14 x i8] c"Hello, world!\00", align 1 + // CHECK: @[[STR:[.a-zA-Z0-9_]+]] = private unnamed_addr addrspace(1) constant [14 x i8] c"Hello, world!\00", align 1 // CHECK: %i.ascast = addrspacecast i32* %i to i32 addrspace(4)* // CHECK: %[[ARR:[a-zA-Z0-9]+]] = alloca [42 x i32] @@ -69,7 +69,7 @@ void test() { // CHECK: %cmp{{[0-9]+}} = icmp ult i32 addrspace(4)* %[[VALAPTR]], %[[ADDPTRCAST]] const char *str = "Hello, world!"; - // CHECK: store i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([14 x i8], [14 x i8]* @[[STR]], i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)** %[[STRVAL:[a-zA-Z0-9]+]], align 8 + // CHECK: store i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(1)* @[[STR]], i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)** %[[STRVAL:[a-zA-Z0-9]+]], align 8 i = str[0]; @@ -85,11 +85,11 @@ void test() { // CHECK: [[CONDFALSE]]: // CHECK: [[CONDEND]]: - // CHECK-NEXT: phi i8 addrspace(4)* [ %[[VALTRUE]], %[[CONDTRUE]] ], [ addrspacecast (i8* getelementptr inbounds ([21 x i8], [21 x i8]* @{{.*}}, i64 0, i64 0) to i8 addrspace(4)*), %[[CONDFALSE]] ] + // CHECK-NEXT: phi i8 addrspace(4)* [ %[[VALTRUE]], %[[CONDTRUE]] ], [ addrspacecast (i8 addrspace(1)* getelementptr inbounds ([21 x i8], [21 x i8] addrspace(1)* @{{.*}}, i64 0, i64 0) to i8 addrspace(4)*), %[[CONDFALSE]] ] const char *select_null = i > 2 ? "Yet another Hello world" : nullptr; (void)select_null; - // CHECK: select i1 %{{.*}}, i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([24 x i8], [24 x i8]* @{{.*}}, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* null + // CHECK: select i1 %{{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([24 x i8], [24 x i8] addrspace(1)* @{{.*}}, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* null const char *select_str_trivial1 = true ? str : "Another hello world!"; (void)select_str_trivial1; @@ -98,7 +98,7 @@ void test() { const char *select_str_trivial2 = false ? str : "Another hello world!"; (void)select_str_trivial2; - // CHECK: store i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([21 x i8], [21 x i8]* @{{.*}}, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)** %{{.*}} + // CHECK: store i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([21 x i8], [21 x i8] addrspace(1)* @{{.*}}, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)** %{{.*}} // // Y yy; diff --git a/clang/test/CodeGenSYCL/address-space-of-returns.cpp b/clang/test/CodeGenSYCL/address-space-of-returns.cpp index 24bd762bb28d..3b56e34bd5be 100644 --- a/clang/test/CodeGenSYCL/address-space-of-returns.cpp +++ b/clang/test/CodeGenSYCL/address-space-of-returns.cpp @@ -7,7 +7,7 @@ struct A { const char *ret_char() { return "N"; } -// CHECK: ret i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0) to i8 addrspace(4)*) +// CHECK: ret i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(1)* @.str, i64 0, i64 0) to i8 addrspace(4)*) const char *ret_arr() { const static char Arr[36] = "Carrots, cabbage, radish, potatoes!"; diff --git a/clang/test/CodeGenSYCL/static-var-address-space.cpp b/clang/test/CodeGenSYCL/static-var-address-space.cpp new file mode 100644 index 000000000000..f5d6b7b27041 --- /dev/null +++ b/clang/test/CodeGenSYCL/static-var-address-space.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +#include "Inputs/sycl.hpp" +struct C { + static int c; +}; + +template +struct D { + static T d; +}; + +template +void test() { + // CHECK: @_ZZ4testIiEvvE1a = linkonce_odr addrspace(1) constant i32 0, comdat, align 4 + static const int a = 0; + // CHECK: @_ZZ4testIiEvvE1b = linkonce_odr addrspace(1) constant i32 0, comdat, align 4 + static const T b = T(0); + // CHECK: @_ZN1C1cE = external addrspace(1) global i32, align 4 + C::c = 10; + const C struct_c; + // CHECK: @_ZN1DIiE1dE = external addrspace(1) global i32, align 4 + D::d = 11; + const D struct_d; +} + +int main() { + cl::sycl::kernel_single_task([]() { test(); }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/unique-stable-name.cpp b/clang/test/CodeGenSYCL/unique-stable-name.cpp index 66ca499e6cda..64b06f0a2fd7 100644 --- a/clang/test/CodeGenSYCL/unique-stable-name.cpp +++ b/clang/test/CodeGenSYCL/unique-stable-name.cpp @@ -1,12 +1,12 @@ // RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s -// CHECK: @[[INT:[^\w]+]] = private unnamed_addr constant [[INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSi\00" -// CHECK: @[[LAMBDA_X:[^\w]+]] = private unnamed_addr constant [[LAMBDA_X_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE46_16\00" -// CHECK: @[[MACRO_X:[^\w]+]] = private unnamed_addr constant [[MACRO_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE52_7m28_18\00" -// CHECK: @[[MACRO_Y:[^\w]+]] = private unnamed_addr constant [[MACRO_SIZE]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE52_7m28_41\00" -// CHECK: @[[MACRO_MACRO_X:[^\w]+]] = private unnamed_addr constant [[MACRO_MACRO_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE55_7m28_18m33_4\00" -// CHECK: @[[MACRO_MACRO_Y:[^\w]+]] = private unnamed_addr constant [[MACRO_MACRO_SIZE]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE55_7m28_41m33_4\00" -// CHECK: @[[LAMBDA_IN_DEP_INT:[^\w]+]] = private unnamed_addr constant [[DEP_INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIiEvvEUlvE23_12\00", -// CHECK: @[[LAMBDA_IN_DEP_X:[^\w]+]] = private unnamed_addr constant [[DEP_LAMBDA_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIZZ4mainENKUlvE42_5clEvEUlvE46_16EvvEUlvE23_12\00", +// CHECK: @[[INT:[^\w]+]] = private unnamed_addr addrspace(1) constant [[INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSi\00" +// CHECK: @[[LAMBDA_X:[^\w]+]] = private unnamed_addr addrspace(1) constant [[LAMBDA_X_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE46_16\00" +// CHECK: @[[MACRO_X:[^\w]+]] = private unnamed_addr addrspace(1) constant [[MACRO_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE52_7m28_18\00" +// CHECK: @[[MACRO_Y:[^\w]+]] = private unnamed_addr addrspace(1) constant [[MACRO_SIZE]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE52_7m28_41\00" +// CHECK: @[[MACRO_MACRO_X:[^\w]+]] = private unnamed_addr addrspace(1) constant [[MACRO_MACRO_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE55_7m28_18m33_4\00" +// CHECK: @[[MACRO_MACRO_Y:[^\w]+]] = private unnamed_addr addrspace(1) constant [[MACRO_MACRO_SIZE]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE55_7m28_41m33_4\00" +// CHECK: @[[LAMBDA_IN_DEP_INT:[^\w]+]] = private unnamed_addr addrspace(1) constant [[DEP_INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIiEvvEUlvE23_12\00", +// CHECK: @[[LAMBDA_IN_DEP_X:[^\w]+]] = private unnamed_addr addrspace(1) constant [[DEP_LAMBDA_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIZZ4mainENKUlvE42_5clEvEUlvE46_16EvvEUlvE23_12\00", extern "C" void printf(const char *) {} @@ -41,36 +41,36 @@ int main() { kernel_single_task( []() { printf(__builtin_unique_stable_name(int)); - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]] addrspace(1)* @[[INT]] auto x = [](){}; printf(__builtin_unique_stable_name(x)); printf(__builtin_unique_stable_name(decltype(x))); - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]]* @[[LAMBDA_X]] - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]]* @[[LAMBDA_X]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]] addrspace(1)* @[[LAMBDA_X]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]] addrspace(1)* @[[LAMBDA_X]] DEF_IN_MACRO(); - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_X]] - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_Y]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]] addrspace(1)* @[[MACRO_X]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]] addrspace(1)* @[[MACRO_Y]] MACRO_CALLS_MACRO(); - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_X]] - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_Y]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]] addrspace(1)* @[[MACRO_MACRO_X]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]] addrspace(1)* @[[MACRO_MACRO_Y]] template_param(); // CHECK: define linkonce_odr spir_func void @_Z14template_paramIiEvv - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]] addrspace(1)* @[[INT]] template_param(); // CHECK: define internal spir_func void @"_Z14template_paramIZZ4mainENK3 - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]]* @[[LAMBDA_X]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]] addrspace(1)* @[[LAMBDA_X]] lambda_in_dependent_function(); // CHECK: define linkonce_odr spir_func void @_Z28lambda_in_dependent_functionIiEvv - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_INT_SIZE]], [[DEP_INT_SIZE]]* @[[LAMBDA_IN_DEP_INT]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[DEP_INT_SIZE]], [[DEP_INT_SIZE]] addrspace(1)* @[[LAMBDA_IN_DEP_INT]] lambda_in_dependent_function(); // CHECK: define internal spir_func void @"_Z28lambda_in_dependent_functionIZZ4mainENK3$_0clEvEUlvE_Evv - // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_LAMBDA_SIZE]], [[DEP_LAMBDA_SIZE]]* @[[LAMBDA_IN_DEP_X]] + // CHECK: call spir_func void @printf(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([[DEP_LAMBDA_SIZE]], [[DEP_LAMBDA_SIZE]] addrspace(1)* @[[LAMBDA_IN_DEP_X]] }); } From a108e931921dce0165458901f2828e432a82351e Mon Sep 17 00:00:00 2001 From: Alexander Batashev Date: Thu, 1 Oct 2020 00:19:51 +0300 Subject: [PATCH 3/4] [SYCL][NFC] Add class layout ABI tests for memory objects (#2559) --- sycl/test/abi/layout_buffer.cpp | 94 ++++++++++++++++++++++++++++++ sycl/test/abi/layout_image.cpp | 100 ++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 sycl/test/abi/layout_buffer.cpp create mode 100644 sycl/test/abi/layout_image.cpp diff --git a/sycl/test/abi/layout_buffer.cpp b/sycl/test/abi/layout_buffer.cpp new file mode 100644 index 000000000000..f4de00d90ccc --- /dev/null +++ b/sycl/test/abi/layout_buffer.cpp @@ -0,0 +1,94 @@ +// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s | FileCheck %s +// REQUIRES: linux + +// clang-format off + +#include + +void foo(sycl::buffer) {} +// CHECK: 0 | class cl::sycl::detail::buffer_impl +// CHECK-NEXT: 0 | class cl::sycl::detail::SYCLMemObjT (primary base) +// CHECK-NEXT: 0 | class cl::sycl::detail::SYCLMemObjI (primary base) +// CHECK-NEXT: 0 | (SYCLMemObjI vtable pointer) +// CHECK-NEXT: 8 | class std::shared_ptr MRecord +// CHECK-NEXT: 8 | class std::__shared_ptr (base) +// CHECK-NEXT: 8 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 8 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 16 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 16 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 24 | class std::unique_ptr > MAllocator +// CHECK: 24 | class std::__uniq_ptr_impl > +// CHECK-NEXT: 24 | class std::tuple > _M_t +// CHECK-NEXT: 24 | struct std::_Tuple_impl<0, class cl::sycl::detail::SYCLMemObjAllocator *, struct std::default_delete > (base) +// CHECK-NEXT: 24 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) +// CHECK-NEXT: 24 | struct std::_Head_base<1, struct std::default_delete, true> (base) (empty) +// CHECK-NEXT: 24 | struct std::default_delete (base) (empty) +// CHECK-NEXT: 24 | struct std::_Head_base<0, class cl::sycl::detail::SYCLMemObjAllocator *, false> (base) +// CHECK-NEXT: 24 | class cl::sycl::detail::SYCLMemObjAllocator * _M_head_impl +// CHECK-NEXT: 32 | class cl::sycl::property_list MProps +// CHECK-NEXT: 32 | class cl::sycl::detail::PropertyListBase (base) +// CHECK-NEXT: 32 | class std::bitset<7> MDataLessProps +// CHECK-NEXT: 32 | struct std::_Base_bitset<1> (base) +// CHECK-NEXT: 32 | std::_Base_bitset<1>::_WordT _M_w +// CHECK-NEXT: 40 | class std::vector, class std::allocator > > MPropsWithData +// CHECK-NEXT: 40 | struct std::_Vector_base, class std::allocator > > (base) +// CHECK-NEXT: 40 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl +// CHECK-NEXT: 40 | class std::allocator > (base) (empty) +// CHECK-NEXT: 40 | class __gnu_cxx::new_allocator > (base) (empty) +// CHECK: 40 | std::_Vector_base, class std::allocator > >::pointer _M_start +// CHECK-NEXT: 48 | std::_Vector_base, class std::allocator > >::pointer _M_finish +// CHECK-NEXT: 56 | std::_Vector_base, class std::allocator > >::pointer _M_end_of_storage +// CHECK-NEXT: 64 | class std::shared_ptr MInteropEvent +// CHECK-NEXT: 64 | class std::__shared_ptr (base) +// CHECK-NEXT: 64 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 64 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 72 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 72 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 80 | class std::shared_ptr MInteropContext +// CHECK-NEXT: 80 | class std::__shared_ptr (base) +// CHECK-NEXT: 80 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 80 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 88 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 88 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 96 | cl_mem MInteropMemObject +// CHECK-NEXT: 104 | _Bool MOpenCLInterop +// CHECK-NEXT: 105 | _Bool MHostPtrReadOnly +// CHECK-NEXT: 106 | _Bool MNeedWriteBack +// CHECK-NEXT: 112 | size_t MSizeInBytes +// CHECK-NEXT: 120 | void * MUserPtr +// CHECK-NEXT: 128 | void * MShadowCopy +// CHECK-NEXT: 136 | class std::function MUploadDataFunctor +// CHECK-NEXT: 136 | struct std::_Maybe_unary_or_binary_function (base) (empty) +// CHECK-NEXT: 136 | class std::_Function_base (base) +// CHECK-NEXT: 136 | union std::_Any_data _M_functor +// CHECK-NEXT: 136 | union std::_Nocopy_types _M_unused +// CHECK-NEXT: 136 | void * _M_object +// CHECK-NEXT: 136 | const void * _M_const_object +// CHECK-NEXT: 136 | void (*)(void) _M_function_pointer +// CHECK-NEXT: 136 | void (class std::_Undefined_class::*)(void) _M_member_pointer +// CHECK-NEXT: 136 | char [16] _M_pod_data +// CHECK-NEXT: 152 | std::_Function_base::_Manager_type _M_manager +// CHECK-NEXT: 160 | std::function::_Invoker_type _M_invoker +// CHECK-NEXT: 168 | class std::shared_ptr MSharedPtrStorage +// CHECK-NEXT: 168 | class std::__shared_ptr (base) +// CHECK-NEXT: 168 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 168 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 176 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 176 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: | [sizeof=184, dsize=184, align=8, +// CHECK-NEXT: | nvsize=184, nvalign=8] + +// CHECK: 0 | class cl::sycl::buffer, void> +// CHECK-NEXT: 0 | class std::shared_ptr impl +// CHECK-NEXT: 0 | class std::__shared_ptr (base) +// CHECK-NEXT: 0 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 0 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 8 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 8 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 16 | class cl::sycl::range<2> Range +// CHECK-NEXT: 16 | class cl::sycl::detail::array<2> (base) +// CHECK-NEXT: 16 | size_t [2] common_array +// CHECK-NEXT: 32 | size_t OffsetInBytes +// CHECK-NEXT: 40 | _Bool IsSubBuffer +// CHECK-NEXT: | [sizeof=48, dsize=41, align=8, +// CHECK-NEXT: | nvsize=41, nvalign=8] diff --git a/sycl/test/abi/layout_image.cpp b/sycl/test/abi/layout_image.cpp new file mode 100644 index 000000000000..0e514d22c939 --- /dev/null +++ b/sycl/test/abi/layout_image.cpp @@ -0,0 +1,100 @@ +// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s | FileCheck %s +// REQUIRES: linux + +// clang-format off + +#include + +sycl::image<2> Img{sycl::image_channel_order::rgba, sycl::image_channel_type::fp16, sycl::range<2>{10, 10}}; + +// CHECK: 0 | class cl::sycl::detail::image_impl<2> +// CHECK-NEXT: 0 | class cl::sycl::detail::SYCLMemObjT (primary base) +// CHECK-NEXT: 0 | class cl::sycl::detail::SYCLMemObjI (primary base) +// CHECK-NEXT: 0 | (SYCLMemObjI vtable pointer) +// CHECK-NEXT: 8 | class std::shared_ptr MRecord +// CHECK-NEXT: 8 | class std::__shared_ptr (base) +// CHECK-NEXT: 8 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 8 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 16 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 16 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 24 | class std::unique_ptr > MAllocator +// CHECK: 24 | class std::__uniq_ptr_impl > +// CHECK-NEXT: 24 | class std::tuple > _M_t +// CHECK-NEXT: 24 | struct std::_Tuple_impl<0, class cl::sycl::detail::SYCLMemObjAllocator *, struct std::default_delete > (base) +// CHECK-NEXT: 24 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) +// CHECK-NEXT: 24 | struct std::_Head_base<1, struct std::default_delete, true> (base) (empty) +// CHECK-NEXT: 24 | struct std::default_delete (base) (empty) +// CHECK-NEXT: 24 | struct std::_Head_base<0, class cl::sycl::detail::SYCLMemObjAllocator *, false> (base) +// CHECK-NEXT: 24 | class cl::sycl::detail::SYCLMemObjAllocator * _M_head_impl +// CHECK-NEXT: 32 | class cl::sycl::property_list MProps +// CHECK-NEXT: 32 | class cl::sycl::detail::PropertyListBase (base) +// CHECK-NEXT: 32 | class std::bitset<7> MDataLessProps +// CHECK-NEXT: 32 | struct std::_Base_bitset<1> (base) +// CHECK-NEXT: 32 | std::_Base_bitset<1>::_WordT _M_w +// CHECK-NEXT: 40 | class std::vector, class std::allocator > > MPropsWithData +// CHECK-NEXT: 40 | struct std::_Vector_base, class std::allocator > > (base) +// CHECK-NEXT: 40 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl +// CHECK-NEXT: 40 | class std::allocator > (base) (empty) +// CHECK-NEXT: 40 | class __gnu_cxx::new_allocator > (base) (empty) +// CHECK: 40 | std::_Vector_base, class std::allocator > >::pointer _M_start +// CHECK-NEXT: 48 | std::_Vector_base, class std::allocator > >::pointer _M_finish +// CHECK-NEXT: 56 | std::_Vector_base, class std::allocator > >::pointer _M_end_of_storage +// CHECK-NEXT: 64 | class std::shared_ptr MInteropEvent +// CHECK-NEXT: 64 | class std::__shared_ptr (base) +// CHECK-NEXT: 64 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 64 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 72 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 72 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 80 | class std::shared_ptr MInteropContext +// CHECK-NEXT: 80 | class std::__shared_ptr (base) +// CHECK-NEXT: 80 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 80 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 88 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 88 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 96 | cl_mem MInteropMemObject +// CHECK-NEXT: 104 | _Bool MOpenCLInterop +// CHECK-NEXT: 105 | _Bool MHostPtrReadOnly +// CHECK-NEXT: 106 | _Bool MNeedWriteBack +// CHECK-NEXT: 112 | size_t MSizeInBytes +// CHECK-NEXT: 120 | void * MUserPtr +// CHECK-NEXT: 128 | void * MShadowCopy +// CHECK-NEXT: 136 | class std::function MUploadDataFunctor +// CHECK-NEXT: 136 | struct std::_Maybe_unary_or_binary_function (base) (empty) +// CHECK-NEXT: 136 | class std::_Function_base (base) +// CHECK-NEXT: 136 | union std::_Any_data _M_functor +// CHECK-NEXT: 136 | union std::_Nocopy_types _M_unused +// CHECK-NEXT: 136 | void * _M_object +// CHECK-NEXT: 136 | const void * _M_const_object +// CHECK-NEXT: 136 | void (*)(void) _M_function_pointer +// CHECK-NEXT: 136 | void (class std::_Undefined_class::*)(void) _M_member_pointer +// CHECK-NEXT: 136 | char [16] _M_pod_data +// CHECK-NEXT: 152 | std::_Function_base::_Manager_type _M_manager +// CHECK-NEXT: 160 | std::function::_Invoker_type _M_invoker +// CHECK-NEXT: 168 | class std::shared_ptr MSharedPtrStorage +// CHECK-NEXT: 168 | class std::__shared_ptr (base) +// CHECK-NEXT: 168 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 168 | std::__shared_ptr::element_type * _M_ptr +// CHECK-NEXT: 176 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 176 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 184 | _Bool MIsArrayImage +// CHECK-NEXT: 192 | class cl::sycl::range<2> MRange +// CHECK-NEXT: 192 | class cl::sycl::detail::array<2> (base) +// CHECK-NEXT: 192 | size_t [2] common_array +// CHECK-NEXT: 208 | enum cl::sycl::image_channel_order MOrder +// CHECK-NEXT: 212 | enum cl::sycl::image_channel_type MType +// CHECK-NEXT: 216 | uint8_t MNumChannels +// CHECK-NEXT: 224 | size_t MElementSize +// CHECK-NEXT: 232 | size_t MRowPitch +// CHECK-NEXT: 240 | size_t MSlicePitch +// CHECK-NEXT: | [sizeof=248, dsize=248, align=8, +// CHECK-NEXT: | nvsize=248, nvalign=8] + +// CHECK: 0 | class cl::sycl::image<2, class cl::sycl::detail::aligned_allocator > +// CHECK-NEXT: 0 | class std::shared_ptr > impl +// CHECK-NEXT: 0 | class std::__shared_ptr, __gnu_cxx::_S_atomic> (base) +// CHECK-NEXT: 0 | class std::__shared_ptr_access, __gnu_cxx::_S_atomic, false, false> (base) (empty) +// CHECK-NEXT: 0 | std::__shared_ptr, __gnu_cxx::_S_atomic>::element_type * _M_ptr +// CHECK-NEXT: 8 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount +// CHECK-NEXT: 8 | _Sp_counted_base<(enum __gnu_cxx::_Lock_policy)2U> * _M_pi +// CHECK-NEXT: | [sizeof=16, dsize=16, align=8, +// CHECK-NEXT: | nvsize=16, nvalign=8] From bb68eef4084f7ccbcdce84b168c1e966ef422d71 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Thu, 1 Oct 2020 00:20:09 +0300 Subject: [PATCH 4/4] [SYCL][ESIMD][NFC] Align namespace name with the spec guidelines (#2573) SYCL specification recommends using `detail` namespace for implementation details. This change should unify the name across the whole project. --- .../CL/sycl/INTEL/esimd/detail/esimd_util.hpp | 4 +- .../CL/sycl/INTEL/esimd/esimd_math.hpp | 242 +++++++++--------- 2 files changed, 117 insertions(+), 129 deletions(-) diff --git a/sycl/include/CL/sycl/INTEL/esimd/detail/esimd_util.hpp b/sycl/include/CL/sycl/INTEL/esimd/detail/esimd_util.hpp index 42ce828da229..4bd5755c8f33 100755 --- a/sycl/include/CL/sycl/INTEL/esimd/detail/esimd_util.hpp +++ b/sycl/include/CL/sycl/INTEL/esimd/detail/esimd_util.hpp @@ -77,7 +77,7 @@ constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) { return (1 << ElemsPerAddrEncoded); } -namespace details { +namespace detail { /// type traits template struct is_esimd_vector { @@ -236,7 +236,7 @@ template <> struct word_type { using type = short; }; template <> struct word_type { using type = ushort; }; template <> struct word_type { using type = ushort; }; -} // namespace details +} // namespace detail } // namespace gpu } // namespace INTEL } // namespace sycl diff --git a/sycl/include/CL/sycl/INTEL/esimd/esimd_math.hpp b/sycl/include/CL/sycl/INTEL/esimd/esimd_math.hpp index 1f241c63745c..c97649f00f77 100644 --- a/sycl/include/CL/sycl/INTEL/esimd/esimd_math.hpp +++ b/sycl/include/CL/sycl/INTEL/esimd/esimd_math.hpp @@ -45,7 +45,7 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_sat(simd src) { } // esimd_abs -namespace details { +namespace detail { template ESIMD_NODEBUG ESIMD_INLINE simd @@ -59,8 +59,8 @@ __esimd_abs_common_internal(simd src0, int flag = GENX_NOSAT) { template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value, + typename std::enable_if::value && + detail::is_esimd_scalar::value, typename std::remove_const::type>::type __esimd_abs_common_internal(T1 src0, int flag = GENX_NOSAT) { typedef typename std::remove_const::type TT0; @@ -70,7 +70,7 @@ ESIMD_NODEBUG ESIMD_INLINE simd Result = __esimd_abs_common_internal(Src0, flag); return Result[0]; } -} // namespace details +} // namespace detail template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< @@ -78,32 +78,32 @@ ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< typename std::remove_const::type>::value, simd>::type esimd_abs(simd src0, int flag = GENX_NOSAT) { - return details::__esimd_abs_common_internal(src0, flag); + return detail::__esimd_abs_common_internal(src0, flag); } template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< !std::is_same::type, typename std::remove_const::type>::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value, + detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value, typename std::remove_const::type>::type esimd_abs(T1 src0, int flag = GENX_NOSAT) { - return details::__esimd_abs_common_internal(src0, flag); + return detail::__esimd_abs_common_internal(src0, flag); } template ESIMD_NODEBUG ESIMD_INLINE simd esimd_abs(simd src0, int flag = GENX_NOSAT) { - return details::__esimd_abs_common_internal(src0, flag); + return detail::__esimd_abs_common_internal(src0, flag); } template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value, + typename std::enable_if::value, typename std::remove_const::type>::type esimd_abs(T1 src0, int flag = GENX_NOSAT) { - return details::__esimd_abs_common_internal(src0, flag); + return detail::__esimd_abs_common_internal(src0, flag); } // esimd_shl @@ -115,8 +115,8 @@ ESIMD_NODEBUG ESIMD_INLINE simd>::type esimd_shl(simd src0, U src1, int flag = GENX_NOSAT) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; if (flag != GENX_SAT) { if constexpr (std::is_unsigned::value) { @@ -147,15 +147,14 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && std::is_integral::value && + detail::is_esimd_scalar::value && detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value && std::is_integral::value && std::is_integral::value && std::is_integral::value, typename std::remove_const::type>::type esimd_shl(T1 src0, T2 src1, int flag = GENX_NOSAT) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; simd Result = esimd_shl(Src0, Src1, flag); return Result[0]; } @@ -169,9 +168,9 @@ ESIMD_NODEBUG ESIMD_INLINE simd>::type esimd_shr(simd src0, U src1, int flag = GENX_NOSAT) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; - typename details::simd_type::type Result = + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; + typename detail::simd_type::type Result = Src0.data() >> Src1.data(); if (flag != GENX_SAT) @@ -182,15 +181,14 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && std::is_integral::value && + detail::is_esimd_scalar::value && detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value && std::is_integral::value && std::is_integral::value && std::is_integral::value, typename std::remove_const::type>::type esimd_shr(T1 src0, T2 src1, int flag = GENX_NOSAT) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; simd Result = esimd_shr(Src0, Src1, flag); return Result[0]; } @@ -213,22 +211,21 @@ ESIMD_NODEBUG ESIMD_INLINE simd>::type esimd_rol(simd src0, U src1) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; return __esimd_rol(Src0.data(), Src1.data()); } template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && std::is_integral::value && + detail::is_esimd_scalar::value && detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value && std::is_integral::value && std::is_integral::value && std::is_integral::value, typename std::remove_const::type>::type esimd_rol(T1 src0, T2 src1) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; simd Result = esimd_rol(Src0, Src1); return Result[0]; } @@ -251,22 +248,21 @@ ESIMD_NODEBUG ESIMD_INLINE simd>::type esimd_ror(simd src0, U src1) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; return __esimd_ror(Src0.data(), Src1.data()); } template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && std::is_integral::value && + detail::is_esimd_scalar::value && detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value && std::is_integral::value && std::is_integral::value && std::is_integral::value, typename std::remove_const::type>::type esimd_ror(T1 src0, T2 src1) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; simd Result = esimd_ror(Src0, Src1); return Result[0]; } @@ -292,24 +288,23 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && std::is_integral::value && + detail::is_esimd_scalar::value && detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value && std::is_integral::value && std::is_integral::value && std::is_integral::value, typename std::remove_const::type>::type esimd_lsr(T1 src0, T2 src1, int flag = GENX_NOSAT) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; simd Result = esimd_lsr(Src0, Src1, flag); return Result[0]; } template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_esimd_scalar::value && - details::is_esimd_vector::value && std::is_integral::value && - std::is_integral::value && std::is_integral::value, + detail::is_esimd_scalar::value && detail::is_esimd_vector::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value, decltype(esimd_lsr(T2(), T1()))>::type esimd_lsr(T1 src0, T2 src1, int flag = GENX_NOSAT) { return esimd_lsr(src1, src0, flag); @@ -336,24 +331,23 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value && std::is_integral::value && + detail::is_esimd_scalar::value && detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value && std::is_integral::value && std::is_integral::value && std::is_integral::value, typename std::remove_const::type>::type esimd_asr(T1 src0, T2 src1, int flag = GENX_NOSAT) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; simd Result = esimd_asr(Src0, Src1, flag); return Result[0]; } template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_esimd_scalar::value && - details::is_esimd_vector::value && std::is_integral::value && - std::is_integral::value && std::is_integral::value, + detail::is_esimd_scalar::value && detail::is_esimd_vector::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value, decltype(esimd_asr(T2(), T1()))>::type esimd_asr(T1 src0, T2 src1, int flag = GENX_NOSAT) { return esimd_asr(src1, src0, flag); @@ -364,14 +358,14 @@ esimd_asr(T1 src0, T2 src1, int flag = GENX_NOSAT) { // use mulh instruction for high half template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_dword_type::value && - details::is_dword_type::value, + typename std::enable_if::value && + detail::is_dword_type::value && + detail::is_dword_type::value, simd>::type esimd_imul(simd &rmd, simd src0, U src1) { typedef typename computation_type::type ComputationTy; - typename details::simd_type::type Src0 = src0; - typename details::simd_type::type Src1 = src1; + typename detail::simd_type::type Src0 = src0; + typename detail::simd_type::type Src1 = src1; rmd = Src0 * Src1; if constexpr (std::is_unsigned::value) return __esimd_umulh(Src0.data(), Src1.data()); @@ -385,9 +379,9 @@ ESIMD_NODEBUG ESIMD_INLINE // stride must also be 1" on the selects. template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_dword_type::value && - details::is_dword_type::value && SZ == 1, + typename std::enable_if::value && + detail::is_dword_type::value && + detail::is_dword_type::value && SZ == 1, simd>::type esimd_imul(simd &rmd, simd src0, U src1) { typedef @@ -400,9 +394,9 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_dword_type::value && - details::is_dword_type::value && SZ != 1, + typename std::enable_if::value && + detail::is_dword_type::value && + detail::is_dword_type::value && SZ != 1, simd>::type esimd_imul(simd &rmd, simd src0, U src1) { typedef @@ -418,7 +412,7 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value, + typename std::enable_if::value, simd>::type esimd_imul(simd &rmd, U src0, simd src1) { return esimd_imul(rmd, src1, src0); @@ -426,9 +420,9 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value, + typename std::enable_if::value && + detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value, T0>::type esimd_imul(simd &rmd, T src0, U src1) { simd src_0 = src0; @@ -446,13 +440,11 @@ esimd_quot(simd src0, U src1) { } template -ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value && - std::is_integral::value && - std::is_integral::value, - typename std::remove_const::type>::type - esimd_quot(T0 src0, T1 src1) { +ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< + detail::is_esimd_scalar::value && detail::is_esimd_scalar::value && + std::is_integral::value && std::is_integral::value, + typename std::remove_const::type>::type +esimd_quot(T0 src0, T1 src1) { return src0 / src1; } @@ -465,13 +457,11 @@ esimd_mod(simd src0, U src1) { } template -ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value && - std::is_integral::value && - std::is_integral::value, - typename std::remove_const::type>::type - esimd_mod(T0 src0, T1 src1) { +ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< + detail::is_esimd_scalar::value && detail::is_esimd_scalar::value && + std::is_integral::value && std::is_integral::value, + typename std::remove_const::type>::type +esimd_mod(T0 src0, T1 src1) { return src0 % src1; } @@ -488,7 +478,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if::value && std::is_integral::value && - details::is_esimd_scalar::value, + detail::is_esimd_scalar::value, simd>::type esimd_div(simd &remainder, U src0, simd src1) { remainder = src0 % src1; @@ -497,9 +487,9 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value && - details::is_esimd_scalar::value, + typename std::enable_if::value && + detail::is_esimd_scalar::value && + detail::is_esimd_scalar::value, typename std::remove_const::type>::type esimd_div(simd::type, 1> &remainder, T0 src0, T1 src1) { @@ -532,7 +522,7 @@ esimd_max(simd src0, simd src1, int flag = GENX_NOSAT) { template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value, + typename std::enable_if::value, simd>::type esimd_max(simd src0, T src1, int flag = GENX_NOSAT) { simd Src1 = src1; @@ -542,7 +532,7 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value, + typename std::enable_if::value, simd>::type esimd_max(T src0, simd src1, int flag = GENX_NOSAT) { simd Src0 = src0; @@ -552,7 +542,7 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value, T>::type + typename std::enable_if::value, T>::type esimd_max(T src0, T src1, int flag = GENX_NOSAT) { simd Src0 = src0; simd Src1 = src1; @@ -579,7 +569,7 @@ esimd_min(simd src0, simd src1, int flag = GENX_NOSAT) { template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value, + typename std::enable_if::value, simd>::type esimd_min(simd src0, T src1, int flag = GENX_NOSAT) { simd Src1 = src1; @@ -589,7 +579,7 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value, + typename std::enable_if::value, simd>::type esimd_min(T src0, simd src1, int flag = GENX_NOSAT) { simd Src0 = src0; @@ -598,7 +588,7 @@ ESIMD_NODEBUG ESIMD_INLINE } template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value, T>::type + typename std::enable_if::value, T>::type esimd_min(T src0, T src1, int flag = GENX_NOSAT) { simd Src0 = src0; simd Src1 = src1; @@ -700,9 +690,9 @@ esimd_line(float P, float Q, simd src1, int flag = GENX_NOSAT) { // using a less efficient implementation if not on GEN10 or above. template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && + typename std::enable_if::value && std::is_floating_point::value && - details::is_fp_or_dword_type::value && + detail::is_fp_or_dword_type::value && std::is_floating_point::value, simd>::type esimd_dp2(simd src0, U src1, int flag = GENX_NOSAT) { @@ -722,9 +712,9 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && + typename std::enable_if::value && std::is_floating_point::value && - details::is_fp_or_dword_type::value && + detail::is_fp_or_dword_type::value && std::is_floating_point::value, simd>::type esimd_dp3(simd src0, U src1, int flag = GENX_NOSAT) { @@ -745,9 +735,9 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && + typename std::enable_if::value && std::is_floating_point::value && - details::is_fp_or_dword_type::value && + detail::is_fp_or_dword_type::value && std::is_floating_point::value, simd>::type esimd_dp4(simd src0, U src1, int flag = GENX_NOSAT) { @@ -769,9 +759,9 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && + typename std::enable_if::value && std::is_floating_point::value && - details::is_fp_or_dword_type::value && + detail::is_fp_or_dword_type::value && std::is_floating_point::value, simd>::type esimd_dph(simd src0, U src1, int flag = GENX_NOSAT) { @@ -792,7 +782,7 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && + typename std::enable_if::value && std::is_floating_point::value, simd>::type esimd_line(simd src0, simd src1, int flag = GENX_NOSAT) { @@ -813,7 +803,7 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && + typename std::enable_if::value && std::is_floating_point::value, simd>::type esimd_line(float P, float Q, simd src1, int flag = GENX_NOSAT) { @@ -847,8 +837,8 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_lzd(simd src0, template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value, + typename std::enable_if::value && + detail::is_esimd_scalar::value, typename std::remove_const::type>::type esimd_lzd(T0 src0, int flag = GENX_NOSAT) { simd Src0 = src0; @@ -888,9 +878,9 @@ esimd_lrp(simd src0, U src1, V src2, int flag = GENX_NOSAT) { // using less efficient implementation. template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && + typename std::enable_if::value && std::is_floating_point::value && - details::is_fp_or_dword_type::value && + detail::is_fp_or_dword_type::value && std::is_floating_point::value, simd>::type esimd_lrp(simd src0, U src1, V src2, int flag = GENX_NOSAT) { @@ -943,8 +933,8 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_bf_reverse(simd src0) { template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value, + typename std::enable_if::value && + detail::is_esimd_scalar::value, typename std::remove_const::type>::type esimd_bf_reverse(T1 src0) { simd Src0 = src0; @@ -957,7 +947,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if::value, simd>::type esimd_bf_insert(U src0, V src1, W src2, simd src3) { - typedef typename details::dword_type::type DT1; + typedef typename detail::dword_type::type DT1; static_assert(std::is_integral::value && sizeof(DT1) == sizeof(int), "operand conversion failed"); simd Src0 = src0; @@ -970,8 +960,8 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value, + typename std::enable_if::value && + detail::is_esimd_scalar::value, typename std::remove_const::type>::type esimd_bf_insert(T1 src0, T2 src1, T3 src2, T4 src3) { simd Src3 = src3; @@ -984,7 +974,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if::value, simd>::type esimd_bf_extract(U src0, V src1, simd src2) { - typedef typename details::dword_type::type DT1; + typedef typename detail::dword_type::type DT1; static_assert(std::is_integral::value && sizeof(DT1) == sizeof(int), "operand conversion failed"); simd Src0 = src0; @@ -996,8 +986,8 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value, + typename std::enable_if::value && + detail::is_esimd_scalar::value, typename std::remove_const::type>::type esimd_bf_extract(T1 src0, T2 src1, T3 src2) { simd Src2 = src2; @@ -1082,7 +1072,7 @@ ESIMD_INTRINSIC_DEF(double, sqrt_ieee) } \ template \ ESIMD_NODEBUG ESIMD_INLINE \ - typename std::enable_if::value, \ + typename std::enable_if::value, \ simd>::type \ esimd_##name(U src0, simd src1, int flag = GENX_NOSAT) { \ simd Src0 = src0; \ @@ -1288,11 +1278,9 @@ ESIMD_NODEBUG ESIMD_INLINE } template -ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value, - uint>::type - esimd_cbit(T src) { +ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< + std::is_integral::value && detail::is_esimd_scalar::value, uint>::type +esimd_cbit(T src) { simd Src = src; simd Result = esimd_cbit(Src); return Result[0]; @@ -1325,8 +1313,8 @@ esimd_fbh(simd src) { template ESIMD_NODEBUG ESIMD_INLINE - typename std::enable_if::value && - details::is_esimd_scalar::value, + typename std::enable_if::value && + detail::is_esimd_scalar::value, T>::type esimd_fbh(T src) { simd Src = src; @@ -1338,8 +1326,8 @@ template simd esimd_rdtsc(); template ESIMD_NODEBUG ESIMD_INLINE typename std::enable_if< - details::is_dword_type::value && details::is_dword_type::value && - details::is_dword_type::value && details::is_dword_type::value, + detail::is_dword_type::value && detail::is_dword_type::value && + detail::is_dword_type::value && detail::is_dword_type::value, simd>::type esimd_dp4a(simd src0, simd src1, simd src2, int flag = GENX_NOSAT) {