Skip to content

Commit

Permalink
Merge from 'sycl' to 'sycl-web' (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
iclsrc committed Oct 5, 2020
2 parents dcd1f12 + 73e957f commit 6af2069
Show file tree
Hide file tree
Showing 7 changed files with 280 additions and 77 deletions.
47 changes: 5 additions & 42 deletions llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,6 @@ class ESIMDLowerVecArgPass {
Function *rewriteFunc(Function &F);
Type *getSimdArgPtrTyOrNull(Value *arg);
void fixGlobals(Module &M);
void replaceConstExprWithGlobals(Module &M);
ConstantExpr *createNewConstantExpr(GlobalVariable *newGlobalVar,
Type *oldGlobalType, Value *old);
void removeOldGlobals();
};

Expand Down Expand Up @@ -229,41 +226,6 @@ Function *ESIMDLowerVecArgPass::rewriteFunc(Function &F) {
return NF;
}

// Replace ConstantExpr if it contains old global variable.
ConstantExpr *
ESIMDLowerVecArgPass::createNewConstantExpr(GlobalVariable *NewGlobalVar,
Type *OldGlobalType, Value *Old) {
ConstantExpr *NewConstantExpr = nullptr;

if (isa<GlobalVariable>(Old)) {
NewConstantExpr = cast<ConstantExpr>(
ConstantExpr::getBitCast(NewGlobalVar, OldGlobalType));
return NewConstantExpr;
}

auto InnerMost = createNewConstantExpr(
NewGlobalVar, OldGlobalType, cast<ConstantExpr>(Old)->getOperand(0));

NewConstantExpr = cast<ConstantExpr>(
cast<ConstantExpr>(Old)->getWithOperandReplaced(0, InnerMost));

return NewConstantExpr;
}

// Globals are part of ConstantExpr. This loop iterates over
// all such instances and replaces them with a new ConstantExpr
// consisting of new global vector* variable.
void ESIMDLowerVecArgPass::replaceConstExprWithGlobals(Module &M) {
for (auto &GlobalVars : OldNewGlobal) {
auto &G = *GlobalVars.first;
for (auto UseOfG : G.users()) {
auto NewGlobal = GlobalVars.second;
auto NewConstExpr = createNewConstantExpr(NewGlobal, G.getType(), UseOfG);
UseOfG->replaceAllUsesWith(NewConstExpr);
}
}
}

// This function creates new global variables of type vector* type
// when old one is of simd* type.
void ESIMDLowerVecArgPass::fixGlobals(Module &M) {
Expand All @@ -288,16 +250,17 @@ void ESIMDLowerVecArgPass::fixGlobals(Module &M) {
}
}

replaceConstExprWithGlobals(M);

removeOldGlobals();
}

// Remove old global variables from the program.
void ESIMDLowerVecArgPass::removeOldGlobals() {
for (auto &G : OldNewGlobal) {
G.first->removeDeadConstantUsers();
G.first->eraseFromParent();
auto OldGlob = G.first;
auto NewGlobal = G.second;
OldGlob->replaceAllUsesWith(
ConstantExpr::getBitCast(NewGlobal, OldGlob->getType()));
OldGlob->eraseFromParent();
}
}

Expand Down
26 changes: 26 additions & 0 deletions llvm/test/SYCLLowerIR/esimd_global_crash.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -ESIMDLowerVecArg -S | FileCheck %s

; This test checks that there is no compiler crash when a Global
; is used in simple instruction, not directly in ConstantExpr.

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown-sycldevice"

%"class.cl::sycl::INTEL::gpu::simd" = type { <2512 x i32> }

; CHECK: @Global = dso_local global <2512 x i32> undef, align 16384
@Global = dso_local global %"class.cl::sycl::INTEL::gpu::simd" undef, align 16384

define void @no_crash(<2512 x i32> %simd_val) {
; CHECK-LABEL: @no_crash(
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* bitcast (<2512 x i32>* @Global to %"class.cl::sycl::INTEL::gpu::simd"*) to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*
; CHECK-NEXT: [[GEP:%.*]] = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* [[CAST]], i64 0, i32 0
; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], <2512 x i32> addrspace(4)* [[GEP]], align 16384
; CHECK-NEXT: ret void
;
%cast = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* @Global to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*
%gep = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* %cast, i64 0, i32 0
store <2512 x i32> %simd_val, <2512 x i32> addrspace(4)* %gep, align 16384
ret void
}
13 changes: 13 additions & 0 deletions sycl/include/CL/sycl/detail/type_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,19 @@ template <typename T>
struct is_vector_arithmetic
: bool_constant<is_vec<T>::value && is_arithmetic<T>::value> {};

// is_bool
template <typename T>
struct is_scalar_bool
: bool_constant<std::is_same<remove_cv_t<T>, bool>::value> {};

template <typename T>
struct is_vector_bool
: bool_constant<is_vec<T>::value &&
is_scalar_bool<vector_element_t<T>>::value> {};

template <typename T>
struct is_bool : bool_constant<is_scalar_bool<vector_element_t<T>>::value> {};

// is_pointer
template <typename T> struct is_pointer_impl : std::false_type {};

Expand Down
101 changes: 71 additions & 30 deletions sycl/include/CL/sycl/group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,58 +274,99 @@ template <int Dimensions = 1> class group {
__spirv_MemoryBarrier(__spv::Scope::Workgroup, flags);
}

/// Asynchronously copies a number of elements specified by \p numElements
/// from the source pointed by \p src to destination pointed by \p dest
/// with a source stride specified by \p srcStride, and returns a SYCL
/// device_event which can be used to wait on the completion of the copy.
/// Permitted types for dataT are all scalar and vector types, except boolean.
template <typename dataT>
device_event async_work_group_copy(local_ptr<dataT> dest,
global_ptr<dataT> src,
size_t numElements) const {
detail::enable_if_t<!detail::is_bool<dataT>::value, device_event>
async_work_group_copy(local_ptr<dataT> dest, global_ptr<dataT> src,
size_t numElements, size_t srcStride) const {
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;

__ocl_event_t e = OpGroupAsyncCopyGlobalToLocal(
__ocl_event_t E = OpGroupAsyncCopyGlobalToLocal(
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
numElements, 1, 0);
return device_event(&e);
numElements, srcStride, 0);
return device_event(&E);
}

/// Asynchronously copies a number of elements specified by \p numElements
/// from the source pointed by \p src to destination pointed by \p dest with
/// the destination stride specified by \p destStride, and returns a SYCL
/// device_event which can be used to wait on the completion of the copy.
/// Permitted types for dataT are all scalar and vector types, except boolean.
template <typename dataT>
device_event async_work_group_copy(global_ptr<dataT> dest,
local_ptr<dataT> src,
size_t numElements) const {
detail::enable_if_t<!detail::is_bool<dataT>::value, device_event>
async_work_group_copy(global_ptr<dataT> dest, local_ptr<dataT> src,
size_t numElements, size_t destStride) const {
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;

__ocl_event_t e = OpGroupAsyncCopyLocalToGlobal(
__ocl_event_t E = OpGroupAsyncCopyLocalToGlobal(
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
numElements, 1, 0);
return device_event(&e);
numElements, destStride, 0);
return device_event(&E);
}

/// Specialization for scalar bool type.
/// Asynchronously copies a number of elements specified by \p NumElements
/// from the source pointed by \p Src to destination pointed by \p Dest
/// with a stride specified by \p Stride, and returns a SYCL device_event
/// which can be used to wait on the completion of the copy.
template <typename T, access::address_space DestS, access::address_space SrcS>
detail::enable_if_t<detail::is_scalar_bool<T>::value, device_event>
async_work_group_copy(multi_ptr<T, DestS> Dest, multi_ptr<T, SrcS> Src,
size_t NumElements, size_t Stride) const {
static_assert(sizeof(bool) == sizeof(uint8_t),
"Async copy to/from bool memory is not supported.");
auto DestP =
multi_ptr<uint8_t, DestS>(reinterpret_cast<uint8_t *>(Dest.get()));
auto SrcP =
multi_ptr<uint8_t, SrcS>(reinterpret_cast<uint8_t *>(Src.get()));
return async_work_group_copy(DestP, SrcP, NumElements, Stride);
}

/// Specialization for vector bool type.
/// Asynchronously copies a number of elements specified by \p NumElements
/// from the source pointed by \p Src to destination pointed by \p Dest
/// with a stride specified by \p Stride, and returns a SYCL device_event
/// which can be used to wait on the completion of the copy.
template <typename T, access::address_space DestS, access::address_space SrcS>
detail::enable_if_t<detail::is_vector_bool<T>::value, device_event>
async_work_group_copy(multi_ptr<T, DestS> Dest, multi_ptr<T, SrcS> Src,
size_t NumElements, size_t Stride) const {
static_assert(sizeof(bool) == sizeof(uint8_t),
"Async copy to/from bool memory is not supported.");
using VecT = detail::change_base_type_t<T, uint8_t>;
auto DestP = multi_ptr<VecT, DestS>(reinterpret_cast<VecT *>(Dest.get()));
auto SrcP = multi_ptr<VecT, SrcS>(reinterpret_cast<VecT *>(Src.get()));
return async_work_group_copy(DestP, SrcP, NumElements, Stride);
}

/// Asynchronously copies a number of elements specified by \p numElements
/// from the source pointed by \p src to destination pointed by \p dest and
/// returns a SYCL device_event which can be used to wait on the completion
/// of the copy.
/// Permitted types for dataT are all scalar and vector types.
template <typename dataT>
device_event async_work_group_copy(local_ptr<dataT> dest,
global_ptr<dataT> src,
size_t numElements,
size_t srcStride) const {
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;

__ocl_event_t e = OpGroupAsyncCopyGlobalToLocal(
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
numElements, srcStride, 0);
return device_event(&e);
size_t numElements) const {
return async_work_group_copy(dest, src, numElements, 1);
}

/// Asynchronously copies a number of elements specified by \p numElements
/// from the source pointed by \p src to destination pointed by \p dest and
/// returns a SYCL device_event which can be used to wait on the completion
/// of the copy.
/// Permitted types for dataT are all scalar and vector types.
template <typename dataT>
device_event async_work_group_copy(global_ptr<dataT> dest,
local_ptr<dataT> src,
size_t numElements,
size_t destStride) const {
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;

__ocl_event_t e = OpGroupAsyncCopyLocalToGlobal(
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
numElements, destStride, 0);
return device_event(&e);
size_t numElements) const {
return async_work_group_copy(dest, src, numElements, 1);
}

template <typename... eventTN>
Expand Down
4 changes: 2 additions & 2 deletions sycl/include/CL/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1589,7 +1589,7 @@ class __SYCL_EXPORT handler {
// Make sure data shared_ptr points to is not released until we finish
// work with it.
MSharedPtrStorage.push_back(Dst);
T_Dst *RawDstPtr = Dst.get();
typename shared_ptr_class<T_Dst>::element_type *RawDstPtr = Dst.get();
copy(Src, RawDstPtr);
}

Expand All @@ -1612,7 +1612,7 @@ class __SYCL_EXPORT handler {
// Make sure data shared_ptr points to is not released until we finish
// work with it.
MSharedPtrStorage.push_back(Src);
T_Src *RawSrcPtr = Src.get();
typename shared_ptr_class<T_Src>::element_type *RawSrcPtr = Src.get();
copy(RawSrcPtr, Dst);
}

Expand Down
Loading

0 comments on commit 6af2069

Please sign in to comment.