From c24bda2c81057feda5ff8a13838d7e636e7347a0 Mon Sep 17 00:00:00 2001 From: Ronan Keryell Date: Wed, 30 Jan 2019 14:29:07 -0800 Subject: [PATCH] [SYCL][NFC] Fix wrong end-of-line encoding Change the encoding of some files from MS-DOS to Unix. Signed-off-by: Ronan Keryell --- clang/test/CodeGenSYCL/kernel_functor.cpp | 360 ++++---- .../clang-offload-wrapper/CMakeLists.txt | 50 +- .../ClangOffloadWrapper.cpp | 824 +++++++++--------- llvm-spirv/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 606 ++++++------- sycl/include/CL/sycl/nd_range.hpp | 118 +-- sycl/test/functor/kernel_functor.cpp | 372 ++++---- 6 files changed, 1165 insertions(+), 1165 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel_functor.cpp b/clang/test/CodeGenSYCL/kernel_functor.cpp index 1ca39e025c80c..56b9b34c2dbeb 100644 --- a/clang/test/CodeGenSYCL/kernel_functor.cpp +++ b/clang/test/CodeGenSYCL/kernel_functor.cpp @@ -1,180 +1,180 @@ -// RUN: %clang -I %S/Inputs -std=c++11 --sycl -Xclang -fsycl-int-header=%t.h %s -c -o %t.spv -// RUN: FileCheck %s --input-file=%t.h - -// Checks that functors are supported as SYCL kernels. - -#include "sycl.hpp" - -constexpr auto sycl_read_write = cl::sycl::access::mode::read_write; -constexpr auto sycl_global_buffer = cl::sycl::access::target::global_buffer; - -// Case 1: -// - functor class is defined in an anonymous namespace -// - the '()' operator: -// * does not have parameters (to be used in 'single_task'). -// * has no 'const' qualifier -namespace { - class Functor1 { - public: - Functor1(int X_, cl::sycl::accessor &Acc_) : - X(X_), Acc(Acc_) - {} - - void operator()() { - Acc.use(X); - } - - private: - int X; - cl::sycl::accessor Acc; - }; -} - -// Case 2: -// - functor class is defined in a namespace -// - the '()' operator: -// * does not have parameters (to be used in 'single_task'). -// * has the 'const' qualifier -namespace ns { - class Functor2 { - public: - Functor2(int X_, cl::sycl::accessor &Acc_) : - X(X_), Acc(Acc_) - {} - - void operator()() const { - Acc.use(X); - } - - private: - int X; - cl::sycl::accessor Acc; - }; -} - -// Case 3: -// - functor class is templated and defined in the translation unit scope -// - the '()' operator: -// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). -// * has no 'const' qualifier -template class TmplFunctor { -public: - TmplFunctor(T X_, cl::sycl::accessor &Acc_) : - X(X_), Acc(Acc_) - {} - - void operator()(cl::sycl::id<1> id) { - Acc.use(id, X); - } - -private: - T X; - cl::sycl::accessor Acc; -}; - -// Case 4: -// - functor class is templated and defined in the translation unit scope -// - the '()' operator: -// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). -// * has the 'const' qualifier -template class TmplConstFunctor { -public: - TmplConstFunctor(T X_, cl::sycl::accessor &Acc_) : - X(X_), Acc(Acc_) - {} - - void operator()(cl::sycl::id<1> id) const { - Acc.use(id, X); - } - -private: - T X; - cl::sycl::accessor Acc; -}; - -// Exercise non-templated functors in 'single_task'. -int foo(int X) { - int A[] = { 10 }; - { - cl::sycl::queue Q; - cl::sycl::buffer Buf(A, 1); - - Q.submit([&](cl::sycl::handler& cgh) { - auto Acc = Buf.get_access(cgh); - Functor1 F(X, Acc); - - cgh.single_task(F); - }); - Q.submit([&](cl::sycl::handler& cgh) { - auto Acc = Buf.get_access(cgh); - ns::Functor2 F(X, Acc); - - cgh.single_task(F); - }); - Q.submit([&](cl::sycl::handler& cgh) { - auto Acc = Buf.get_access(cgh); - ns::Functor2 F(X, Acc); - - cgh.single_task(F); - }); - } - return A[0]; -} - -#define ARR_LEN(x) sizeof(x)/sizeof(x[0]) - -// Exercise templated functors in 'parallel_for'. -template T bar(T X) { - T A[] = { (T)10, (T)10 }; - { - cl::sycl::queue Q; - cl::sycl::buffer Buf(A, ARR_LEN(A)); - - Q.submit([&](cl::sycl::handler& cgh) { - auto Acc = Buf.template get_access(cgh); - TmplFunctor F(X, Acc); - - cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); - }); - // Spice with lambdas to make sure functors and lambdas work together. - Q.submit([&](cl::sycl::handler& cgh) { - auto Acc = Buf.template get_access(cgh); - cgh.parallel_for( - cl::sycl::range<1>(ARR_LEN(A)), - [=](cl::sycl::id<1> id) { Acc.use(id, X); }); - }); - Q.submit([&](cl::sycl::handler& cgh) { - auto Acc = Buf.template get_access(cgh); - TmplConstFunctor F(X, Acc); - - cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); - }); - } - T res = (T)0; - - for (int i = 0; i < ARR_LEN(A); i++) { - res += A[i]; - } - return res; -} - -int main() { - const int Res1 = foo(10); - const int Res2 = bar(10); - const int Gold1 = 40; - const int Gold2 = 80; - -#ifndef __SYCL_DEVICE_ONLY__ - cl::sycl::detail::KernelInfo::getName(); - // CHECK: Functor1 - cl::sycl::detail::KernelInfo::getName(); - // CHECK: ::ns::Functor2 - cl::sycl::detail::KernelInfo>::getName(); - // CHECK: TmplFunctor - cl::sycl::detail::KernelInfo>::getName(); - // CHECK: TmplConstFunctor -#endif // __SYCL_DEVICE_ONLY__ - - return 0; -} - +// RUN: %clang -I %S/Inputs -std=c++11 --sycl -Xclang -fsycl-int-header=%t.h %s -c -o %t.spv +// RUN: FileCheck %s --input-file=%t.h + +// Checks that functors are supported as SYCL kernels. + +#include "sycl.hpp" + +constexpr auto sycl_read_write = cl::sycl::access::mode::read_write; +constexpr auto sycl_global_buffer = cl::sycl::access::target::global_buffer; + +// Case 1: +// - functor class is defined in an anonymous namespace +// - the '()' operator: +// * does not have parameters (to be used in 'single_task'). +// * has no 'const' qualifier +namespace { + class Functor1 { + public: + Functor1(int X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()() { + Acc.use(X); + } + + private: + int X; + cl::sycl::accessor Acc; + }; +} + +// Case 2: +// - functor class is defined in a namespace +// - the '()' operator: +// * does not have parameters (to be used in 'single_task'). +// * has the 'const' qualifier +namespace ns { + class Functor2 { + public: + Functor2(int X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()() const { + Acc.use(X); + } + + private: + int X; + cl::sycl::accessor Acc; + }; +} + +// Case 3: +// - functor class is templated and defined in the translation unit scope +// - the '()' operator: +// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). +// * has no 'const' qualifier +template class TmplFunctor { +public: + TmplFunctor(T X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()(cl::sycl::id<1> id) { + Acc.use(id, X); + } + +private: + T X; + cl::sycl::accessor Acc; +}; + +// Case 4: +// - functor class is templated and defined in the translation unit scope +// - the '()' operator: +// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). +// * has the 'const' qualifier +template class TmplConstFunctor { +public: + TmplConstFunctor(T X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()(cl::sycl::id<1> id) const { + Acc.use(id, X); + } + +private: + T X; + cl::sycl::accessor Acc; +}; + +// Exercise non-templated functors in 'single_task'. +int foo(int X) { + int A[] = { 10 }; + { + cl::sycl::queue Q; + cl::sycl::buffer Buf(A, 1); + + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.get_access(cgh); + Functor1 F(X, Acc); + + cgh.single_task(F); + }); + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.get_access(cgh); + ns::Functor2 F(X, Acc); + + cgh.single_task(F); + }); + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.get_access(cgh); + ns::Functor2 F(X, Acc); + + cgh.single_task(F); + }); + } + return A[0]; +} + +#define ARR_LEN(x) sizeof(x)/sizeof(x[0]) + +// Exercise templated functors in 'parallel_for'. +template T bar(T X) { + T A[] = { (T)10, (T)10 }; + { + cl::sycl::queue Q; + cl::sycl::buffer Buf(A, ARR_LEN(A)); + + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + TmplFunctor F(X, Acc); + + cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); + }); + // Spice with lambdas to make sure functors and lambdas work together. + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + cgh.parallel_for( + cl::sycl::range<1>(ARR_LEN(A)), + [=](cl::sycl::id<1> id) { Acc.use(id, X); }); + }); + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + TmplConstFunctor F(X, Acc); + + cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); + }); + } + T res = (T)0; + + for (int i = 0; i < ARR_LEN(A); i++) { + res += A[i]; + } + return res; +} + +int main() { + const int Res1 = foo(10); + const int Res2 = bar(10); + const int Gold1 = 40; + const int Gold2 = 80; + +#ifndef __SYCL_DEVICE_ONLY__ + cl::sycl::detail::KernelInfo::getName(); + // CHECK: Functor1 + cl::sycl::detail::KernelInfo::getName(); + // CHECK: ::ns::Functor2 + cl::sycl::detail::KernelInfo>::getName(); + // CHECK: TmplFunctor + cl::sycl::detail::KernelInfo>::getName(); + // CHECK: TmplConstFunctor +#endif // __SYCL_DEVICE_ONLY__ + + return 0; +} + diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt index bf01640057120..f8827d0208bff 100644 --- a/clang/tools/clang-offload-wrapper/CMakeLists.txt +++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -1,25 +1,25 @@ -set(LLVM_LINK_COMPONENTS BitWriter Core Support TransformUtils) - -if(NOT CLANG_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - -add_clang_executable(clang-offload-wrapper - ClangOffloadWrapper.cpp - - DEPENDS - ${tablegen_deps} - ) - -set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS - clangBasic - ) - -add_dependencies(clang clang-offload-wrapper) - -target_link_libraries(clang-offload-wrapper - PRIVATE - ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS} - ) - -install(TARGETS clang-offload-wrapper RUNTIME DESTINATION bin COMPONENT clang-offload-wrapper) +set(LLVM_LINK_COMPONENTS BitWriter Core Support TransformUtils) + +if(NOT CLANG_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + +add_clang_executable(clang-offload-wrapper + ClangOffloadWrapper.cpp + + DEPENDS + ${tablegen_deps} + ) + +set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS + clangBasic + ) + +add_dependencies(clang clang-offload-wrapper) + +target_link_libraries(clang-offload-wrapper + PRIVATE + ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS} + ) + +install(TARGETS clang-offload-wrapper RUNTIME DESTINATION bin COMPONENT clang-offload-wrapper) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 08f3043f285d6..b76a7b58eb353 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -1,412 +1,412 @@ -//===-- clang-offload-wrapper/ClangOffloadWrapper.cpp ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of the offload wrapper tool. It takes offload target binaries -/// as input and creates wrapper bitcode from them which, after linking with the -/// offload application, provides access to the binaries. -/// TODO Add Windows support. -/// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Version.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" -#include -#include -#include - -using namespace llvm; - -static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); - -// Mark all our options with this category, everything else (except for -version -// and -help) will be hidden. -static cl::OptionCategory - ClangOffloadWrapperCategory("clang-offload-wrapper options"); - -static cl::opt Output("o", cl::Required, - cl::desc("Output filename"), - cl::value_desc("filename"), - cl::cat(ClangOffloadWrapperCategory)); - -static cl::list Inputs(cl::Positional, cl::OneOrMore, - cl::desc(""), - cl::cat(ClangOffloadWrapperCategory)); - -static cl::opt - Target("target", cl::Required, - cl::desc("Offload kind + target triple of the wrapper object: " - "-"), - cl::value_desc("kind-triple"), cl::cat(ClangOffloadWrapperCategory)); - -static cl::opt EmitEntryTable("emit-entry-table", cl::NotHidden, - cl::init(true), cl::Optional, - cl::desc("Emit offload entry table"), - cl::cat(ClangOffloadWrapperCategory)); - -static cl::opt EmitRegFuncs("emit-reg-funcs", cl::NotHidden, - cl::init(true), cl::Optional, - cl::desc("Emit [un-]registration functions"), - cl::cat(ClangOffloadWrapperCategory)); - -static cl::opt - RegFuncName("reg-func-name", cl::Optional, cl::init("__tgt_register_lib"), - cl::desc("Offload descriptor registration function name"), - cl::value_desc("name"), cl::cat(ClangOffloadWrapperCategory)); - -static cl::opt - UnregFuncName("unreg-func-name", cl::Optional, - cl::init("__tgt_unregister_lib"), - cl::desc("Offload descriptor un-registration function name"), - cl::value_desc("name"), cl::cat(ClangOffloadWrapperCategory)); - -static cl::opt DescriptorName( - "desc-name", cl::Optional, cl::init("descriptor"), - cl::desc( - "Specifies offload descriptor symbol name: '..'" - ", and makes it globally visible"), - cl::value_desc("name"), cl::cat(ClangOffloadWrapperCategory)); - -namespace { -// TODO offload bundler and wrapper should share this. -// Offload kinds this tool supports -enum class OffloadKind { - Unknown = 0, - Host, - OpenMP, - HIP, - SYCL, - First = Host, - Last = SYCL -}; - -OffloadKind parseOffloadKind(StringRef KindStr) { - OffloadKind Kind = StringSwitch(KindStr) - .Case("host", OffloadKind::Host) - .Case("openmp", OffloadKind::OpenMP) - .Case("hip", OffloadKind::HIP) - .Case("sycl", OffloadKind::SYCL) - .Default(OffloadKind::Unknown); - return Kind; -} - -StringRef offloadKindToString(OffloadKind Kind) { - switch (Kind) { - case OffloadKind::Unknown: - return "unknown"; - case OffloadKind::Host: - return "host"; - case OffloadKind::OpenMP: - return "openmp"; - case OffloadKind::HIP: - return "hip"; - case OffloadKind::SYCL: - return "sycl"; - default: - llvm_unreachable("bad offload kind"); - } - return ""; -} - -void dumpOffloadKinds(raw_ostream &OS) { - OffloadKind Kinds[] = {OffloadKind::Host, OffloadKind::OpenMP, - OffloadKind::HIP, OffloadKind::SYCL}; - for (auto K : Kinds) { - if (K != OffloadKind::Host) - OS << " "; - OS << offloadKindToString(K); - } -} - -class BinaryWrapper { - LLVMContext C; - Module M; - std::string OffloadKindTag; - - StructType *EntryTy = nullptr; - StructType *ImageTy = nullptr; - StructType *DescTy = nullptr; - - using MemoryBuffersVector = SmallVectorImpl>; - -private: - IntegerType *getSizeTTy() { - auto PtrSize = M.getDataLayout().getPointerTypeSize(Type::getInt8PtrTy(C)); - return PtrSize == 8 ? Type::getInt64Ty(C) : Type::getInt32Ty(C); - } - - // struct __tgt_offload_entry { - // void *addr; - // char *name; - // size_t size; - // int32_t flags; - // int32_t reserved; - // }; - StructType *getEntryTy() { - if (!EntryTy) - EntryTy = StructType::create("__tgt_offload_entry", Type::getInt8PtrTy(C), - Type::getInt8PtrTy(C), getSizeTTy(), - Type::getInt32Ty(C), Type::getInt32Ty(C)); - return EntryTy; - } - - PointerType *getEntryPtrTy() { return PointerType::getUnqual(getEntryTy()); } - - // struct __tgt_device_image { - // void *ImageStart; - // void *ImageEnd; - // __tgt_offload_entry *EntriesBegin; [optional] - // __tgt_offload_entry *EntriesEnd; [optional] - // }; - StructType *getDeviceImageTy() { - if (!ImageTy) { - SmallVector FieldTypes( - {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)}); - if (EmitEntryTable) - FieldTypes.append({getEntryPtrTy(), getEntryPtrTy()}); - ImageTy = StructType::create(FieldTypes, "__tgt_device_image"); - } - return ImageTy; - } - - PointerType *getDeviceImagePtrTy() { - return PointerType::getUnqual(getDeviceImageTy()); - } - - // struct __tgt_bin_desc { - // int32_t NumDeviceImages; - // __tgt_device_image *DeviceImages; - // __tgt_offload_entry *HostEntriesBegin; [optional] - // __tgt_offload_entry *HostEntriesEnd; [optional] - // }; - StructType *getBinDescTy() { - if (!DescTy) { - SmallVector FieldTypes( - {Type::getInt32Ty(C), getDeviceImagePtrTy()}); - if (EmitEntryTable) - FieldTypes.append({getEntryPtrTy(), getEntryPtrTy()}); - DescTy = StructType::create(FieldTypes, "__tgt_bin_desc"); - } - return DescTy; - } - - PointerType *getBinDescPtrTy() { - return PointerType::getUnqual(getBinDescTy()); - } - - GlobalVariable *createBinDesc(const MemoryBuffersVector &Bufs) { - GlobalVariable *EntriesB = nullptr, *EntriesE = nullptr; - - if (EmitEntryTable) { - EntriesB = new GlobalVariable(M, getEntryTy(), true, - GlobalValue::ExternalLinkage, nullptr, - OffloadKindTag + "entries_begin"); - EntriesE = new GlobalVariable(M, getEntryTy(), true, - GlobalValue::ExternalLinkage, nullptr, - OffloadKindTag + "entries_end"); - } - auto *Zero = ConstantInt::get(getSizeTTy(), 0u); - Constant *ZeroZero[] = {Zero, Zero}; - - SmallVector ImagesInits; - for (const auto &Buf : Bufs) { - auto *Data = ConstantDataArray::get( - C, makeArrayRef(Buf->getBufferStart(), Buf->getBufferSize())); - - auto *Image = new GlobalVariable(M, Data->getType(), true, - GlobalVariable::InternalLinkage, Data, - OffloadKindTag + "device_image"); - Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - - auto *Size = ConstantInt::get(getSizeTTy(), Buf->getBufferSize()); - Constant *ZeroSize[] = {Zero, Size}; - - auto *ImageB = ConstantExpr::getGetElementPtr(Image->getValueType(), - Image, ZeroZero); - auto *ImageE = ConstantExpr::getGetElementPtr(Image->getValueType(), - Image, ZeroSize); - - SmallVector Inits({ImageB, ImageE}); - if (EmitEntryTable) - Inits.append({EntriesB, EntriesE}); - ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(), Inits)); - } - - auto *ImagesData = ConstantArray::get( - ArrayType::get(getDeviceImageTy(), ImagesInits.size()), ImagesInits); - - auto *Images = new GlobalVariable(M, ImagesData->getType(), true, - GlobalValue::InternalLinkage, ImagesData, - OffloadKindTag + "device_images"); - Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - - auto *ImagesB = ConstantExpr::getGetElementPtr(Images->getValueType(), - Images, ZeroZero); - - SmallVector Inits( - {ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB}); - if (EmitEntryTable) - Inits.append({EntriesB, EntriesE}); - - auto *DescInit = ConstantStruct::get(getBinDescTy(), Inits); - - GlobalValue::LinkageTypes Lnk = DescriptorName.getNumOccurrences() > 0 - ? GlobalValue::ExternalLinkage - : GlobalValue::InternalLinkage; - Twine DescName = Twine(OffloadKindTag) + Twine(DescriptorName); - - return new GlobalVariable(M, DescInit->getType(), true, Lnk, DescInit, - DescName); - } - - void createRegisterFunction(GlobalVariable *BinDesc) { - auto *FuncTy = FunctionType::get(Type::getVoidTy(C), {}, false); - auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, - OffloadKindTag + "descriptor_reg", &M); - Func->setSection(".text.startup"); - - // Get RegFuncName function declaration. - auto *RegFuncTy = - FunctionType::get(Type::getVoidTy(C), {getBinDescPtrTy()}, false); - auto *RegFunc = M.getOrInsertFunction(RegFuncName, RegFuncTy); - - // Construct function body - IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); - Builder.CreateCall(RegFunc, {BinDesc}); - Builder.CreateRetVoid(); - - // Add this function to constructors. - appendToGlobalCtors(M, Func, 0); - } - - void createUnregisterFunction(GlobalVariable *BinDesc) { - auto *FuncTy = FunctionType::get(Type::getVoidTy(C), {}, false); - auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, - OffloadKindTag + "descriptor_unreg", &M); - Func->setSection(".text.startup"); - - // Get UnregFuncName function declaration. - auto *UnRegFuncTy = - FunctionType::get(Type::getVoidTy(C), {getBinDescPtrTy()}, false); - auto *UnRegFunc = M.getOrInsertFunction(UnregFuncName, UnRegFuncTy); - - // Construct function body - IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); - Builder.CreateCall(UnRegFunc, {BinDesc}); - Builder.CreateRetVoid(); - - // Add this function to global destructors. - appendToGlobalDtors(M, Func, 0); - } - -public: - BinaryWrapper(const StringRef &KindStr, const StringRef &Target) - : M("offload.wrapper.object", C) { - - OffloadKindTag = - (Twine(".") + Twine(KindStr) + Twine("_offloading.")).str(); - M.setTargetTriple(Target); - } - - const Module &wrapBinaries(const MemoryBuffersVector &Binaries) { - auto *Desc = createBinDesc(Binaries); - assert(Desc && "no binary descriptor"); - - if (EmitRegFuncs) { - createRegisterFunction(Desc); - createUnregisterFunction(Desc); - } - return M; - } -}; - -} // anonymous namespace - -int main(int argc, const char **argv) { - sys::PrintStackTraceOnErrorSignal(argv[0]); - - cl::HideUnrelatedOptions(ClangOffloadWrapperCategory); - cl::SetVersionPrinter([](raw_ostream &OS) { - OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n'; - }); - cl::ParseCommandLineOptions( - argc, argv, - "A tool to create a wrapper bitcode for offload target binaries.\n" - "Takes offload target binaries as input and produces bitcode file " - "containing\ntarget binaries packaged as data and initialization code " - "which registers target\nbinaries in offload runtime."); - - if (Help) { - cl::PrintHelpMessage(); - return 0; - } - - std::pair KindTriplePair = StringRef(Target).split('-'); - auto OffloadKindStr = KindTriplePair.first; - auto TargetStr = KindTriplePair.second; - - if (OffloadKindStr.empty()) { - errs() << "error: no offload kind specified\n"; - return 1; - } - OffloadKind Kind = parseOffloadKind(OffloadKindStr); - - if (Kind == OffloadKind::Unknown) { - errs() << "error: unknown offload kind: " << OffloadKindStr << "\n"; - errs() << "valid offload kinds: "; - dumpOffloadKinds(errs()); - errs() << "\n"; - return 1; - } - if (TargetStr.empty()) { - errs() << "error: no target specified\n"; - return 1; - } - // Create the bitcode file to write the resulting code to. - { - std::error_code EC; - raw_fd_ostream OutF(Output, EC, sys::fs::F_None); - if (EC) { - errs() << "error: unable to open output file: " << EC.message() << ".\n"; - return 1; - } - - // Read device binaries. - SmallVector, 4> DeviceBinaries; - for (const auto &File : Inputs) { - auto InputOrErr = MemoryBuffer::getFileOrSTDIN(File); - if (auto EC = InputOrErr.getError()) { - errs() << "error: can't open file " << File << ": " << EC.message() - << "\n"; - return 1; - } - DeviceBinaries.emplace_back(std::move(*InputOrErr)); - } - - // Create a wrapper for device binaries and write its bitcode to the file. - WriteBitcodeToFile( - BinaryWrapper(OffloadKindStr, TargetStr).wrapBinaries(DeviceBinaries), - OutF); - } - return 0; -} +//===-- clang-offload-wrapper/ClangOffloadWrapper.cpp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of the offload wrapper tool. It takes offload target binaries +/// as input and creates wrapper bitcode from them which, after linking with the +/// offload application, provides access to the binaries. +/// TODO Add Windows support. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Version.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include +#include + +using namespace llvm; + +static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); + +// Mark all our options with this category, everything else (except for -version +// and -help) will be hidden. +static cl::OptionCategory + ClangOffloadWrapperCategory("clang-offload-wrapper options"); + +static cl::opt Output("o", cl::Required, + cl::desc("Output filename"), + cl::value_desc("filename"), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::list Inputs(cl::Positional, cl::OneOrMore, + cl::desc(""), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt + Target("target", cl::Required, + cl::desc("Offload kind + target triple of the wrapper object: " + "-"), + cl::value_desc("kind-triple"), cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt EmitEntryTable("emit-entry-table", cl::NotHidden, + cl::init(true), cl::Optional, + cl::desc("Emit offload entry table"), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt EmitRegFuncs("emit-reg-funcs", cl::NotHidden, + cl::init(true), cl::Optional, + cl::desc("Emit [un-]registration functions"), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt + RegFuncName("reg-func-name", cl::Optional, cl::init("__tgt_register_lib"), + cl::desc("Offload descriptor registration function name"), + cl::value_desc("name"), cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt + UnregFuncName("unreg-func-name", cl::Optional, + cl::init("__tgt_unregister_lib"), + cl::desc("Offload descriptor un-registration function name"), + cl::value_desc("name"), cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt DescriptorName( + "desc-name", cl::Optional, cl::init("descriptor"), + cl::desc( + "Specifies offload descriptor symbol name: '..'" + ", and makes it globally visible"), + cl::value_desc("name"), cl::cat(ClangOffloadWrapperCategory)); + +namespace { +// TODO offload bundler and wrapper should share this. +// Offload kinds this tool supports +enum class OffloadKind { + Unknown = 0, + Host, + OpenMP, + HIP, + SYCL, + First = Host, + Last = SYCL +}; + +OffloadKind parseOffloadKind(StringRef KindStr) { + OffloadKind Kind = StringSwitch(KindStr) + .Case("host", OffloadKind::Host) + .Case("openmp", OffloadKind::OpenMP) + .Case("hip", OffloadKind::HIP) + .Case("sycl", OffloadKind::SYCL) + .Default(OffloadKind::Unknown); + return Kind; +} + +StringRef offloadKindToString(OffloadKind Kind) { + switch (Kind) { + case OffloadKind::Unknown: + return "unknown"; + case OffloadKind::Host: + return "host"; + case OffloadKind::OpenMP: + return "openmp"; + case OffloadKind::HIP: + return "hip"; + case OffloadKind::SYCL: + return "sycl"; + default: + llvm_unreachable("bad offload kind"); + } + return ""; +} + +void dumpOffloadKinds(raw_ostream &OS) { + OffloadKind Kinds[] = {OffloadKind::Host, OffloadKind::OpenMP, + OffloadKind::HIP, OffloadKind::SYCL}; + for (auto K : Kinds) { + if (K != OffloadKind::Host) + OS << " "; + OS << offloadKindToString(K); + } +} + +class BinaryWrapper { + LLVMContext C; + Module M; + std::string OffloadKindTag; + + StructType *EntryTy = nullptr; + StructType *ImageTy = nullptr; + StructType *DescTy = nullptr; + + using MemoryBuffersVector = SmallVectorImpl>; + +private: + IntegerType *getSizeTTy() { + auto PtrSize = M.getDataLayout().getPointerTypeSize(Type::getInt8PtrTy(C)); + return PtrSize == 8 ? Type::getInt64Ty(C) : Type::getInt32Ty(C); + } + + // struct __tgt_offload_entry { + // void *addr; + // char *name; + // size_t size; + // int32_t flags; + // int32_t reserved; + // }; + StructType *getEntryTy() { + if (!EntryTy) + EntryTy = StructType::create("__tgt_offload_entry", Type::getInt8PtrTy(C), + Type::getInt8PtrTy(C), getSizeTTy(), + Type::getInt32Ty(C), Type::getInt32Ty(C)); + return EntryTy; + } + + PointerType *getEntryPtrTy() { return PointerType::getUnqual(getEntryTy()); } + + // struct __tgt_device_image { + // void *ImageStart; + // void *ImageEnd; + // __tgt_offload_entry *EntriesBegin; [optional] + // __tgt_offload_entry *EntriesEnd; [optional] + // }; + StructType *getDeviceImageTy() { + if (!ImageTy) { + SmallVector FieldTypes( + {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)}); + if (EmitEntryTable) + FieldTypes.append({getEntryPtrTy(), getEntryPtrTy()}); + ImageTy = StructType::create(FieldTypes, "__tgt_device_image"); + } + return ImageTy; + } + + PointerType *getDeviceImagePtrTy() { + return PointerType::getUnqual(getDeviceImageTy()); + } + + // struct __tgt_bin_desc { + // int32_t NumDeviceImages; + // __tgt_device_image *DeviceImages; + // __tgt_offload_entry *HostEntriesBegin; [optional] + // __tgt_offload_entry *HostEntriesEnd; [optional] + // }; + StructType *getBinDescTy() { + if (!DescTy) { + SmallVector FieldTypes( + {Type::getInt32Ty(C), getDeviceImagePtrTy()}); + if (EmitEntryTable) + FieldTypes.append({getEntryPtrTy(), getEntryPtrTy()}); + DescTy = StructType::create(FieldTypes, "__tgt_bin_desc"); + } + return DescTy; + } + + PointerType *getBinDescPtrTy() { + return PointerType::getUnqual(getBinDescTy()); + } + + GlobalVariable *createBinDesc(const MemoryBuffersVector &Bufs) { + GlobalVariable *EntriesB = nullptr, *EntriesE = nullptr; + + if (EmitEntryTable) { + EntriesB = new GlobalVariable(M, getEntryTy(), true, + GlobalValue::ExternalLinkage, nullptr, + OffloadKindTag + "entries_begin"); + EntriesE = new GlobalVariable(M, getEntryTy(), true, + GlobalValue::ExternalLinkage, nullptr, + OffloadKindTag + "entries_end"); + } + auto *Zero = ConstantInt::get(getSizeTTy(), 0u); + Constant *ZeroZero[] = {Zero, Zero}; + + SmallVector ImagesInits; + for (const auto &Buf : Bufs) { + auto *Data = ConstantDataArray::get( + C, makeArrayRef(Buf->getBufferStart(), Buf->getBufferSize())); + + auto *Image = new GlobalVariable(M, Data->getType(), true, + GlobalVariable::InternalLinkage, Data, + OffloadKindTag + "device_image"); + Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + + auto *Size = ConstantInt::get(getSizeTTy(), Buf->getBufferSize()); + Constant *ZeroSize[] = {Zero, Size}; + + auto *ImageB = ConstantExpr::getGetElementPtr(Image->getValueType(), + Image, ZeroZero); + auto *ImageE = ConstantExpr::getGetElementPtr(Image->getValueType(), + Image, ZeroSize); + + SmallVector Inits({ImageB, ImageE}); + if (EmitEntryTable) + Inits.append({EntriesB, EntriesE}); + ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(), Inits)); + } + + auto *ImagesData = ConstantArray::get( + ArrayType::get(getDeviceImageTy(), ImagesInits.size()), ImagesInits); + + auto *Images = new GlobalVariable(M, ImagesData->getType(), true, + GlobalValue::InternalLinkage, ImagesData, + OffloadKindTag + "device_images"); + Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + + auto *ImagesB = ConstantExpr::getGetElementPtr(Images->getValueType(), + Images, ZeroZero); + + SmallVector Inits( + {ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB}); + if (EmitEntryTable) + Inits.append({EntriesB, EntriesE}); + + auto *DescInit = ConstantStruct::get(getBinDescTy(), Inits); + + GlobalValue::LinkageTypes Lnk = DescriptorName.getNumOccurrences() > 0 + ? GlobalValue::ExternalLinkage + : GlobalValue::InternalLinkage; + Twine DescName = Twine(OffloadKindTag) + Twine(DescriptorName); + + return new GlobalVariable(M, DescInit->getType(), true, Lnk, DescInit, + DescName); + } + + void createRegisterFunction(GlobalVariable *BinDesc) { + auto *FuncTy = FunctionType::get(Type::getVoidTy(C), {}, false); + auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, + OffloadKindTag + "descriptor_reg", &M); + Func->setSection(".text.startup"); + + // Get RegFuncName function declaration. + auto *RegFuncTy = + FunctionType::get(Type::getVoidTy(C), {getBinDescPtrTy()}, false); + auto *RegFunc = M.getOrInsertFunction(RegFuncName, RegFuncTy); + + // Construct function body + IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); + Builder.CreateCall(RegFunc, {BinDesc}); + Builder.CreateRetVoid(); + + // Add this function to constructors. + appendToGlobalCtors(M, Func, 0); + } + + void createUnregisterFunction(GlobalVariable *BinDesc) { + auto *FuncTy = FunctionType::get(Type::getVoidTy(C), {}, false); + auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage, + OffloadKindTag + "descriptor_unreg", &M); + Func->setSection(".text.startup"); + + // Get UnregFuncName function declaration. + auto *UnRegFuncTy = + FunctionType::get(Type::getVoidTy(C), {getBinDescPtrTy()}, false); + auto *UnRegFunc = M.getOrInsertFunction(UnregFuncName, UnRegFuncTy); + + // Construct function body + IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); + Builder.CreateCall(UnRegFunc, {BinDesc}); + Builder.CreateRetVoid(); + + // Add this function to global destructors. + appendToGlobalDtors(M, Func, 0); + } + +public: + BinaryWrapper(const StringRef &KindStr, const StringRef &Target) + : M("offload.wrapper.object", C) { + + OffloadKindTag = + (Twine(".") + Twine(KindStr) + Twine("_offloading.")).str(); + M.setTargetTriple(Target); + } + + const Module &wrapBinaries(const MemoryBuffersVector &Binaries) { + auto *Desc = createBinDesc(Binaries); + assert(Desc && "no binary descriptor"); + + if (EmitRegFuncs) { + createRegisterFunction(Desc); + createUnregisterFunction(Desc); + } + return M; + } +}; + +} // anonymous namespace + +int main(int argc, const char **argv) { + sys::PrintStackTraceOnErrorSignal(argv[0]); + + cl::HideUnrelatedOptions(ClangOffloadWrapperCategory); + cl::SetVersionPrinter([](raw_ostream &OS) { + OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n'; + }); + cl::ParseCommandLineOptions( + argc, argv, + "A tool to create a wrapper bitcode for offload target binaries.\n" + "Takes offload target binaries as input and produces bitcode file " + "containing\ntarget binaries packaged as data and initialization code " + "which registers target\nbinaries in offload runtime."); + + if (Help) { + cl::PrintHelpMessage(); + return 0; + } + + std::pair KindTriplePair = StringRef(Target).split('-'); + auto OffloadKindStr = KindTriplePair.first; + auto TargetStr = KindTriplePair.second; + + if (OffloadKindStr.empty()) { + errs() << "error: no offload kind specified\n"; + return 1; + } + OffloadKind Kind = parseOffloadKind(OffloadKindStr); + + if (Kind == OffloadKind::Unknown) { + errs() << "error: unknown offload kind: " << OffloadKindStr << "\n"; + errs() << "valid offload kinds: "; + dumpOffloadKinds(errs()); + errs() << "\n"; + return 1; + } + if (TargetStr.empty()) { + errs() << "error: no target specified\n"; + return 1; + } + // Create the bitcode file to write the resulting code to. + { + std::error_code EC; + raw_fd_ostream OutF(Output, EC, sys::fs::F_None); + if (EC) { + errs() << "error: unable to open output file: " << EC.message() << ".\n"; + return 1; + } + + // Read device binaries. + SmallVector, 4> DeviceBinaries; + for (const auto &File : Inputs) { + auto InputOrErr = MemoryBuffer::getFileOrSTDIN(File); + if (auto EC = InputOrErr.getError()) { + errs() << "error: can't open file " << File << ": " << EC.message() + << "\n"; + return 1; + } + DeviceBinaries.emplace_back(std::move(*InputOrErr)); + } + + // Create a wrapper for device binaries and write its bitcode to the file. + WriteBitcodeToFile( + BinaryWrapper(OffloadKindStr, TargetStr).wrapBinaries(DeviceBinaries), + OutF); + } + return 0; +} diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerOCLBlocks.cpp index 50e183826b60e..c80bf040ee6c9 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVLowerOCLBlocks.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVLowerOCLBlocks.cpp @@ -1,303 +1,303 @@ -//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===// -// -// The LLVM/SPIRV Translator -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -// Copyright (c) 2018 Intel Corporation. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimers in the documentation -// and/or other materials provided with the distribution. -// Neither the names of Intel Corporation, nor the names of its -// contributors may be used to endorse or promote products derived from this -// Software without specific prior written permission. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH -// THE SOFTWARE. -// -//===----------------------------------------------------------------------===// -// -// SPIR-V specification doesn't allow function pointers, so SPIR-V translator -// is designed to fail if a value with function type (except calls) is occured. -// Currently there is only two cases, when function pointers are generating in -// LLVM IR in OpenCL - block calls and device side enqueue built-in calls. -// -// In both cases values with function type used as intermediate representation -// for block literal structure. -// -// This pass is designed to find such cases and simplify them to avoid any -// function pointer types occurrences in LLVM IR in 4 steps. -// -// 1. Find all function pointer allocas, like -// %block = alloca void () * -// -// Then find a single store to that alloca: -// %blockLit = alloca <{ i32, i32, ...}>, align 4 -// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()* -// > store void ()* %0, void ()** %block, align 4 -// -// And replace the alloca users by new instructions which used stored value -// %blockLit itself instead of function pointer alloca %block. -// -// 2. Find consecutive casts from block literal type to i8 addrspace(4)* -// used function pointers as an intermediate type: -// %0 = bitcast <{ i32, i32 }> %block to void() * -// %1 = addrspacecast void() * %0 to i8 addrspace(4)* -// And simplify them: -// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)* -// -// 3. Find all unused instructions with function pointer type occured after -// pp.1-2 and remove them. -// -// 4. Find unused globals with function pointer type, like -// @block = constant void ()* -// bitcast ({ i32, i32 }* @__block_literal_global to void ()* -// -// And remove them. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "spv-lower-ocl-blocks" - -#include "OCLUtil.h" -#include "SPIRVInternal.h" - -#include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/PassSupport.h" -#include "llvm/Support/Casting.h" - -using namespace llvm; - -namespace { - -static void -removeUnusedFunctionPtrInst(Instruction *I, - SmallSetVector &FuncPtrInsts) { - for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) { - Instruction *OpI = dyn_cast(I->getOperand(OpIdx)); - I->setOperand(OpIdx, nullptr); - if (OpI && OpI != I && OpI->user_empty()) - FuncPtrInsts.insert(OpI); - } - I->eraseFromParent(); -} - -static bool isFuncPtrAlloca(const AllocaInst *AI) { - auto *ET = dyn_cast(AI->getAllocatedType()); - return ET && ET->getElementType()->isFunctionTy(); -} - -static bool hasFuncPtrType(const Value *V) { - auto *PT = dyn_cast(V->getType()); - return PT && PT->getElementType()->isFunctionTy(); -} - -static bool isFuncPtrInst(const Instruction *I) { - if (auto *AI = dyn_cast(I)) - return isFuncPtrAlloca(AI); - - for (auto &Op : I->operands()) { - if (auto *AI = dyn_cast(Op)) - return isFuncPtrAlloca(AI); - - auto *OpI = dyn_cast(&Op); - if (OpI && OpI != I && hasFuncPtrType(OpI)) - return true; - } - return false; -} - -static StoreInst *findSingleStore(AllocaInst *AI) { - StoreInst *Store = nullptr; - for (auto *U : AI->users()) { - if (!isa(U)) - continue; // not a store - if (Store) - return nullptr; // there are more than one stores - Store = dyn_cast(U); - } - return Store; -} - -static void fixFunctionPtrAllocaUsers(AllocaInst *AI) { - // Find and remove a single store to alloca - auto *SingleStore = findSingleStore(AI); - assert(SingleStore && "More than one store to the function pointer alloca"); - auto *StoredVal = SingleStore->getValueOperand(); - SingleStore->eraseFromParent(); - - // Find loads from the alloca and replace thier users - for (auto *U : AI->users()) { - auto *LI = dyn_cast(U); - if (!LI) - continue; - - for (auto *U : LI->users()) { - auto *UInst = cast(U); - auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast( - StoredVal, UInst->getType(), "", UInst); - UInst->replaceAllUsesWith(Cast); - } - } -} - -static int getBlockLiteralIdx(const Function &F) { - StringRef FName = F.getName(); - if (isEnqueueKernelBI(FName)) - return FName.contains("events") ? 7 : 4; - if (isKernelQueryBI(FName)) - return FName.contains("for_ndrange") ? 2 : 1; - if (FName.startswith("__") && FName.contains("_block_invoke")) - return F.hasStructRetAttr() ? 1 : 0; - - return -1; // No block literal argument -} - -static bool hasBlockLiteralArg(const Function &F) { - return getBlockLiteralIdx(F) != -1; -} - -static bool simplifyFunctionPtrCasts(Function &F) { - bool Changed = false; - int BlockLiteralIdx = getBlockLiteralIdx(F); - for (auto *U : F.users()) { - auto *Call = dyn_cast(U); - if (!Call) - continue; - if (Call->getFunction()->getName() == F.getName().str() + "_kernel") - continue; // Skip block invoke function calls inside block invoke kernels - - const DataLayout &DL = F.getParent()->getDataLayout(); - auto *BlockLiteral = Call->getOperand(BlockLiteralIdx); - auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL); - if (isa(BlockLiteralVal)) - continue; // nothing to do with globals - - auto *BlockLiteralAlloca = cast(BlockLiteralVal); - assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() && - "Function type shouldn't be there"); - - auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast( - BlockLiteralAlloca, BlockLiteral->getType(), "", Call); - BlockLiteral->replaceAllUsesWith(NewBlockLiteral); - Changed |= true; - } - return Changed; -} - -static void -findFunctionPtrAllocas(Module &M, - SmallVectorImpl &FuncPtrAllocas) { - for (auto &F : M) { - if (F.isDeclaration()) - continue; - for (auto &I : instructions(F)) { - auto *AI = dyn_cast(&I); - if (!AI || !isFuncPtrAlloca(AI)) - continue; - FuncPtrAllocas.push_back(AI); - } - } -} - -static void -findUnusedFunctionPtrInsts(Module &M, - SmallSetVector &FuncPtrInsts) { - for (auto &F : M) { - if (F.isDeclaration()) - continue; - for (auto &I : instructions(F)) - if (I.user_empty() && isFuncPtrInst(&I)) - FuncPtrInsts.insert(&I); - } -} - -static void -findUnusedFunctionPtrGlbs(Module &M, - SmallVectorImpl &FuncPtrGlbs) { - for (auto &GV : M.globals()) { - if (!GV.user_empty()) - continue; - auto *GVType = dyn_cast(GV.getType()->getElementType()); - if (GVType && GVType->getElementType()->isFunctionTy()) - FuncPtrGlbs.push_back(&GV); - } -} - -class SPIRVLowerOCLBlocks : public ModulePass { - -public: - SPIRVLowerOCLBlocks() : ModulePass(ID) {} - - bool runOnModule(Module &M) { - bool Changed = false; - - // 1. Find function pointer allocas and fix their users - SmallVector FuncPtrAllocas; - findFunctionPtrAllocas(M, FuncPtrAllocas); - - Changed |= !FuncPtrAllocas.empty(); - for (auto *AI : FuncPtrAllocas) - fixFunctionPtrAllocaUsers(AI); - - // 2. Simplify consecutive casts which use function pointer types - for (auto &F : M) - if (hasBlockLiteralArg(F)) - Changed |= simplifyFunctionPtrCasts(F); - - // 3. Cleanup unused instructions with function pointer type - // which are occured after pp. 1-2 - SmallSetVector FuncPtrInsts; - findUnusedFunctionPtrInsts(M, FuncPtrInsts); - - Changed |= !FuncPtrInsts.empty(); - while (!FuncPtrInsts.empty()) { - Instruction *I = FuncPtrInsts.pop_back_val(); - removeUnusedFunctionPtrInst(I, FuncPtrInsts); - } - - // 4. Find and remove unused global variables with function pointer type - SmallVector FuncPtrGlbs; - findUnusedFunctionPtrGlbs(M, FuncPtrGlbs); - - Changed |= !FuncPtrGlbs.empty(); - for (auto *GV : FuncPtrGlbs) - GV->eraseFromParent(); - - return Changed; - } - - static char ID; -}; // class SPIRVLowerOCLBlocks - -char SPIRVLowerOCLBlocks::ID = 0; - -} // namespace - -INITIALIZE_PASS( - SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks", - "Remove function pointers occured in case of using OpenCL blocks", false, - false) - -llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() { - return new SPIRVLowerOCLBlocks(); -} +//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===// +// +// The LLVM/SPIRV Translator +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (c) 2018 Intel Corporation. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimers in the documentation +// and/or other materials provided with the distribution. +// Neither the names of Intel Corporation, nor the names of its +// contributors may be used to endorse or promote products derived from this +// Software without specific prior written permission. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH +// THE SOFTWARE. +// +//===----------------------------------------------------------------------===// +// +// SPIR-V specification doesn't allow function pointers, so SPIR-V translator +// is designed to fail if a value with function type (except calls) is occured. +// Currently there is only two cases, when function pointers are generating in +// LLVM IR in OpenCL - block calls and device side enqueue built-in calls. +// +// In both cases values with function type used as intermediate representation +// for block literal structure. +// +// This pass is designed to find such cases and simplify them to avoid any +// function pointer types occurrences in LLVM IR in 4 steps. +// +// 1. Find all function pointer allocas, like +// %block = alloca void () * +// +// Then find a single store to that alloca: +// %blockLit = alloca <{ i32, i32, ...}>, align 4 +// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()* +// > store void ()* %0, void ()** %block, align 4 +// +// And replace the alloca users by new instructions which used stored value +// %blockLit itself instead of function pointer alloca %block. +// +// 2. Find consecutive casts from block literal type to i8 addrspace(4)* +// used function pointers as an intermediate type: +// %0 = bitcast <{ i32, i32 }> %block to void() * +// %1 = addrspacecast void() * %0 to i8 addrspace(4)* +// And simplify them: +// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)* +// +// 3. Find all unused instructions with function pointer type occured after +// pp.1-2 and remove them. +// +// 4. Find unused globals with function pointer type, like +// @block = constant void ()* +// bitcast ({ i32, i32 }* @__block_literal_global to void ()* +// +// And remove them. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "spv-lower-ocl-blocks" + +#include "OCLUtil.h" +#include "SPIRVInternal.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +namespace { + +static void +removeUnusedFunctionPtrInst(Instruction *I, + SmallSetVector &FuncPtrInsts) { + for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) { + Instruction *OpI = dyn_cast(I->getOperand(OpIdx)); + I->setOperand(OpIdx, nullptr); + if (OpI && OpI != I && OpI->user_empty()) + FuncPtrInsts.insert(OpI); + } + I->eraseFromParent(); +} + +static bool isFuncPtrAlloca(const AllocaInst *AI) { + auto *ET = dyn_cast(AI->getAllocatedType()); + return ET && ET->getElementType()->isFunctionTy(); +} + +static bool hasFuncPtrType(const Value *V) { + auto *PT = dyn_cast(V->getType()); + return PT && PT->getElementType()->isFunctionTy(); +} + +static bool isFuncPtrInst(const Instruction *I) { + if (auto *AI = dyn_cast(I)) + return isFuncPtrAlloca(AI); + + for (auto &Op : I->operands()) { + if (auto *AI = dyn_cast(Op)) + return isFuncPtrAlloca(AI); + + auto *OpI = dyn_cast(&Op); + if (OpI && OpI != I && hasFuncPtrType(OpI)) + return true; + } + return false; +} + +static StoreInst *findSingleStore(AllocaInst *AI) { + StoreInst *Store = nullptr; + for (auto *U : AI->users()) { + if (!isa(U)) + continue; // not a store + if (Store) + return nullptr; // there are more than one stores + Store = dyn_cast(U); + } + return Store; +} + +static void fixFunctionPtrAllocaUsers(AllocaInst *AI) { + // Find and remove a single store to alloca + auto *SingleStore = findSingleStore(AI); + assert(SingleStore && "More than one store to the function pointer alloca"); + auto *StoredVal = SingleStore->getValueOperand(); + SingleStore->eraseFromParent(); + + // Find loads from the alloca and replace thier users + for (auto *U : AI->users()) { + auto *LI = dyn_cast(U); + if (!LI) + continue; + + for (auto *U : LI->users()) { + auto *UInst = cast(U); + auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast( + StoredVal, UInst->getType(), "", UInst); + UInst->replaceAllUsesWith(Cast); + } + } +} + +static int getBlockLiteralIdx(const Function &F) { + StringRef FName = F.getName(); + if (isEnqueueKernelBI(FName)) + return FName.contains("events") ? 7 : 4; + if (isKernelQueryBI(FName)) + return FName.contains("for_ndrange") ? 2 : 1; + if (FName.startswith("__") && FName.contains("_block_invoke")) + return F.hasStructRetAttr() ? 1 : 0; + + return -1; // No block literal argument +} + +static bool hasBlockLiteralArg(const Function &F) { + return getBlockLiteralIdx(F) != -1; +} + +static bool simplifyFunctionPtrCasts(Function &F) { + bool Changed = false; + int BlockLiteralIdx = getBlockLiteralIdx(F); + for (auto *U : F.users()) { + auto *Call = dyn_cast(U); + if (!Call) + continue; + if (Call->getFunction()->getName() == F.getName().str() + "_kernel") + continue; // Skip block invoke function calls inside block invoke kernels + + const DataLayout &DL = F.getParent()->getDataLayout(); + auto *BlockLiteral = Call->getOperand(BlockLiteralIdx); + auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL); + if (isa(BlockLiteralVal)) + continue; // nothing to do with globals + + auto *BlockLiteralAlloca = cast(BlockLiteralVal); + assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() && + "Function type shouldn't be there"); + + auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast( + BlockLiteralAlloca, BlockLiteral->getType(), "", Call); + BlockLiteral->replaceAllUsesWith(NewBlockLiteral); + Changed |= true; + } + return Changed; +} + +static void +findFunctionPtrAllocas(Module &M, + SmallVectorImpl &FuncPtrAllocas) { + for (auto &F : M) { + if (F.isDeclaration()) + continue; + for (auto &I : instructions(F)) { + auto *AI = dyn_cast(&I); + if (!AI || !isFuncPtrAlloca(AI)) + continue; + FuncPtrAllocas.push_back(AI); + } + } +} + +static void +findUnusedFunctionPtrInsts(Module &M, + SmallSetVector &FuncPtrInsts) { + for (auto &F : M) { + if (F.isDeclaration()) + continue; + for (auto &I : instructions(F)) + if (I.user_empty() && isFuncPtrInst(&I)) + FuncPtrInsts.insert(&I); + } +} + +static void +findUnusedFunctionPtrGlbs(Module &M, + SmallVectorImpl &FuncPtrGlbs) { + for (auto &GV : M.globals()) { + if (!GV.user_empty()) + continue; + auto *GVType = dyn_cast(GV.getType()->getElementType()); + if (GVType && GVType->getElementType()->isFunctionTy()) + FuncPtrGlbs.push_back(&GV); + } +} + +class SPIRVLowerOCLBlocks : public ModulePass { + +public: + SPIRVLowerOCLBlocks() : ModulePass(ID) {} + + bool runOnModule(Module &M) { + bool Changed = false; + + // 1. Find function pointer allocas and fix their users + SmallVector FuncPtrAllocas; + findFunctionPtrAllocas(M, FuncPtrAllocas); + + Changed |= !FuncPtrAllocas.empty(); + for (auto *AI : FuncPtrAllocas) + fixFunctionPtrAllocaUsers(AI); + + // 2. Simplify consecutive casts which use function pointer types + for (auto &F : M) + if (hasBlockLiteralArg(F)) + Changed |= simplifyFunctionPtrCasts(F); + + // 3. Cleanup unused instructions with function pointer type + // which are occured after pp. 1-2 + SmallSetVector FuncPtrInsts; + findUnusedFunctionPtrInsts(M, FuncPtrInsts); + + Changed |= !FuncPtrInsts.empty(); + while (!FuncPtrInsts.empty()) { + Instruction *I = FuncPtrInsts.pop_back_val(); + removeUnusedFunctionPtrInst(I, FuncPtrInsts); + } + + // 4. Find and remove unused global variables with function pointer type + SmallVector FuncPtrGlbs; + findUnusedFunctionPtrGlbs(M, FuncPtrGlbs); + + Changed |= !FuncPtrGlbs.empty(); + for (auto *GV : FuncPtrGlbs) + GV->eraseFromParent(); + + return Changed; + } + + static char ID; +}; // class SPIRVLowerOCLBlocks + +char SPIRVLowerOCLBlocks::ID = 0; + +} // namespace + +INITIALIZE_PASS( + SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks", + "Remove function pointers occured in case of using OpenCL blocks", false, + false) + +llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() { + return new SPIRVLowerOCLBlocks(); +} diff --git a/sycl/include/CL/sycl/nd_range.hpp b/sycl/include/CL/sycl/nd_range.hpp index 6520309792b8a..689bc256dc4b5 100644 --- a/sycl/include/CL/sycl/nd_range.hpp +++ b/sycl/include/CL/sycl/nd_range.hpp @@ -1,59 +1,59 @@ -//==-------- nd_range.hpp --- SYCL iteration nd_range ----------------------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include -#include - -namespace cl { -namespace sycl { - -template class nd_range { - range globalSize; - range localSize; - id offset; - -public: - template - nd_range( - typename std::enable_if<((N > 0) && (N < 4)), range>::type globalSize, - range localSize, id offset = id()) - : globalSize(globalSize), localSize(localSize), offset(offset) {} - - range get_global_range() const { return globalSize; } - - range get_local_range() const { return localSize; } - - range get_group_range() const { return globalSize / localSize; } - - id get_offset() const { return offset; } - - // Common special member functions for by-value semantics - nd_range(const nd_range &rhs) = default; - nd_range(nd_range &&rhs) = default; - nd_range &operator=(const nd_range &rhs) = default; - nd_range &operator=(nd_range &&rhs) = default; - nd_range() = default; - - // Common member functions for by-value semantics - bool operator==(const nd_range &rhs) const { - return (rhs.globalSize == this->globalSize) && - (rhs.localSize == this->localSize) && (rhs.offset == this->offset); - } - - bool operator!=(const nd_range &rhs) const { - return !(*this == rhs); - } -}; - -} // namespace sycl -} // namespace cl +//==-------- nd_range.hpp --- SYCL iteration nd_range ----------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +namespace cl { +namespace sycl { + +template class nd_range { + range globalSize; + range localSize; + id offset; + +public: + template + nd_range( + typename std::enable_if<((N > 0) && (N < 4)), range>::type globalSize, + range localSize, id offset = id()) + : globalSize(globalSize), localSize(localSize), offset(offset) {} + + range get_global_range() const { return globalSize; } + + range get_local_range() const { return localSize; } + + range get_group_range() const { return globalSize / localSize; } + + id get_offset() const { return offset; } + + // Common special member functions for by-value semantics + nd_range(const nd_range &rhs) = default; + nd_range(nd_range &&rhs) = default; + nd_range &operator=(const nd_range &rhs) = default; + nd_range &operator=(nd_range &&rhs) = default; + nd_range() = default; + + // Common member functions for by-value semantics + bool operator==(const nd_range &rhs) const { + return (rhs.globalSize == this->globalSize) && + (rhs.localSize == this->localSize) && (rhs.offset == this->offset); + } + + bool operator!=(const nd_range &rhs) const { + return !(*this == rhs); + } +}; + +} // namespace sycl +} // namespace cl diff --git a/sycl/test/functor/kernel_functor.cpp b/sycl/test/functor/kernel_functor.cpp index 214e57ea0d3dc..b3fb07683d288 100644 --- a/sycl/test/functor/kernel_functor.cpp +++ b/sycl/test/functor/kernel_functor.cpp @@ -1,186 +1,186 @@ -// RUN: %clang -fsycl -o %t.out %s -lstdc++ -lOpenCL -lsycl -// RUN: cd %T -// RUN: env SYCL_DEVICE_TYPE=HOST %t.out | FileCheck %s -// RUNx: %CPU_RUN_PLACEHOLDER %t.out -// RUNx: %GPU_RUN_PLACEHOLDER %t.out -// CHECK:Passed. - -//==--- kernel_functor.cpp - Functors as SYCL kernel test ------------------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include -#include - -constexpr auto sycl_read_write = cl::sycl::access::mode::read_write; -constexpr auto sycl_global_buffer = cl::sycl::access::target::global_buffer; - -// Case 1: -// - functor class is defined in an anonymous namespace -// - the '()' operator: -// * does not have parameters (to be used in 'single_task'). -// * has no 'const' qualifier -namespace { -class Functor1 { -public: - Functor1( - int X_, - cl::sycl::accessor &Acc_) - : X(X_), Acc(Acc_) {} - - void operator()() { Acc[0] += X; } - -private: - int X; - cl::sycl::accessor Acc; -}; -} - -// Case 2: -// - functor class is defined in a namespace -// - the '()' operator: -// * does not have parameters (to be used in 'single_task'). -// * has the 'const' qualifier -namespace ns { -class Functor2 { -public: - Functor2( - int X_, - cl::sycl::accessor &Acc_) - : X(X_), Acc(Acc_) {} - - // cl::sycl::accessor's operator [] is const, hence 'const' is possible below - void operator()() const { Acc[0] += X; } - -private: - int X; - cl::sycl::accessor Acc; -}; -} - -// Case 3: -// - functor class is templated and defined in the translation unit scope -// - the '()' operator: -// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). -// * has no 'const' qualifier -template class TmplFunctor { -public: - TmplFunctor( - T X_, cl::sycl::accessor &Acc_) - : X(X_), Acc(Acc_) {} - - void operator()(cl::sycl::id<1> id) { Acc[id] += X; } - -private: - T X; - cl::sycl::accessor Acc; -}; - -// Case 4: -// - functor class is templated and defined in the translation unit scope -// - the '()' operator: -// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). -// * has the 'const' qualifier -template class TmplConstFunctor { -public: - TmplConstFunctor( - T X_, cl::sycl::accessor &Acc_) - : X(X_), Acc(Acc_) {} - - void operator()(cl::sycl::id<1> id) const { Acc[id] += X; } - -private: - T X; - cl::sycl::accessor Acc; -}; - -// Exercise non-templated functors in 'single_task'. -int foo(int X) { - int A[] = { 10 }; - { - cl::sycl::queue Q; - cl::sycl::buffer Buf(A, 1); - - Q.submit([&](cl::sycl::handler &cgh) { - auto Acc = Buf.get_access(cgh); - Functor1 F(X, Acc); - - cgh.single_task(F); - }); - Q.submit([&](cl::sycl::handler &cgh) { - auto Acc = Buf.get_access(cgh); - ns::Functor2 F(X, Acc); - - cgh.single_task(F); - }); - Q.submit([&](cl::sycl::handler &cgh) { - auto Acc = Buf.get_access(cgh); - ns::Functor2 F(X, Acc); - - cgh.single_task(F); - }); - } - return A[0]; -} - -#define ARR_LEN(x) sizeof(x) / sizeof(x[0]) - -// Exercise templated functors in 'parallel_for'. -template T bar(T X) { - T A[] = {(T)10, (T)10 }; - { - cl::sycl::queue Q; - cl::sycl::buffer Buf(A, ARR_LEN(A)); - - Q.submit([&](cl::sycl::handler &cgh) { - auto Acc = - Buf.template get_access(cgh); - TmplFunctor F(X, Acc); - - cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); - }); - // Spice with lambdas to make sure functors and lambdas work together. - Q.submit([&](cl::sycl::handler &cgh) { - auto Acc = - Buf.template get_access(cgh); - cgh.parallel_for( - cl::sycl::range<1>(ARR_LEN(A)), - [=](cl::sycl::id<1> id) { Acc[id] += X; }); - }); - Q.submit([&](cl::sycl::handler &cgh) { - auto Acc = - Buf.template get_access(cgh); - TmplConstFunctor F(X, Acc); - - cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); - }); - } - T res = (T)0; - - for (int i = 0; i < ARR_LEN(A); i++) - res += A[i]; - return res; -} - -int main() { - const int Res1 = foo(10); - const int Res2 = bar(10); - const int Gold1 = 40; - const int Gold2 = 80; - - if (Res1 != Gold1) { - std::cout << "FAILED. " << Res1 << "!=" << Gold1 << "\n"; - return 1; - } - if (Res2 != Gold2) { - std::cout << "FAILED. " << Res2 << "!=" << Gold2 << "\n"; - return 1; - } - std::cout << "Passed.\n"; - return 0; -} +// RUN: %clang -fsycl -o %t.out %s -lstdc++ -lOpenCL -lsycl +// RUN: cd %T +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out | FileCheck %s +// RUNx: %CPU_RUN_PLACEHOLDER %t.out +// RUNx: %GPU_RUN_PLACEHOLDER %t.out +// CHECK:Passed. + +//==--- kernel_functor.cpp - Functors as SYCL kernel test ------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include +#include + +constexpr auto sycl_read_write = cl::sycl::access::mode::read_write; +constexpr auto sycl_global_buffer = cl::sycl::access::target::global_buffer; + +// Case 1: +// - functor class is defined in an anonymous namespace +// - the '()' operator: +// * does not have parameters (to be used in 'single_task'). +// * has no 'const' qualifier +namespace { +class Functor1 { +public: + Functor1( + int X_, + cl::sycl::accessor &Acc_) + : X(X_), Acc(Acc_) {} + + void operator()() { Acc[0] += X; } + +private: + int X; + cl::sycl::accessor Acc; +}; +} + +// Case 2: +// - functor class is defined in a namespace +// - the '()' operator: +// * does not have parameters (to be used in 'single_task'). +// * has the 'const' qualifier +namespace ns { +class Functor2 { +public: + Functor2( + int X_, + cl::sycl::accessor &Acc_) + : X(X_), Acc(Acc_) {} + + // cl::sycl::accessor's operator [] is const, hence 'const' is possible below + void operator()() const { Acc[0] += X; } + +private: + int X; + cl::sycl::accessor Acc; +}; +} + +// Case 3: +// - functor class is templated and defined in the translation unit scope +// - the '()' operator: +// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). +// * has no 'const' qualifier +template class TmplFunctor { +public: + TmplFunctor( + T X_, cl::sycl::accessor &Acc_) + : X(X_), Acc(Acc_) {} + + void operator()(cl::sycl::id<1> id) { Acc[id] += X; } + +private: + T X; + cl::sycl::accessor Acc; +}; + +// Case 4: +// - functor class is templated and defined in the translation unit scope +// - the '()' operator: +// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). +// * has the 'const' qualifier +template class TmplConstFunctor { +public: + TmplConstFunctor( + T X_, cl::sycl::accessor &Acc_) + : X(X_), Acc(Acc_) {} + + void operator()(cl::sycl::id<1> id) const { Acc[id] += X; } + +private: + T X; + cl::sycl::accessor Acc; +}; + +// Exercise non-templated functors in 'single_task'. +int foo(int X) { + int A[] = { 10 }; + { + cl::sycl::queue Q; + cl::sycl::buffer Buf(A, 1); + + Q.submit([&](cl::sycl::handler &cgh) { + auto Acc = Buf.get_access(cgh); + Functor1 F(X, Acc); + + cgh.single_task(F); + }); + Q.submit([&](cl::sycl::handler &cgh) { + auto Acc = Buf.get_access(cgh); + ns::Functor2 F(X, Acc); + + cgh.single_task(F); + }); + Q.submit([&](cl::sycl::handler &cgh) { + auto Acc = Buf.get_access(cgh); + ns::Functor2 F(X, Acc); + + cgh.single_task(F); + }); + } + return A[0]; +} + +#define ARR_LEN(x) sizeof(x) / sizeof(x[0]) + +// Exercise templated functors in 'parallel_for'. +template T bar(T X) { + T A[] = {(T)10, (T)10 }; + { + cl::sycl::queue Q; + cl::sycl::buffer Buf(A, ARR_LEN(A)); + + Q.submit([&](cl::sycl::handler &cgh) { + auto Acc = + Buf.template get_access(cgh); + TmplFunctor F(X, Acc); + + cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); + }); + // Spice with lambdas to make sure functors and lambdas work together. + Q.submit([&](cl::sycl::handler &cgh) { + auto Acc = + Buf.template get_access(cgh); + cgh.parallel_for( + cl::sycl::range<1>(ARR_LEN(A)), + [=](cl::sycl::id<1> id) { Acc[id] += X; }); + }); + Q.submit([&](cl::sycl::handler &cgh) { + auto Acc = + Buf.template get_access(cgh); + TmplConstFunctor F(X, Acc); + + cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); + }); + } + T res = (T)0; + + for (int i = 0; i < ARR_LEN(A); i++) + res += A[i]; + return res; +} + +int main() { + const int Res1 = foo(10); + const int Res2 = bar(10); + const int Gold1 = 40; + const int Gold2 = 80; + + if (Res1 != Gold1) { + std::cout << "FAILED. " << Res1 << "!=" << Gold1 << "\n"; + return 1; + } + if (Res2 != Gold2) { + std::cout << "FAILED. " << Res2 << "!=" << Gold2 << "\n"; + return 1; + } + std::cout << "Passed.\n"; + return 0; +}