diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 43ce5d983217f..41e32c2433ac3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13116,6 +13116,19 @@ class Sema final { /// Adds Callee to DeviceCallGraph if we don't know if its caller will be /// codegen'ed yet. bool checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee); + +private: + /// Contains generated OpenCL kernel functions for SYCL. + SmallVector SYCLKernels; + +public: + void addSYCLKernel(Decl *D) { SYCLKernels.push_back(D); } + /// Access to SYCL kernels. + SmallVectorImpl &getSYCLKernels() { return SYCLKernels; } + + /// Constructs an OpenCL kernel using the KernelCaller function and adds it to + /// the SYCL device code. + void constructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); }; /// RAII object that enters a new expression evaluation context. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 294cc20f76c53..4c7a9e6df02ba 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11072,6 +11072,10 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { if (D->hasAttr() || D->hasAttr()) return true; + // If SYCL, only kernels are required. + if (LangOpts.SYCLIsDevice && !(D->hasAttr())) + return false; + if (const auto *FD = dyn_cast(D)) { // Forward declarations aren't required. if (!FD->doesThisDeclarationHaveABody()) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 59f3e02705713..2cdd98571817f 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2960,6 +2960,12 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } + if (LangOpts.SYCLIsDevice && Global->hasAttr() && + MustBeEmitted(Global)) { + addDeferredDeclToEmit(GD); + return; + } + // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast(Global)) { // Forward declarations are emitted lazily on first use. diff --git a/clang/lib/Parse/ParseAST.cpp b/clang/lib/Parse/ParseAST.cpp index 01510e8caf3b7..eee790a109f60 100644 --- a/clang/lib/Parse/ParseAST.cpp +++ b/clang/lib/Parse/ParseAST.cpp @@ -168,6 +168,10 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { for (Decl *D : S.WeakTopLevelDecls()) Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + if (S.getLangOpts().SYCLIsDevice) + for (Decl *D : S.getSYCLKernels()) + Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + Consumer->HandleTranslationUnit(S.getASTContext()); // Finalize the template instantiation observer chain. diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 815463307ecc7..9876182499705 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -8,7 +8,11 @@ // This implements Semantic Analysis for SYCL constructs. //===----------------------------------------------------------------------===// +#include "TreeTransform.h" +#include "clang/AST/AST.h" #include "clang/AST/Mangle.h" +#include "clang/AST/QualTypeNames.h" +#include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -48,3 +52,442 @@ bool Sema::checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee) { return DiagKind != SemaDiagnosticBuilder::K_Immediate && DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; } + +using ParamDesc = std::tuple; + +/// Various utilities. +class Util { +public: + using DeclContextDesc = std::pair; + + /// Checks whether given clang type is a full specialization of the SYCL + /// accessor class. + static bool isSyclAccessorType(const QualType &Ty); + + /// Checks whether given clang type is declared in the given hierarchy of + /// declaration contexts. + /// \param Ty the clang type being checked + /// \param Scopes the declaration scopes leading from the type to the + /// translation unit (excluding the latter) + static bool matchQualifiedTypeName(const QualType &Ty, + ArrayRef Scopes); +}; + +static CXXRecordDecl *getKernelObjectType(FunctionDecl *Caller) { + return (*Caller->param_begin())->getType()->getAsCXXRecordDecl(); +} + +class KernelBodyTransform : public TreeTransform { +public: + KernelBodyTransform(std::pair &MPair, + Sema &S) + : TreeTransform(S), MappingPair(MPair), SemaRef(S) {} + bool AlwaysRebuild() { return true; } + + ExprResult TransformDeclRefExpr(DeclRefExpr *DRE) { + auto Ref = dyn_cast(DRE->getDecl()); + if (Ref && Ref == MappingPair.first) { + auto NewDecl = MappingPair.second; + return DeclRefExpr::Create( + SemaRef.getASTContext(), DRE->getQualifierLoc(), + DRE->getTemplateKeywordLoc(), NewDecl, false, DRE->getNameInfo(), + NewDecl->getType(), DRE->getValueKind()); + } + return DRE; + } + +private: + std::pair MappingPair; + Sema &SemaRef; +}; + +static FunctionDecl * +CreateOpenCLKernelDeclaration(ASTContext &Context, StringRef Name, + ArrayRef ParamDescs) { + + DeclContext *DC = Context.getTranslationUnitDecl(); + QualType RetTy = Context.VoidTy; + SmallVector ArgTys; + + // Extract argument types from the descriptor array: + std::transform( + ParamDescs.begin(), ParamDescs.end(), std::back_inserter(ArgTys), + [](const ParamDesc &PD) -> QualType { return std::get<0>(PD); }); + FunctionProtoType::ExtProtoInfo Info(CC_OpenCLKernel); + QualType FuncTy = Context.getFunctionType(RetTy, ArgTys, Info); + DeclarationName DN = DeclarationName(&Context.Idents.get(Name)); + + FunctionDecl *OpenCLKernel = FunctionDecl::Create( + Context, DC, SourceLocation(), SourceLocation(), DN, FuncTy, + Context.getTrivialTypeSourceInfo(RetTy), SC_None); + + llvm::SmallVector Params; + int i = 0; + for (const auto &PD : ParamDescs) { + auto P = ParmVarDecl::Create(Context, OpenCLKernel, SourceLocation(), + SourceLocation(), std::get<1>(PD), + std::get<0>(PD), std::get<2>(PD), SC_None, 0); + P->setScopeInfo(0, i++); + P->setIsUsed(); + Params.push_back(P); + } + OpenCLKernel->setParams(Params); + + OpenCLKernel->addAttr(OpenCLKernelAttr::CreateImplicit(Context)); + OpenCLKernel->addAttr(AsmLabelAttr::CreateImplicit(Context, Name)); + OpenCLKernel->addAttr(ArtificialAttr::CreateImplicit(Context)); + + // Add kernel to translation unit to see it in AST-dump + DC->addDecl(OpenCLKernel); + return OpenCLKernel; +} + +/// Return __init method +static CXXMethodDecl *getInitMethod(const CXXRecordDecl *CRD) { + CXXMethodDecl *InitMethod; + auto It = std::find_if(CRD->methods().begin(), CRD->methods().end(), + [](const CXXMethodDecl *Method) { + return Method->getNameAsString() == "__init"; + }); + InitMethod = (It != CRD->methods().end()) ? *It : nullptr; + return InitMethod; +} + +// Creates body for new OpenCL kernel. This body contains initialization of SYCL +// kernel object fields with kernel parameters and a little bit transformed body +// of the kernel caller function. +static CompoundStmt *CreateOpenCLKernelBody(Sema &S, + FunctionDecl *KernelCallerFunc, + DeclContext *KernelDecl) { + llvm::SmallVector BodyStmts; + CXXRecordDecl *LC = getKernelObjectType(KernelCallerFunc); + assert(LC && "Kernel object must be available"); + TypeSourceInfo *TSInfo = LC->isLambda() ? LC->getLambdaTypeInfo() : nullptr; + + // Create a local kernel object (lambda or functor) assembled from the + // incoming formal parameters. + auto KernelObjClone = VarDecl::Create( + S.Context, KernelDecl, SourceLocation(), SourceLocation(), + LC->getIdentifier(), QualType(LC->getTypeForDecl(), 0), TSInfo, SC_None); + Stmt *DS = new (S.Context) DeclStmt(DeclGroupRef(KernelObjClone), + SourceLocation(), SourceLocation()); + BodyStmts.push_back(DS); + auto KernelObjCloneRef = + DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), SourceLocation(), + KernelObjClone, false, DeclarationNameInfo(), + QualType(LC->getTypeForDecl(), 0), VK_LValue); + + auto KernelFuncDecl = cast(KernelDecl); + auto KernelFuncParam = + KernelFuncDecl->param_begin(); // Iterator to ParamVarDecl (VarDecl) + if (KernelFuncParam) { + llvm::SmallVector InitExprs; + InitializedEntity VarEntity = + InitializedEntity::InitializeVariable(KernelObjClone); + for (auto Field : LC->fields()) { + // Creates Expression for special SYCL object accessor. + // All special SYCL objects must have __init method, here we use it to + // initialize them. We create call of __init method and pass built kernel + // arguments as parameters to the __init method. + auto getExprForSpecialSYCLObj = [&](const QualType ¶mTy, + FieldDecl *Field, + const CXXRecordDecl *CRD, + Expr *Base) { + // All special SYCL objects must have __init method. + CXXMethodDecl *InitMethod = getInitMethod(CRD); + assert(InitMethod && + "__init method is expected."); + unsigned NumParams = InitMethod->getNumParams(); + llvm::SmallVector ParamDREs(NumParams); + auto KFP = KernelFuncParam; + for (size_t I = 0; I < NumParams; ++KFP, ++I) { + QualType ParamType = (*KFP)->getOriginalType(); + ParamDREs[I] = DeclRefExpr::Create( + S.Context, NestedNameSpecifierLoc(), SourceLocation(), *KFP, + false, DeclarationNameInfo(), ParamType, VK_LValue); + } + + if (NumParams) + std::advance(KernelFuncParam, NumParams - 1); + + DeclAccessPair FieldDAP = DeclAccessPair::make(Field, AS_none); + // [kernel_obj].special_obj + auto SpecialObjME = MemberExpr::Create( + S.Context, Base, false, SourceLocation(), NestedNameSpecifierLoc(), + SourceLocation(), Field, FieldDAP, + DeclarationNameInfo(Field->getDeclName(), SourceLocation()), + nullptr, Field->getType(), VK_LValue, OK_Ordinary, NOUR_None); + + // [kernel_obj].special_obj.__init + DeclAccessPair MethodDAP = DeclAccessPair::make(InitMethod, AS_none); + auto ME = MemberExpr::Create( + S.Context, SpecialObjME, false, SourceLocation(), + NestedNameSpecifierLoc(), SourceLocation(), InitMethod, MethodDAP, + DeclarationNameInfo(InitMethod->getDeclName(), SourceLocation()), + nullptr, InitMethod->getType(), VK_LValue, OK_Ordinary, NOUR_None); + + // Not referenced -> not emitted + S.MarkFunctionReferenced(SourceLocation(), InitMethod, true); + + QualType ResultTy = InitMethod->getReturnType(); + ExprValueKind VK = Expr::getValueKindForType(ResultTy); + ResultTy = ResultTy.getNonLValueExprType(S.Context); + + llvm::SmallVector ParamStmts; + const auto *Proto = cast(InitMethod->getType()); + S.GatherArgumentsForCall(SourceLocation(), InitMethod, Proto, 0, + ParamDREs, ParamStmts); + // [kernel_obj].special_obj.__init(_ValueType*, + // range, range, id) + CXXMemberCallExpr *Call = + CXXMemberCallExpr::Create(S.Context, ME, ParamStmts, ResultTy, VK, + SourceLocation(), FPOptionsOverride()); + BodyStmts.push_back(Call); + }; + + // Run through kernel object fields and add initialization for them using + // built kernel parameters. There are a several possible cases: + // - Kernel object field is a SYCL special object (SYCL accessor). + // These objects has a special initialization scheme - using + // __init method. + // - Kernel object field has a scalar type. In this case we should add + // simple initialization. + // - Kernel object field has a structure or class type. Same handling as + // a scalar. + QualType FieldType = Field->getType(); + CXXRecordDecl *CRD = FieldType->getAsCXXRecordDecl(); + InitializedEntity Entity = + InitializedEntity::InitializeMember(Field, &VarEntity); + if (Util::isSyclAccessorType(FieldType)) { + // Initialize kernel object field with the default constructor and + // construct a call of __init method. + InitializationKind InitKind = + InitializationKind::CreateDefault(SourceLocation()); + InitializationSequence InitSeq(S, Entity, InitKind, None); + ExprResult MemberInit = InitSeq.Perform(S, Entity, InitKind, None); + InitExprs.push_back(MemberInit.get()); + getExprForSpecialSYCLObj(FieldType, Field, CRD, KernelObjCloneRef); + } else if (CRD || FieldType->isScalarType()) { + // If field has built-in or a structure/class type just initialize + // this field with corresponding kernel argument using copy + // initialization. + QualType ParamType = (*KernelFuncParam)->getOriginalType(); + Expr *DRE = + DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), + SourceLocation(), *KernelFuncParam, false, + DeclarationNameInfo(), ParamType, VK_LValue); + + InitializationKind InitKind = + InitializationKind::CreateCopy(SourceLocation(), SourceLocation()); + InitializationSequence InitSeq(S, Entity, InitKind, DRE); + + ExprResult MemberInit = InitSeq.Perform(S, Entity, InitKind, DRE); + InitExprs.push_back(MemberInit.get()); + + } else + llvm_unreachable("Unsupported field type"); + KernelFuncParam++; + } + Expr *ILE = new (S.Context) + InitListExpr(S.Context, SourceLocation(), InitExprs, SourceLocation()); + ILE->setType(QualType(LC->getTypeForDecl(), 0)); + KernelObjClone->setInit(ILE); + } + + // In the kernel caller function kernel object is a function parameter, so we + // need to replace all refs to this kernel oject with refs to our clone + // declared inside the kernel body. + Stmt *FunctionBody = KernelCallerFunc->getBody(); + ParmVarDecl *KernelObjParam = *(KernelCallerFunc->param_begin()); + + // DeclRefExpr with a valid source location but with decl which is not marked + // as used becomes invalid. + KernelObjClone->setIsUsed(); + std::pair MappingPair; + MappingPair.first = KernelObjParam; + MappingPair.second = KernelObjClone; + + // Function scope might be empty, so we do push + S.PushFunctionScope(); + KernelBodyTransform KBT(MappingPair, S); + Stmt *NewBody = KBT.TransformStmt(FunctionBody).get(); + BodyStmts.push_back(NewBody); + return CompoundStmt::Create(S.Context, BodyStmts, SourceLocation(), + SourceLocation()); +} + +/// Creates a kernel parameter descriptor +/// \param Src field declaration to construct name from +/// \param Ty the desired parameter type +/// \return the constructed descriptor +static ParamDesc makeParamDesc(const FieldDecl *Src, QualType Ty) { + ASTContext &Ctx = Src->getASTContext(); + std::string Name = (Twine("_arg_") + Src->getName()).str(); + return std::make_tuple(Ty, &Ctx.Idents.get(Name), + Ctx.getTrivialTypeSourceInfo(Ty)); +} + +// Creates list of kernel parameters descriptors using KernelObj (kernel +// object). Fields of kernel object must be initialized with SYCL kernel +// arguments so in the following function we extract types of kernel object +// fields and add it to the array with kernel parameters descriptors. +static void buildArgTys(ASTContext &Context, CXXRecordDecl *KernelObj, + SmallVectorImpl &ParamDescs) { + auto CreateAndAddPrmDsc = [&](const FieldDecl *Fld, const QualType &ArgType) { + // Create a parameter descriptor and append it to the result + ParamDescs.push_back(makeParamDesc(Fld, ArgType)); + }; + + // Creates a parameter descriptor for SYCL special object - SYCL accessor. + // All special SYCL objects must have __init method. We extract types for + // kernel parameters from __init method parameters. We will use __init method + // and kernel parameters which we build here to initialize special objects in + // the kernel body. + auto createSpecialSYCLObjParamDesc = [&](const FieldDecl *Fld, + const QualType &ArgTy) { + const auto *RecordDecl = ArgTy->getAsCXXRecordDecl(); + assert(RecordDecl && "Special SYCL object must be of a record type"); + + CXXMethodDecl *InitMethod = getInitMethod(RecordDecl); + assert(InitMethod && "__init method is expected."); + unsigned NumParams = InitMethod->getNumParams(); + for (size_t I = 0; I < NumParams; ++I) { + ParmVarDecl *PD = InitMethod->getParamDecl(I); + CreateAndAddPrmDsc(Fld, PD->getType().getCanonicalType()); + } + }; + + // Run through kernel object fields and create corresponding kernel + // parameters descriptors. There are a several possible cases: + // - Kernel object field is a SYCL special object (SYCL accessor). + // These objects has a special initialization scheme - using + // __init method. + // - Kernel object field has a scalar type. In this case we should add + // kernel parameter with the same type. + // - Kernel object field has a structure or class type. Same handling as a + // scalar but we should check if this structure/class contains accessors + // and add parameter decriptor for them properly. + for (const auto *Fld : KernelObj->fields()) { + QualType ArgTy = Fld->getType(); + if (Util::isSyclAccessorType(ArgTy)) + createSpecialSYCLObjParamDesc(Fld, ArgTy); + else if (ArgTy->isStructureOrClassType()) + CreateAndAddPrmDsc(Fld, ArgTy); + else if (ArgTy->isScalarType()) + CreateAndAddPrmDsc(Fld, ArgTy); + else + llvm_unreachable("Unsupported kernel parameter type"); + } +} + +// Creates a mangled kernel name for given kernel name type +static std::string constructKernelName(QualType KernelNameType, + MangleContext &MC) { + SmallString<256> Result; + llvm::raw_svector_ostream Out(Result); + + MC.mangleTypeName(KernelNameType, Out); + return std::string(Out.str()); +} + +// Generates the OpenCL kernel using KernelCallerFunc (kernel caller +// function) defined is SYCL headers. +// Generated OpenCL kernel contains the body of the kernel caller function, +// receives OpenCL like parameters and additionally does some manipulation to +// initialize captured lambda/functor fields with these parameters. +// SYCL runtime marks kernel caller function with sycl_kernel attribute. +// To be able to generate OpenCL kernel from KernelCallerFunc we put +// the following requirements to the function which SYCL runtime can mark with +// sycl_kernel attribute: +// - Must be template function with at least two template parameters. +// First parameter must represent "unique kernel name" +// Second parameter must be the function object type +// - Must have only one function parameter - function object. +// +// Example of kernel caller function: +// template +// __attribute__((sycl_kernel)) void kernel_caller_function(KernelType +// KernelFuncObj) { +// KernelFuncObj(); +// } +// +// +void Sema::constructOpenCLKernel(FunctionDecl *KernelCallerFunc, + MangleContext &MC) { + CXXRecordDecl *LE = getKernelObjectType(KernelCallerFunc); + assert(LE && "invalid kernel caller"); + + // Build list of kernel arguments. + llvm::SmallVector ParamDescs; + buildArgTys(getASTContext(), LE, ParamDescs); + + // Extract name from kernel caller parameters and mangle it. + const TemplateArgumentList *TemplateArgs = + KernelCallerFunc->getTemplateSpecializationArgs(); + assert(TemplateArgs && "No template argument info"); + QualType KernelNameType = TypeName::getFullyQualifiedType( + TemplateArgs->get(0).getAsType(), getASTContext(), true); + std::string Name = constructKernelName(KernelNameType, MC); + + FunctionDecl *OpenCLKernel = + CreateOpenCLKernelDeclaration(getASTContext(), Name, ParamDescs); + + // Let's copy source location of a functor/lambda to emit nicer diagnostics. + OpenCLKernel->setLocation(LE->getLocation()); + + CompoundStmt *OpenCLKernelBody = + CreateOpenCLKernelBody(*this, KernelCallerFunc, OpenCLKernel); + OpenCLKernel->setBody(OpenCLKernelBody); + + addSYCLKernel(OpenCLKernel); +} + +// ----------------------------------------------------------------------------- +// Utility class methods +// ----------------------------------------------------------------------------- + +bool Util::isSyclAccessorType(const QualType &Ty) { + static std::array Scopes = { + Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"}, + Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"}, + Util::DeclContextDesc{clang::Decl::Kind::ClassTemplateSpecialization, + "accessor"}}; + return matchQualifiedTypeName(Ty, Scopes); +} + +bool Util::matchQualifiedTypeName(const QualType &Ty, + ArrayRef Scopes) { + // The idea: check the declaration context chain starting from the type + // itself. At each step check the context is of expected kind + // (namespace) and name. + const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl(); + + if (!RecTy) + return false; // only classes/structs supported + const auto *Ctx = dyn_cast(RecTy); + StringRef Name = ""; + + for (const auto &Scope : llvm::reverse(Scopes)) { + clang::Decl::Kind DK = Ctx->getDeclKind(); + + if (DK != Scope.first) + return false; + + switch (DK) { + case clang::Decl::Kind::ClassTemplateSpecialization: + // ClassTemplateSpecializationDecl inherits from CXXRecordDecl + case clang::Decl::Kind::CXXRecord: + Name = cast(Ctx)->getName(); + break; + case clang::Decl::Kind::Namespace: + Name = cast(Ctx)->getName(); + break; + default: + llvm_unreachable("matchQualifiedTypeName: decl kind not supported"); + } + if (Name != Scope.second) + return false; + Ctx = Ctx->getParent(); + } + return Ctx->isTranslationUnit(); +} diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 27ac2cd08f2a8..8bcec92993a68 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -18,6 +18,7 @@ #include "clang/AST/DependentDiagnostic.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/Mangle.h" #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/SourceManager.h" @@ -6270,6 +6271,8 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D, /// instantiations we have seen until this point. void Sema::PerformPendingInstantiations(bool LocalOnly) { std::deque delayedPCHInstantiations; + std::unique_ptr MangleCtx( + getASTContext().createMangleContext()); while (!PendingLocalImplicitInstantiations.empty() || (!LocalOnly && !PendingInstantiations.empty())) { PendingImplicitInstantiation Inst; @@ -6288,17 +6291,25 @@ void Sema::PerformPendingInstantiations(bool LocalOnly) { TSK_ExplicitInstantiationDefinition; if (Function->isMultiVersion()) { getASTContext().forEachMultiversionedFunctionVersion( - Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) { + Function, [this, Inst, DefinitionRequired, + MangleCtx = move(MangleCtx)](FunctionDecl *CurFD) { InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true, DefinitionRequired, true); - if (CurFD->isDefined()) + if (CurFD->isDefined()) { CurFD->setInstantiationIsPending(false); + if (getLangOpts().SYCLIsDevice && + CurFD->hasAttr()) + constructOpenCLKernel(CurFD, *MangleCtx); + } }); } else { InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true, DefinitionRequired, true); - if (Function->isDefined()) + if (Function->isDefined()) { + if (getLangOpts().SYCLIsDevice && Function->hasAttr()) + constructOpenCLKernel(Function, *MangleCtx); Function->setInstantiationIsPending(false); + } } // Definition of a PCH-ed template declaration may be available only in the TU. if (!LocalOnly && LangOpts.PCHInstantiateTemplates && diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp new file mode 100644 index 0000000000000..56908fe5f9a3a --- /dev/null +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -0,0 +1,86 @@ +#pragma once + +inline namespace cl { +namespace sycl { +namespace access { + +enum class target { + global_buffer = 2014, + constant_buffer, + local, + image, + host_buffer, + host_image, + image_array +}; + +enum class mode { + read = 1024, + write, + read_write, + discard_write, + discard_read_write, + atomic +}; + +enum class placeholder { + false_t, + true_t +}; + +enum class address_space : int { + private_space = 0, + global_space, + constant_space, + local_space +}; +} // namespace access + +template +struct id { + template + id(T... args) {} // fake constructor +private: + // Some fake field added to see using of id arguments in the + // kernel wrapper + int Data; +}; + +template +struct range { + template + range(T... args) {} // fake constructor +private: + // Some fake field added to see using of range arguments in the + // kernel wrapper + int Data; +}; + +template +struct _ImplT { + range AccessRange; + range MemRange; + id Offset; +}; + +template +class accessor { + +public: + void use(void) const {} + template + void use(T... args) {} + template + void use(T... args) const {} + _ImplT impl; + +private: + void __init(__attribute__((opencl_global)) dataT *Ptr, + range AccessRange, + range MemRange, id Offset) {} +}; + +} // namespace sycl +} // namespace cl diff --git a/clang/test/CodeGenSYCL/address-space-conversions.cpp b/clang/test/CodeGenSYCL/address-space-conversions.cpp index 3732c4a1b889b..b49ee9d8c1505 100644 --- a/clang/test/CodeGenSYCL/address-space-conversions.cpp +++ b/clang/test/CodeGenSYCL/address-space-conversions.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -I%S/Inputs -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s void bar(int &Data) {} // CHECK-DAG: define{{.*}} spir_func void @[[RAW_REF:[a-zA-Z0-9_]+]](i32 addrspace(4)* align 4 dereferenceable(4) % void bar2(int &Data) {} @@ -136,3 +136,15 @@ void usages() { // CHECK-DAG: define linkonce_odr spir_func void @_Z4tmplIPU3AS3iEvT_(i32 addrspace(3)* % // CHECK-DAG: define linkonce_odr spir_func void @_Z4tmplIPU3AS0iEvT_(i32* % // CHECK-DAG: define linkonce_odr spir_func void @_Z4tmplIPiEvT_(i32 addrspace(4)* % + +#include "sycl.hpp" + +int main() { + cl::sycl::queue Q; + Q.submit([&](cl::sycl::handler &cgh) { + cgh.single_task([=]() { + usages(); + }); + }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-space-deduction.cpp b/clang/test/CodeGenSYCL/address-space-deduction.cpp index 3453d18787c26..03206c5d4a24b 100644 --- a/clang/test/CodeGenSYCL/address-space-deduction.cpp +++ b/clang/test/CodeGenSYCL/address-space-deduction.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -I%S/Inputs -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s -// CHECK-LABEL: @_Z4testv( +// Validates SYCL deduction rules compliance. +// See clang/docs/SYCLSupport.rst#address-space-handling for the details. + +// CHECK-LABEL: define {{.*}} @_Z4testv( // CHECK-NEXT: entry: // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[PPTR:%.*]] = alloca i32 addrspace(4)*, align 8 @@ -87,7 +90,8 @@ // CHECK-NEXT: store i8 addrspace(4)* getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @.str.1 to [21 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* [[SELECT_STR_TRIVIAL2_ASCAST]], align 8 // CHECK-NEXT: ret void // -void test() { + void test() { + static const int foo = 0x42; @@ -127,3 +131,15 @@ void test() { const char *select_str_trivial2 = false ? str : "Another hello world!"; (void)select_str_trivial2; } + +#include "sycl.hpp" + +int main() { + cl::sycl::queue Q; + Q.submit([&](cl::sycl::handler &cgh) { + cgh.single_task([=]() { + test(); + }); + }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-space-mangling.cpp b/clang/test/CodeGenSYCL/address-space-mangling.cpp index 76feec552fa2b..ceab9414a75c3 100644 --- a/clang/test/CodeGenSYCL/address-space-mangling.cpp +++ b/clang/test/CodeGenSYCL/address-space-mangling.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=SPIR -// RUN: %clang_cc1 -triple x86_64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=X86 +// RUN: %clang_cc1 -I%S/Inputs -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=SPIR +// RUN: %clang_cc1 -I%S/Inputs -triple x86_64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=X86 // REQUIRES: x86-registered-target @@ -8,15 +8,15 @@ void foo(__attribute__((opencl_local)) int *); void foo(__attribute__((opencl_private)) int *); void foo(int *); -// SPIR: declare spir_func void @_Z3fooPU3AS1i(i32 addrspace(1)*) #1 -// SPIR: declare spir_func void @_Z3fooPU3AS3i(i32 addrspace(3)*) #1 -// SPIR: declare spir_func void @_Z3fooPU3AS0i(i32*) #1 -// SPIR: declare spir_func void @_Z3fooPi(i32 addrspace(4)*) #1 +// SPIR: declare spir_func void @_Z3fooPU3AS1i(i32 addrspace(1)*) +// SPIR: declare spir_func void @_Z3fooPU3AS3i(i32 addrspace(3)*) +// SPIR: declare spir_func void @_Z3fooPU3AS0i(i32*) +// SPIR: declare spir_func void @_Z3fooPi(i32 addrspace(4)*) -// X86: declare void @_Z3fooPU8SYglobali(i32*) #1 -// X86: declare void @_Z3fooPU7SYlocali(i32*) #1 -// X86: declare void @_Z3fooPU9SYprivatei(i32*) #1 -// X86: declare void @_Z3fooPi(i32*) #1 +// X86: declare void @_Z3fooPU8SYglobali(i32*) +// X86: declare void @_Z3fooPU7SYlocali(i32*) +// X86: declare void @_Z3fooPU9SYprivatei(i32*) +// X86: declare void @_Z3fooPi(i32*) void test() { __attribute__((opencl_global)) int *glob; @@ -28,3 +28,15 @@ void test() { foo(priv); foo(def); } + +#include "sycl.hpp" + +int main() { + cl::sycl::queue Q; + Q.submit([&](cl::sycl::handler &cgh) { + cgh.single_task([=]() { + test(); + }); + }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp b/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp new file mode 100644 index 0000000000000..f118c931e171a --- /dev/null +++ b/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// This test checks that compiler generates correct kernel wrapper for basic +// case. + +#include "Inputs/sycl.hpp" + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + cl::sycl::accessor accessorA; + kernel( + [=]() { + accessorA.use(); + }); + return 0; +} + +// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_function +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE:%[a-zA-Z0-9_]+_1]], +// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE:%[a-zA-Z0-9_]+_2]], +// CHECK-SAME: %"struct.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET:%[a-zA-Z0-9_]+]]) +// Check alloca for pointer argument +// CHECK: [[MEM_ARG]].addr = alloca i32 addrspace(1)* +// CHECK: [[ANONALLOCA:%[0-9]+]] = alloca %class.anon +// CHECK: [[ARANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range" +// CHECK: [[MRANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range" +// CHECK: [[OIDA:%agg.tmp.*]] = alloca %"struct.cl::sycl::id" +// CHECK: [[ANON:%[0-9]+]] = addrspacecast %class.anon* [[ANONALLOCA]] to %class.anon addrspace(4)* +// CHECK: [[ARANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[ARANGEA]] to %"struct.cl::sycl::range" addrspace(4)* +// CHECK: [[MRANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[MRANGEA]] to %"struct.cl::sycl::range" addrspace(4)* +// CHECK: [[OIDT:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id"* [[OIDA]] to %"struct.cl::sycl::id" addrspace(4)* +// +// Check store of kernel pointer argument to alloca +// CHECK: store i32 addrspace(1)* [[MEM_ARG]], i32 addrspace(1)* addrspace(4)* [[MEM_ARG]].addr.ascast, align 8 + +// Check for default constructor of accessor +// CHECK: call spir_func {{.*}}accessor + +// Check accessor GEP +// CHECK: [[ACCESSOR:%[a-zA-Z0-9_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANON]], i32 0, i32 0 + +// Check load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[MEM_ARG]].addr.ascast + +// Check accessor __init method call +// CHECK: [[ARANGE:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range" addrspace(4)* [[ARANGET]] to %"struct.cl::sycl::range"* +// CHECK: [[MRANGE:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range" addrspace(4)* [[MRANGET]] to %"struct.cl::sycl::range"* +// CHECK: [[OID:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id" addrspace(4)* [[OIDT]] to %"struct.cl::sycl::id"* +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.cl::sycl::accessor" addrspace(4)* {{[^,]*}} [[ACCESSOR]], i32 addrspace(1)* [[MEM_LOAD]], %"struct.cl::sycl::range"* byval({{.*}}) align 4 [[ARANGE]], %"struct.cl::sycl::range"* byval({{.*}}) align 4 [[MRANGE]], %"struct.cl::sycl::id"* byval({{.*}}) align 4 [[OID]]) + +// Check lambda "()" operator call +// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}}) diff --git a/clang/test/CodeGenSYCL/device-functions.cpp b/clang/test/CodeGenSYCL/device-functions.cpp new file mode 100644 index 0000000000000..c8fa8729e29a6 --- /dev/null +++ b/clang/test/CodeGenSYCL/device-functions.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -S -emit-llvm %s -o - | FileCheck %s + +template +T bar(T arg); + +void foo() { + int a = 1 + 1 + bar(1); +} + +template +T bar(T arg) { + return arg; +} + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +// Make sure that definitions for the types not used in SYCL kernels are not +// emitted +// CHECK-NOT: %struct.A +// CHECK-NOT: @a = {{.*}} %struct.A +struct A { + int x = 10; +} a; + +int main() { + a.x = 8; + kernel_single_task([]() { foo(); }); + return 0; +} + +// baz is not called from the SYCL kernel, so it must not be emitted +// CHECK-NOT: define {{.*}} @{{.*}}baz +void baz() {} + +// CHECK-LABEL: define dso_local spir_kernel void @{{.*}}test_kernel +// CHECK-LABEL: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{.*}}%this) +// CHECK-LABEL: define dso_local spir_func void @{{.*}}foo +// CHECK-LABEL: define linkonce_odr spir_func i32 @{{.*}}bar diff --git a/clang/test/CodeGenSYCL/field-annotate-addr-space.cpp b/clang/test/CodeGenSYCL/field-annotate-addr-space.cpp index 86d6f9a8a9e32..0c0c306ee361d 100644 --- a/clang/test/CodeGenSYCL/field-annotate-addr-space.cpp +++ b/clang/test/CodeGenSYCL/field-annotate-addr-space.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -I%S/Inputs -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s // CHECK: [[ANNOT:.+]] = private unnamed_addr constant {{.*}}c"my_annotation\00" @@ -17,3 +17,15 @@ void foo(int *b) { // CHECK: bitcast i8 addrspace(4)* %[[CALL]] to i32 addrspace(4)* addrspace(4)* f.a = b; } + +#include "sycl.hpp" + +int main() { + cl::sycl::queue Q; + Q.submit([&](cl::sycl::handler &cgh) { + cgh.single_task([=]() { + foo(nullptr); + }); + }); + return 0; +} diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp new file mode 100644 index 0000000000000..9663a895243f8 --- /dev/null +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -0,0 +1,87 @@ +#pragma once + +namespace cl { +namespace sycl { +namespace access { + +enum class target { + global_buffer = 2014, + constant_buffer, + local, + image, + host_buffer, + host_image, + image_array +}; + +enum class mode { + read = 1024, + write, + read_write, + discard_write, + discard_read_write, + atomic +}; + +enum class placeholder { false_t, + true_t }; + +enum class address_space : int { + private_space = 0, + global_space, + constant_space, + local_space +}; +} // namespace access + +template +struct range { +}; + +template +struct id { +}; + +template +struct _ImplT { + range AccessRange; + range MemRange; + id Offset; +}; + +template +struct DeviceValueType; + +template +struct DeviceValueType { + using type = __attribute__((opencl_global)) dataT; +}; + +template +struct DeviceValueType { + using type = __attribute__((opencl_global)) const dataT; +}; + +template +struct DeviceValueType { + using type = __attribute__((opencl_local)) dataT; +}; + +template +class accessor { + +public: + void use(void) const {} + void use(void *) const {} + _ImplT impl; + +private: + using PtrType = typename DeviceValueType::type *; + void __init(PtrType Ptr, range AccessRange, + range MemRange, id Offset) {} +}; + +} // namespace sycl +} // namespace cl diff --git a/clang/test/SemaSYCL/accessors-targets.cpp b/clang/test/SemaSYCL/accessors-targets.cpp new file mode 100644 index 0000000000000..ad6a6106c8f1a --- /dev/null +++ b/clang/test/SemaSYCL/accessors-targets.cpp @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s + +// This test checks that compiler generates correct OpenCL kernel arguments for +// different accessors targets. + +#include "Inputs/sycl.hpp" + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + accessor + local_acc; + accessor + global_acc; + kernel( + [=]() { + local_acc.use(); + }); + kernel( + [=]() { + global_acc.use(); + }); +} +// CHECK: {{.*}}use_local 'void (__local int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// CHECK: {{.*}}use_global 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' diff --git a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp new file mode 100644 index 0000000000000..4e78277837f05 --- /dev/null +++ b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -I %S/Inputs -fsycl-is-device -ast-dump %s | FileCheck %s + +// This test checks that compiler generates correct initialization for arguments +// that have struct or built-in type inside the OpenCL kernel + +#include + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +struct test_struct { + int data; +}; + +void test(const int some_const) { + kernel( + [=]() { + int a = some_const; + }); +} + +int main() { + int data = 5; + test_struct s; + s.data = data; + kernel( + [=]() { + int kernel_data = data; + }); + kernel( + [=]() { + test_struct k_s; + k_s = s; + }); + const int some_const = 10; + test(some_const); + return 0; +} +// Check kernel parameters +// CHECK: FunctionDecl {{.*}}kernel_const{{.*}} 'void (const int)' +// CHECK: ParmVarDecl {{.*}} used _arg_ 'const int' + +// Check that lambda field of const built-in type is initialized +// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})' +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'const int' lvalue ParmVar {{.*}} '_arg_' 'const int' + +// Check kernel parameters +// CHECK: {{.*}}kernel_int{{.*}} 'void (int)' +// CHECK: ParmVarDecl {{.*}} used _arg_ 'int' + +// Check that lambda field of built-in type is initialized +// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})' +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_' 'int' + +// Check kernel parameters +// CHECK: {{.*}}kernel_struct{{.*}} 'void (test_struct)' +// CHECK: ParmVarDecl {{.*}} used _arg_ 'test_struct' + +// Check that lambda field of struct type is initialized +// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})' +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: CXXConstructExpr {{.*}}'test_struct'{{.*}}void (const test_struct &) +// CHECK-NEXT: ImplicitCastExpr {{.*}}'const test_struct' lvalue +// CHECK-NEXT: DeclRefExpr {{.*}} 'test_struct' lvalue ParmVar {{.*}} '_arg_' 'test_struct' diff --git a/clang/test/SemaSYCL/fake-accessors.cpp b/clang/test/SemaSYCL/fake-accessors.cpp new file mode 100644 index 0000000000000..acce120e49f68 --- /dev/null +++ b/clang/test/SemaSYCL/fake-accessors.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -I %S/Inputs -fsycl-is-device -ast-dump %s | FileCheck %s + +#include + +namespace foo { +namespace cl { +namespace sycl { +class accessor { +public: + int field; +}; +} // namespace sycl +} // namespace cl +} // namespace foo + +class accessor { +public: + int field; +}; + +typedef cl::sycl::accessor + MyAccessorTD; + +using MyAccessorA = cl::sycl::accessor; + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + foo::cl::sycl::accessor acc = {1}; + accessor acc1 = {1}; + + cl::sycl::accessor accessorA; + cl::sycl::accessor accessorB; + cl::sycl::accessor accessorC; + kernel( + [=]() { + accessorA.use((void*)(acc.field + acc1.field)); + }); + kernel( + [=]() { + accessorB.use((void*)(acc.field + acc1.field)); + }); + kernel( + [=]() { + accessorC.use((void*)(acc.field + acc1.field)); + }); + return 0; +} +// CHECK: fake_accessors 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor) +// CHECK: accessor_typedef 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor) +// CHECK: accessor_alias 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor) diff --git a/clang/test/SemaSYCL/mangle-kernel.cpp b/clang/test/SemaSYCL/mangle-kernel.cpp new file mode 100644 index 0000000000000..4cbdfd56bc5d9 --- /dev/null +++ b/clang/test/SemaSYCL/mangle-kernel.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple spir64-unknown-unknown-unknown -I %S/Inputs -I %S/../Headers/Inputs/include/ -fsycl-is-device -ast-dump %s | FileCheck %s --check-prefix=CHECK-64 +// RUN: %clang_cc1 -triple spir-unknown-unknown-unknown -I %S/Inputs -I %S/../Headers/Inputs/include/ -fsycl-is-device -ast-dump %s | FileCheck %s --check-prefix=CHECK-32 +#include +#include + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +template +class SimpleVadd; + +int main() { + kernel>( + [=](){}); + + kernel>( + [=](){}); + + kernel>( + [=](){}); + return 0; +} + +// CHECK: _ZTS10SimpleVaddIiE +// CHECK: _ZTS10SimpleVaddIdE +// CHECK-64: _ZTS10SimpleVaddImE +// CHECK-32: _ZTS10SimpleVaddIjE