From 422a5faba4fbbfefd99a4d5ec127c4d03b8493a6 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 30 Jul 2019 14:56:41 +0300 Subject: [PATCH] [SYCL] Add unnamed lambda kernels support (#387) Add support for kernels without kernel name under -fsycl-unnamed-lambda flag. User activates the extension with the new flag. Uses new __unique_stable_name compiler built-in. Only the version of the integration header needed is generated (rather than both version with #ifdef). Defines __SYCL_UNNAMED_LAMBDA__ when extension is active. Signed-off-by: Roland Schulz Signed-off-by: Alexey Bader --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 2 + clang/include/clang/Sema/Sema.h | 13 +++- clang/lib/Driver/ToolChains/Clang.cpp | 2 + clang/lib/Frontend/CompilerInvocation.cpp | 1 + clang/lib/Frontend/InitPreprocessor.cpp | 2 + clang/lib/Sema/SemaSYCL.cpp | 43 +++++++----- clang/test/CodeGenSYCL/integration_header.cpp | 1 - clang/test/CodeGenSYCL/wrapped-accessor.cpp | 2 - sycl/include/CL/sycl/detail/kernel_desc.hpp | 24 +++++++ sycl/test/regression/kernel_name_class.cpp | 2 + sycl/test/regression/kernel_unnamed.cpp | 67 +++++++++++++++++++ 12 files changed, 137 insertions(+), 23 deletions(-) create mode 100644 sycl/test/regression/kernel_unnamed.cpp diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index a58ea7ff57d65..4a32aea6be13e 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -226,6 +226,7 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation") LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code") +LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels") LANGOPT(SizedDeallocation , 1, 0, "sized deallocation") LANGOPT(AlignedAllocation , 1, 0, "aligned allocation") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 8103ffdd79be2..4a662fea65ac0 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1744,6 +1744,8 @@ def fno_sycl_use_bitcode : Flag<["-"], "fno-sycl-use-bitcode">, Flags<[CC1Option]>, HelpText<"Use SPIR-V instead of LLVM bitcode in fat objects">; def fsycl_link : Flag<["-"], "fsycl-link">, Flags<[CC1Option]>, HelpText<"Generate partially linked device object to be used with the host link">; +def fsycl_unnamed_lambda : Flag<["-"], "fsycl-unnamed-lambda">, + Flags<[CC1Option]>, HelpText<"Allow unnamed SYCL lambda kernels">; def fsyntax_only : Flag<["-"], "fsyntax-only">, Flags<[DriverOption,CoreOption,CC1Option]>, Group; def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 6efeb1c0959a6..9c13b62c8dd11 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -306,7 +306,7 @@ class SYCLIntegrationHeader { }; public: - SYCLIntegrationHeader(DiagnosticsEngine &Diag); + SYCLIntegrationHeader(DiagnosticsEngine &Diag, bool UnnamedLambdaSupport); /// Emits contents of the header into given stream. void emit(raw_ostream &Out); @@ -317,7 +317,8 @@ class SYCLIntegrationHeader { /// Signals that subsequent parameter descriptor additions will go to /// the kernel with given name. Starts new kernel invocation descriptor. - void startKernel(StringRef KernelName, QualType KernelNameType); + void startKernel(StringRef KernelName, QualType KernelNameType, + StringRef KernelStableName); /// Adds a kernel parameter descriptor to current kernel invocation /// descriptor. @@ -352,6 +353,9 @@ class SYCLIntegrationHeader { /// Kernel name type. QualType NameType; + /// Kernel name with stable lambda name mangling + std::string StableName; + /// Descriptor of kernel actual parameters. SmallVector Params; @@ -387,6 +391,9 @@ class SYCLIntegrationHeader { /// Used for emitting diagnostics. DiagnosticsEngine &Diag; + + /// Whether header is generated with unnamed lambda support + bool UnnamedLambdaSupport; }; /// Keeps track of expected type during expression parsing. The type is tied to @@ -11404,7 +11411,7 @@ class Sema { SYCLIntegrationHeader &getSyclIntegrationHeader() { if (SyclIntHeader == nullptr) SyclIntHeader = llvm::make_unique( - getDiagnostics()); + getDiagnostics(), getLangOpts().SYCLUnnamedLambda); return *SyclIntHeader.get(); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2799d460f0bc7..f8bbe165cbc73 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5373,6 +5373,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, HeaderOpt += Output.getFilename(); CmdArgs.push_back(Args.MakeArgString(HeaderOpt)); } + if (Args.hasArg(options::OPT_fsycl_unnamed_lambda)) + CmdArgs.push_back("-fsycl-unnamed-lambda"); } // OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index a2fd1d9c82ce3..867d60c5996a6 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2991,6 +2991,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.SYCLIsHost = Args.hasArg(options::OPT_fsycl_is_host); Opts.SYCLAllowFuncPtr = Args.hasFlag(options::OPT_fsycl_allow_func_ptr, options::OPT_fno_sycl_allow_func_ptr, false); + Opts.SYCLUnnamedLambda = Args.hasArg(options::OPT_fsycl_unnamed_lambda); // Set CUDA mode for OpenMP target NVPTX if specified in options Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() && diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index a4936d5dcb607..f369d2a087ed2 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1077,6 +1077,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (!getenv("DISABLE_INFER_AS")) Builder.defineMacro("__SYCL_ENABLE_INFER_AS__", "1"); } + if (LangOpts.SYCLUnnamedLambda) + Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__", "1"); // OpenCL definitions. if (LangOpts.OpenCL) { diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index eccc4ca189d80..db28e3a917bd7 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -936,7 +936,9 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, ASTContext &Ctx = KernelObjTy->getASTContext(); const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(KernelObjTy); - H.startKernel(Name, NameType); + const std::string StableName = PredefinedExpr::ComputeName( + Ctx, PredefinedExpr::UniqueStableNameExpr, NameType); + H.startKernel(Name, NameType, StableName); auto populateHeaderForAccessor = [&](const QualType &ArgTy, uint64_t Offset) { // The parameter is a SYCL accessor object. @@ -1247,7 +1249,7 @@ void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D) { ? cast(D)->getTemplatedDecl() : dyn_cast(D); - if (TD && TD->isCompleteDefinition()) { + if (TD && TD->isCompleteDefinition() && !UnnamedLambdaSupport) { // defined class constituting the kernel name is not globally // accessible - contradicts the spec Diag.Report(D->getSourceRange().getBegin(), @@ -1377,11 +1379,13 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "#include \n"; O << "\n"; - O << "// Forward declarations of templated kernel function types:\n"; + if (!UnnamedLambdaSupport) { + O << "// Forward declarations of templated kernel function types:\n"; - llvm::SmallPtrSet Printed; - for (const KernelDesc &K : KernelDescs) { - emitForwardClassDecls(O, K.NameType, Printed); + llvm::SmallPtrSet Printed; + for (const KernelDesc &K : KernelDescs) { + emitForwardClassDecls(O, K.NameType, Printed); + } } O << "\n"; @@ -1444,19 +1448,21 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { } O << "};\n\n"; - O << "// Specializations of this template class encompasses information\n"; - O << "// about a kernel. The kernel is identified by the template\n"; - O << "// parameter type.\n"; - O << "template struct KernelInfo;\n"; - O << "\n"; - O << "// Specializations of KernelInfo for kernel function types:\n"; CurStart = 0; for (const KernelDesc &K : KernelDescs) { const size_t N = K.Params.size(); - O << "template <> struct KernelInfo<" - << eraseAnonNamespace(K.NameType.getAsString()) << "> {\n"; + if (UnnamedLambdaSupport) { + O << "template <> struct KernelInfoData<"; + O << "'" << K.StableName.front(); + for (char c : StringRef(K.StableName).substr(1)) + O << "', '" << c; + O << "'> {\n"; + } else { + O << "template <> struct KernelInfo<" + << eraseAnonNamespace(K.NameType.getAsString()) << "> {\n"; + } O << " DLL_LOCAL\n"; O << " static constexpr const char* getName() { return \"" << K.Name << "\"; }\n"; @@ -1494,10 +1500,12 @@ bool SYCLIntegrationHeader::emit(const StringRef &IntHeaderName) { } void SYCLIntegrationHeader::startKernel(StringRef KernelName, - QualType KernelNameType) { + QualType KernelNameType, + StringRef KernelStableName) { KernelDescs.resize(KernelDescs.size() + 1); KernelDescs.back().Name = KernelName; KernelDescs.back().NameType = KernelNameType; + KernelDescs.back().StableName = KernelStableName; } void SYCLIntegrationHeader::addParamDesc(kernel_param_kind_t Kind, int Info, @@ -1515,8 +1523,9 @@ void SYCLIntegrationHeader::endKernel() { // nop for now } -SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag) - : Diag(_Diag) {} +SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag, + bool _UnnamedLambdaSupport) + : Diag(_Diag), UnnamedLambdaSupport(_UnnamedLambdaSupport) {} bool Util::isSyclAccessorType(const QualType &Ty) { return isSyclType(Ty, "accessor", true /*Tmpl*/); diff --git a/clang/test/CodeGenSYCL/integration_header.cpp b/clang/test/CodeGenSYCL/integration_header.cpp index e0ef5f6cffda2..92ce63de9fb6b 100644 --- a/clang/test/CodeGenSYCL/integration_header.cpp +++ b/clang/test/CodeGenSYCL/integration_header.cpp @@ -47,7 +47,6 @@ // CHECK-EMPTY: // CHECK-NEXT: }; // -// CHECK: template struct KernelInfo; // CHECK: template <> struct KernelInfo { // CHECK: template <> struct KernelInfo<::second_namespace::second_kernel> { // CHECK: template <> struct KernelInfo<::third_kernel<1, int, ::point >> { diff --git a/clang/test/CodeGenSYCL/wrapped-accessor.cpp b/clang/test/CodeGenSYCL/wrapped-accessor.cpp index 2b1d8e176013a..f286af7398b93 100644 --- a/clang/test/CodeGenSYCL/wrapped-accessor.cpp +++ b/clang/test/CodeGenSYCL/wrapped-accessor.cpp @@ -27,8 +27,6 @@ // CHECK-NEXT: 0 // _ZTSZ4mainE14wrapped_access // CHECK-NEXT: }; -// CHECK: template struct KernelInfo; - // CHECK: template <> struct KernelInfo { #include diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp index ebf71ac8561c7..5ff253ad8e27b 100644 --- a/sycl/include/CL/sycl/detail/kernel_desc.hpp +++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp @@ -51,6 +51,7 @@ struct kernel_param_desc_t { int offset; }; +#ifndef __SYCL_UNNAMED_LAMBDA__ template struct KernelInfo { static constexpr unsigned getNumParams() { return 0; } static const kernel_param_desc_t &getParamDesc(int Idx) { @@ -59,6 +60,29 @@ template struct KernelInfo { } static constexpr const char *getName() { return ""; } }; +#else +template struct KernelInfoData; // Should this have dummy impl? + +// C++14 like index_sequence and make_index_sequence +// not needed C++14 members (value_type, size) not implemented +template struct integer_sequence {}; +template using index_sequence = integer_sequence; +template +using make_index_sequence = __make_integer_seq; + +template struct KernelInfoImpl { +private: + static constexpr auto n = __unique_stable_name(T); + template + static KernelInfoData impl(index_sequence) { + return {}; + } + +public: + using type = decltype(impl(make_index_sequence<__builtin_strlen(n)>{})); +}; +template using KernelInfo = typename KernelInfoImpl::type; +#endif //__SYCL_UNNAMED_LAMBDA__ } // namespace detail } // namespace sycl diff --git a/sycl/test/regression/kernel_name_class.cpp b/sycl/test/regression/kernel_name_class.cpp index b38e2b2b2574c..9f3959aa2912a 100644 --- a/sycl/test/regression/kernel_name_class.cpp +++ b/sycl/test/regression/kernel_name_class.cpp @@ -3,6 +3,8 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %clangxx -fsycl %s -o %t.ext.out -lOpenCL -fsycl-unnamed-lambda +// RUN: %CPU_RUN_PLACEHOLDER %t.ext.out //==-- kernel_name_class.cpp - SYCL kernel naming variants test ------------==// // diff --git a/sycl/test/regression/kernel_unnamed.cpp b/sycl/test/regression/kernel_unnamed.cpp new file mode 100644 index 0000000000000..9b2bb4c870c58 --- /dev/null +++ b/sycl/test/regression/kernel_unnamed.cpp @@ -0,0 +1,67 @@ +// RUN: %clangxx -fsycl %s -o %t.out -lOpenCL -fsycl-unnamed-lambda +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +//==-- kernel_unnamed.cpp - SYCL kernel naming variants test ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#define GOLD 10 +static int NumTestCases = 0; + +template +void foo(cl::sycl::queue &deviceQueue, cl::sycl::buffer &buf, F f) { + deviceQueue.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() { acc[0] = f(acc[0], GOLD); }); + }); +} + +namespace nm { +struct Wrapper { + + int test() { + int arr[] = {0}; + { + // Simple test + cl::sycl::queue deviceQueue; + cl::sycl::buffer buf(arr, 1); + deviceQueue.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() { acc[0] += GOLD; }); + }); + ++NumTestCases; + +// Test lambdas with different ordinal because of macro expansion +#ifdef __SYCL_DEVICE_ONLY__ + [] {}(); +#endif + deviceQueue.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task([=]() { acc[0] += GOLD; }); + }); + ++NumTestCases; + + // Test lambda passed to function + foo(deviceQueue, buf, [](int a, int b) { return a + b; }); + ++NumTestCases; + } + return arr[0]; + } +}; +} // namespace nm + +int main() { + nm::Wrapper w; + int res = w.test(); + assert (res == GOLD * NumTestCases && "Wrong result"); +}