[SYCL] Add unnamed lambda kernels support (#387)

Add support for kernels without kernel name under -fsycl-unnamed-lambda flag. User activates the extension with the new flag. Uses new __unique_stable_name compiler built-in. Only the version of the integration header needed is generated (rather than both version with #ifdef). Defines __SYCL_UNNAMED_LAMBDA__ when extension is active. Signed-off-by: Roland Schulz <roland.schulz@intel.com> Signed-off-by: Alexey Bader <alexey.bader@intel.com>
intel · Jul 30, 2019 · 422a5fa · 422a5fa
1 parent 08a923f
commit 422a5fa
Show file tree

Hide file tree

Showing 12 changed files with 137 additions and 23 deletions.
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
@@ -226,6 +226,7 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
 LANGOPT(SYCLIsDevice      , 1, 0, "Generate code for SYCL device")
 LANGOPT(SYCLIsHost        , 1, 0, "SYCL host compilation")
 LANGOPT(SYCLAllowFuncPtr  , 1, 0, "Allow function pointers in SYCL device code")
+LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
 
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -1744,6 +1744,8 @@ def fno_sycl_use_bitcode : Flag<["-"], "fno-sycl-use-bitcode">,
   Flags<[CC1Option]>, HelpText<"Use SPIR-V instead of LLVM bitcode in fat objects">;
 def fsycl_link : Flag<["-"], "fsycl-link">,
   Flags<[CC1Option]>, HelpText<"Generate partially linked device object to be used with the host link">;
+def fsycl_unnamed_lambda : Flag<["-"], "fsycl-unnamed-lambda">,
+  Flags<[CC1Option]>, HelpText<"Allow unnamed SYCL lambda kernels">;
 def fsyntax_only : Flag<["-"], "fsyntax-only">,
   Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
 def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
@@ -306,7 +306,7 @@ class SYCLIntegrationHeader {
   };
 
 public:
-  SYCLIntegrationHeader(DiagnosticsEngine &Diag);
+  SYCLIntegrationHeader(DiagnosticsEngine &Diag, bool UnnamedLambdaSupport);
 
   /// Emits contents of the header into given stream.
   void emit(raw_ostream &Out);
@@ -317,7 +317,8 @@ class SYCLIntegrationHeader {
 
   ///  Signals that subsequent parameter descriptor additions will go to
   ///  the kernel with given name. Starts new kernel invocation descriptor.
-  void startKernel(StringRef KernelName, QualType KernelNameType);
+  void startKernel(StringRef KernelName, QualType KernelNameType,
+                   StringRef KernelStableName);
 
   /// Adds a kernel parameter descriptor to current kernel invocation
   /// descriptor.
@@ -352,6 +353,9 @@ class SYCLIntegrationHeader {
     /// Kernel name type.
     QualType NameType;
 
+    /// Kernel name with stable lambda name mangling
+    std::string StableName;
+
     /// Descriptor of kernel actual parameters.
     SmallVector<KernelParamDesc, 8> Params;
 
@@ -387,6 +391,9 @@ class SYCLIntegrationHeader {
 
   /// Used for emitting diagnostics.
   DiagnosticsEngine &Diag;
+
+  /// Whether header is generated with unnamed lambda support
+  bool UnnamedLambdaSupport;
 };
 
 /// Keeps track of expected type during expression parsing. The type is tied to
@@ -11404,7 +11411,7 @@ class Sema {
   SYCLIntegrationHeader &getSyclIntegrationHeader() {
     if (SyclIntHeader == nullptr)
       SyclIntHeader = llvm::make_unique<SYCLIntegrationHeader>(
-        getDiagnostics());
+          getDiagnostics(), getLangOpts().SYCLUnnamedLambda);
     return *SyclIntHeader.get();
   }
 

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5373,6 +5373,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       HeaderOpt += Output.getFilename();
       CmdArgs.push_back(Args.MakeArgString(HeaderOpt));
     }
+    if (Args.hasArg(options::OPT_fsycl_unnamed_lambda))
+      CmdArgs.push_back("-fsycl-unnamed-lambda");
   }
 
   // OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path

diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2991,6 +2991,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   Opts.SYCLIsHost   = Args.hasArg(options::OPT_fsycl_is_host);
   Opts.SYCLAllowFuncPtr = Args.hasFlag(options::OPT_fsycl_allow_func_ptr,
                                   options::OPT_fno_sycl_allow_func_ptr, false);
+  Opts.SYCLUnnamedLambda = Args.hasArg(options::OPT_fsycl_unnamed_lambda);
 
   // Set CUDA mode for OpenMP target NVPTX if specified in options
   Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&

diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1077,6 +1077,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
     if (!getenv("DISABLE_INFER_AS"))
       Builder.defineMacro("__SYCL_ENABLE_INFER_AS__", "1");
   }
+  if (LangOpts.SYCLUnnamedLambda)
+    Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__", "1");
 
   // OpenCL definitions.
   if (LangOpts.OpenCL) {

diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
@@ -936,7 +936,9 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name,
 
   ASTContext &Ctx = KernelObjTy->getASTContext();
   const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(KernelObjTy);
-  H.startKernel(Name, NameType);
+  const std::string StableName = PredefinedExpr::ComputeName(
+      Ctx, PredefinedExpr::UniqueStableNameExpr, NameType);
+  H.startKernel(Name, NameType, StableName);
 
   auto populateHeaderForAccessor = [&](const QualType &ArgTy, uint64_t Offset) {
     // The parameter is a SYCL accessor object.
@@ -1247,7 +1249,7 @@ void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D) {
                                 ? cast<ClassTemplateDecl>(D)->getTemplatedDecl()
                                 : dyn_cast<TagDecl>(D);
 
-        if (TD && TD->isCompleteDefinition()) {
+        if (TD && TD->isCompleteDefinition() && !UnnamedLambdaSupport) {
           // defined class constituting the kernel name is not globally
           // accessible - contradicts the spec
           Diag.Report(D->getSourceRange().getBegin(),
@@ -1377,11 +1379,13 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
   O << "#include <CL/sycl/detail/kernel_desc.hpp>\n";
 
   O << "\n";
-  O << "// Forward declarations of templated kernel function types:\n";
+  if (!UnnamedLambdaSupport) {
+    O << "// Forward declarations of templated kernel function types:\n";
 
-  llvm::SmallPtrSet<const void *, 4> Printed;
-  for (const KernelDesc &K : KernelDescs) {
-    emitForwardClassDecls(O, K.NameType, Printed);
+    llvm::SmallPtrSet<const void *, 4> Printed;
+    for (const KernelDesc &K : KernelDescs) {
+      emitForwardClassDecls(O, K.NameType, Printed);
+    }
   }
   O << "\n";
 
@@ -1444,19 +1448,21 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
   }
   O << "};\n\n";
 
-  O << "// Specializations of this template class encompasses information\n";
-  O << "// about a kernel. The kernel is identified by the template\n";
-  O << "// parameter type.\n";
-  O << "template <class KernelNameType> struct KernelInfo;\n";
-  O << "\n";
-
   O << "// Specializations of KernelInfo for kernel function types:\n";
   CurStart = 0;
 
   for (const KernelDesc &K : KernelDescs) {
     const size_t N = K.Params.size();
-    O << "template <> struct KernelInfo<"
-      << eraseAnonNamespace(K.NameType.getAsString()) << "> {\n";
+    if (UnnamedLambdaSupport) {
+      O << "template <> struct KernelInfoData<";
+      O << "'" << K.StableName.front();
+      for (char c : StringRef(K.StableName).substr(1))
+        O << "', '" << c;
+      O << "'> {\n";
+    } else {
+      O << "template <> struct KernelInfo<"
+        << eraseAnonNamespace(K.NameType.getAsString()) << "> {\n";
+    }
     O << "  DLL_LOCAL\n";
     O << "  static constexpr const char* getName() { return \"" << K.Name
       << "\"; }\n";
@@ -1494,10 +1500,12 @@ bool SYCLIntegrationHeader::emit(const StringRef &IntHeaderName) {
 }
 
 void SYCLIntegrationHeader::startKernel(StringRef KernelName,
-                                        QualType KernelNameType) {
+                                        QualType KernelNameType,
+                                        StringRef KernelStableName) {
   KernelDescs.resize(KernelDescs.size() + 1);
   KernelDescs.back().Name = KernelName;
   KernelDescs.back().NameType = KernelNameType;
+  KernelDescs.back().StableName = KernelStableName;
 }
 
 void SYCLIntegrationHeader::addParamDesc(kernel_param_kind_t Kind, int Info,
@@ -1515,8 +1523,9 @@ void SYCLIntegrationHeader::endKernel() {
   // nop for now
 }
 
-SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag)
-    : Diag(_Diag) {}
+SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag,
+                                             bool _UnnamedLambdaSupport)
+    : Diag(_Diag), UnnamedLambdaSupport(_UnnamedLambdaSupport) {}
 
 bool Util::isSyclAccessorType(const QualType &Ty) {
   return isSyclType(Ty, "accessor", true /*Tmpl*/);

diff --git a/clang/test/CodeGenSYCL/integration_header.cpp b/clang/test/CodeGenSYCL/integration_header.cpp
@@ -47,7 +47,6 @@
 // CHECK-EMPTY:
 // CHECK-NEXT: };
 //
-// CHECK: template <class KernelNameType> struct KernelInfo;
 // CHECK: template <> struct KernelInfo<class first_kernel> {
 // CHECK: template <> struct KernelInfo<::second_namespace::second_kernel<char>> {
 // CHECK: template <> struct KernelInfo<::third_kernel<1, int, ::point<X> >> {

diff --git a/clang/test/CodeGenSYCL/wrapped-accessor.cpp b/clang/test/CodeGenSYCL/wrapped-accessor.cpp
@@ -27,8 +27,6 @@
 // CHECK-NEXT:  0 // _ZTSZ4mainE14wrapped_access
 // CHECK-NEXT: };
 
-// CHECK: template <class KernelNameType> struct KernelInfo;
-
 // CHECK: template <> struct KernelInfo<class wrapped_access> {
 
 #include <sycl.hpp>

diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp
@@ -51,6 +51,7 @@ struct kernel_param_desc_t {
   int offset;
 };
 
+#ifndef __SYCL_UNNAMED_LAMBDA__
 template <class KernelNameType> struct KernelInfo {
   static constexpr unsigned getNumParams() { return 0; }
   static const kernel_param_desc_t &getParamDesc(int Idx) {
@@ -59,6 +60,29 @@ template <class KernelNameType> struct KernelInfo {
   }
   static constexpr const char *getName() { return ""; }
 };
+#else
+template <char...> struct KernelInfoData; // Should this have dummy impl?
+
+// C++14 like index_sequence and make_index_sequence
+// not needed C++14 members (value_type, size) not implemented
+template <class T, T...> struct integer_sequence {};
+template <size_t... I> using index_sequence = integer_sequence<size_t, I...>;
+template <size_t N>
+using make_index_sequence = __make_integer_seq<integer_sequence, size_t, N>;
+
+template <typename T> struct KernelInfoImpl {
+private:
+  static constexpr auto n = __unique_stable_name(T);
+  template <std::size_t... I>
+  static KernelInfoData<n[I]...> impl(index_sequence<I...>) {
+    return {};
+  }
+
+public:
+  using type = decltype(impl(make_index_sequence<__builtin_strlen(n)>{}));
+};
+template <typename T> using KernelInfo = typename KernelInfoImpl<T>::type;
+#endif //__SYCL_UNNAMED_LAMBDA__
 
 } // namespace detail
 } // namespace sycl

diff --git a/sycl/test/regression/kernel_name_class.cpp b/sycl/test/regression/kernel_name_class.cpp
@@ -3,6 +3,8 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %clangxx -fsycl %s -o %t.ext.out -lOpenCL -fsycl-unnamed-lambda
+// RUN: %CPU_RUN_PLACEHOLDER %t.ext.out
 
 //==-- kernel_name_class.cpp - SYCL kernel naming variants test ------------==//
 //

diff --git a/sycl/test/regression/kernel_unnamed.cpp b/sycl/test/regression/kernel_unnamed.cpp
@@ -0,0 +1,67 @@
+// RUN: %clangxx -fsycl %s -o %t.out -lOpenCL -fsycl-unnamed-lambda
+// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+//==-- kernel_unnamed.cpp - SYCL kernel naming variants test ------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+#include <iostream>
+
+#define GOLD 10
+static int NumTestCases = 0;
+
+template <class F>
+void foo(cl::sycl::queue &deviceQueue, cl::sycl::buffer<int, 1> &buf, F f) {
+  deviceQueue.submit([&](cl::sycl::handler &cgh) {
+    auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
+    cgh.single_task([=]() { acc[0] = f(acc[0], GOLD); });
+  });
+}
+
+namespace nm {
+struct Wrapper {
+
+  int test() {
+    int arr[] = {0};
+    {
+      // Simple test
+      cl::sycl::queue deviceQueue;
+      cl::sycl::buffer<int, 1> buf(arr, 1);
+      deviceQueue.submit([&](cl::sycl::handler &cgh) {
+        auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
+        cgh.single_task([=]() { acc[0] += GOLD; });
+      });
+      ++NumTestCases;
+
+// Test lambdas with different ordinal because of macro expansion
+#ifdef __SYCL_DEVICE_ONLY__
+      [] {}();
+#endif
+      deviceQueue.submit([&](cl::sycl::handler &cgh) {
+        auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
+        cgh.single_task([=]() { acc[0] += GOLD; });
+      });
+      ++NumTestCases;
+
+      // Test lambda passed to function
+      foo(deviceQueue, buf, [](int a, int b) { return a + b; });
+      ++NumTestCases;
+    }
+    return arr[0];
+  }
+};
+} // namespace nm
+
+int main() {
+  nm::Wrapper w;
+  int res = w.test();
+  assert (res == GOLD * NumTestCases && "Wrong result");
+}