Skip to content

Commit

Permalink
[SYCL] Add unnamed lambda kernels support (#387)
Browse files Browse the repository at this point in the history
Add support for kernels without kernel name under -fsycl-unnamed-lambda flag. 
User activates the extension with the new flag.
Uses new __unique_stable_name compiler built-in.

Only the version of the integration header needed is generated
(rather than both version with #ifdef).

Defines __SYCL_UNNAMED_LAMBDA__ when extension is active.

Signed-off-by: Roland Schulz <roland.schulz@intel.com>
Signed-off-by: Alexey Bader <alexey.bader@intel.com>
  • Loading branch information
bader authored Jul 30, 2019
1 parent 08a923f commit 422a5fa
Show file tree
Hide file tree
Showing 12 changed files with 137 additions and 23 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")
LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")

LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1744,6 +1744,8 @@ def fno_sycl_use_bitcode : Flag<["-"], "fno-sycl-use-bitcode">,
Flags<[CC1Option]>, HelpText<"Use SPIR-V instead of LLVM bitcode in fat objects">;
def fsycl_link : Flag<["-"], "fsycl-link">,
Flags<[CC1Option]>, HelpText<"Generate partially linked device object to be used with the host link">;
def fsycl_unnamed_lambda : Flag<["-"], "fsycl-unnamed-lambda">,
Flags<[CC1Option]>, HelpText<"Allow unnamed SYCL lambda kernels">;
def fsyntax_only : Flag<["-"], "fsyntax-only">,
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;
Expand Down
13 changes: 10 additions & 3 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ class SYCLIntegrationHeader {
};

public:
SYCLIntegrationHeader(DiagnosticsEngine &Diag);
SYCLIntegrationHeader(DiagnosticsEngine &Diag, bool UnnamedLambdaSupport);

/// Emits contents of the header into given stream.
void emit(raw_ostream &Out);
Expand All @@ -317,7 +317,8 @@ class SYCLIntegrationHeader {

/// Signals that subsequent parameter descriptor additions will go to
/// the kernel with given name. Starts new kernel invocation descriptor.
void startKernel(StringRef KernelName, QualType KernelNameType);
void startKernel(StringRef KernelName, QualType KernelNameType,
StringRef KernelStableName);

/// Adds a kernel parameter descriptor to current kernel invocation
/// descriptor.
Expand Down Expand Up @@ -352,6 +353,9 @@ class SYCLIntegrationHeader {
/// Kernel name type.
QualType NameType;

/// Kernel name with stable lambda name mangling
std::string StableName;

/// Descriptor of kernel actual parameters.
SmallVector<KernelParamDesc, 8> Params;

Expand Down Expand Up @@ -387,6 +391,9 @@ class SYCLIntegrationHeader {

/// Used for emitting diagnostics.
DiagnosticsEngine &Diag;

/// Whether header is generated with unnamed lambda support
bool UnnamedLambdaSupport;
};

/// Keeps track of expected type during expression parsing. The type is tied to
Expand Down Expand Up @@ -11404,7 +11411,7 @@ class Sema {
SYCLIntegrationHeader &getSyclIntegrationHeader() {
if (SyclIntHeader == nullptr)
SyclIntHeader = llvm::make_unique<SYCLIntegrationHeader>(
getDiagnostics());
getDiagnostics(), getLangOpts().SYCLUnnamedLambda);
return *SyclIntHeader.get();
}

Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5373,6 +5373,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
HeaderOpt += Output.getFilename();
CmdArgs.push_back(Args.MakeArgString(HeaderOpt));
}
if (Args.hasArg(options::OPT_fsycl_unnamed_lambda))
CmdArgs.push_back("-fsycl-unnamed-lambda");
}

// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2991,6 +2991,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.SYCLIsHost = Args.hasArg(options::OPT_fsycl_is_host);
Opts.SYCLAllowFuncPtr = Args.hasFlag(options::OPT_fsycl_allow_func_ptr,
options::OPT_fno_sycl_allow_func_ptr, false);
Opts.SYCLUnnamedLambda = Args.hasArg(options::OPT_fsycl_unnamed_lambda);

// Set CUDA mode for OpenMP target NVPTX if specified in options
Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1077,6 +1077,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
if (!getenv("DISABLE_INFER_AS"))
Builder.defineMacro("__SYCL_ENABLE_INFER_AS__", "1");
}
if (LangOpts.SYCLUnnamedLambda)
Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__", "1");

// OpenCL definitions.
if (LangOpts.OpenCL) {
Expand Down
43 changes: 26 additions & 17 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,7 +936,9 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name,

ASTContext &Ctx = KernelObjTy->getASTContext();
const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(KernelObjTy);
H.startKernel(Name, NameType);
const std::string StableName = PredefinedExpr::ComputeName(
Ctx, PredefinedExpr::UniqueStableNameExpr, NameType);
H.startKernel(Name, NameType, StableName);

auto populateHeaderForAccessor = [&](const QualType &ArgTy, uint64_t Offset) {
// The parameter is a SYCL accessor object.
Expand Down Expand Up @@ -1247,7 +1249,7 @@ void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D) {
? cast<ClassTemplateDecl>(D)->getTemplatedDecl()
: dyn_cast<TagDecl>(D);

if (TD && TD->isCompleteDefinition()) {
if (TD && TD->isCompleteDefinition() && !UnnamedLambdaSupport) {
// defined class constituting the kernel name is not globally
// accessible - contradicts the spec
Diag.Report(D->getSourceRange().getBegin(),
Expand Down Expand Up @@ -1377,11 +1379,13 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
O << "#include <CL/sycl/detail/kernel_desc.hpp>\n";

O << "\n";
O << "// Forward declarations of templated kernel function types:\n";
if (!UnnamedLambdaSupport) {
O << "// Forward declarations of templated kernel function types:\n";

llvm::SmallPtrSet<const void *, 4> Printed;
for (const KernelDesc &K : KernelDescs) {
emitForwardClassDecls(O, K.NameType, Printed);
llvm::SmallPtrSet<const void *, 4> Printed;
for (const KernelDesc &K : KernelDescs) {
emitForwardClassDecls(O, K.NameType, Printed);
}
}
O << "\n";

Expand Down Expand Up @@ -1444,19 +1448,21 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
}
O << "};\n\n";

O << "// Specializations of this template class encompasses information\n";
O << "// about a kernel. The kernel is identified by the template\n";
O << "// parameter type.\n";
O << "template <class KernelNameType> struct KernelInfo;\n";
O << "\n";

O << "// Specializations of KernelInfo for kernel function types:\n";
CurStart = 0;

for (const KernelDesc &K : KernelDescs) {
const size_t N = K.Params.size();
O << "template <> struct KernelInfo<"
<< eraseAnonNamespace(K.NameType.getAsString()) << "> {\n";
if (UnnamedLambdaSupport) {
O << "template <> struct KernelInfoData<";
O << "'" << K.StableName.front();
for (char c : StringRef(K.StableName).substr(1))
O << "', '" << c;
O << "'> {\n";
} else {
O << "template <> struct KernelInfo<"
<< eraseAnonNamespace(K.NameType.getAsString()) << "> {\n";
}
O << " DLL_LOCAL\n";
O << " static constexpr const char* getName() { return \"" << K.Name
<< "\"; }\n";
Expand Down Expand Up @@ -1494,10 +1500,12 @@ bool SYCLIntegrationHeader::emit(const StringRef &IntHeaderName) {
}

void SYCLIntegrationHeader::startKernel(StringRef KernelName,
QualType KernelNameType) {
QualType KernelNameType,
StringRef KernelStableName) {
KernelDescs.resize(KernelDescs.size() + 1);
KernelDescs.back().Name = KernelName;
KernelDescs.back().NameType = KernelNameType;
KernelDescs.back().StableName = KernelStableName;
}

void SYCLIntegrationHeader::addParamDesc(kernel_param_kind_t Kind, int Info,
Expand All @@ -1515,8 +1523,9 @@ void SYCLIntegrationHeader::endKernel() {
// nop for now
}

SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag)
: Diag(_Diag) {}
SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag,
bool _UnnamedLambdaSupport)
: Diag(_Diag), UnnamedLambdaSupport(_UnnamedLambdaSupport) {}

bool Util::isSyclAccessorType(const QualType &Ty) {
return isSyclType(Ty, "accessor", true /*Tmpl*/);
Expand Down
1 change: 0 additions & 1 deletion clang/test/CodeGenSYCL/integration_header.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
// CHECK-EMPTY:
// CHECK-NEXT: };
//
// CHECK: template <class KernelNameType> struct KernelInfo;
// CHECK: template <> struct KernelInfo<class first_kernel> {
// CHECK: template <> struct KernelInfo<::second_namespace::second_kernel<char>> {
// CHECK: template <> struct KernelInfo<::third_kernel<1, int, ::point<X> >> {
Expand Down
2 changes: 0 additions & 2 deletions clang/test/CodeGenSYCL/wrapped-accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
// CHECK-NEXT: 0 // _ZTSZ4mainE14wrapped_access
// CHECK-NEXT: };

// CHECK: template <class KernelNameType> struct KernelInfo;

// CHECK: template <> struct KernelInfo<class wrapped_access> {

#include <sycl.hpp>
Expand Down
24 changes: 24 additions & 0 deletions sycl/include/CL/sycl/detail/kernel_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct kernel_param_desc_t {
int offset;
};

#ifndef __SYCL_UNNAMED_LAMBDA__
template <class KernelNameType> struct KernelInfo {
static constexpr unsigned getNumParams() { return 0; }
static const kernel_param_desc_t &getParamDesc(int Idx) {
Expand All @@ -59,6 +60,29 @@ template <class KernelNameType> struct KernelInfo {
}
static constexpr const char *getName() { return ""; }
};
#else
template <char...> struct KernelInfoData; // Should this have dummy impl?

// C++14 like index_sequence and make_index_sequence
// not needed C++14 members (value_type, size) not implemented
template <class T, T...> struct integer_sequence {};
template <size_t... I> using index_sequence = integer_sequence<size_t, I...>;
template <size_t N>
using make_index_sequence = __make_integer_seq<integer_sequence, size_t, N>;

template <typename T> struct KernelInfoImpl {
private:
static constexpr auto n = __unique_stable_name(T);
template <std::size_t... I>
static KernelInfoData<n[I]...> impl(index_sequence<I...>) {
return {};
}

public:
using type = decltype(impl(make_index_sequence<__builtin_strlen(n)>{}));
};
template <typename T> using KernelInfo = typename KernelInfoImpl<T>::type;
#endif //__SYCL_UNNAMED_LAMBDA__

} // namespace detail
} // namespace sycl
Expand Down
2 changes: 2 additions & 0 deletions sycl/test/regression/kernel_name_class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %clangxx -fsycl %s -o %t.ext.out -lOpenCL -fsycl-unnamed-lambda
// RUN: %CPU_RUN_PLACEHOLDER %t.ext.out

//==-- kernel_name_class.cpp - SYCL kernel naming variants test ------------==//
//
Expand Down
67 changes: 67 additions & 0 deletions sycl/test/regression/kernel_unnamed.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// RUN: %clangxx -fsycl %s -o %t.out -lOpenCL -fsycl-unnamed-lambda
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

//==-- kernel_unnamed.cpp - SYCL kernel naming variants test ------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <iostream>

#define GOLD 10
static int NumTestCases = 0;

template <class F>
void foo(cl::sycl::queue &deviceQueue, cl::sycl::buffer<int, 1> &buf, F f) {
deviceQueue.submit([&](cl::sycl::handler &cgh) {
auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
cgh.single_task([=]() { acc[0] = f(acc[0], GOLD); });
});
}

namespace nm {
struct Wrapper {

int test() {
int arr[] = {0};
{
// Simple test
cl::sycl::queue deviceQueue;
cl::sycl::buffer<int, 1> buf(arr, 1);
deviceQueue.submit([&](cl::sycl::handler &cgh) {
auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
cgh.single_task([=]() { acc[0] += GOLD; });
});
++NumTestCases;

// Test lambdas with different ordinal because of macro expansion
#ifdef __SYCL_DEVICE_ONLY__
[] {}();
#endif
deviceQueue.submit([&](cl::sycl::handler &cgh) {
auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
cgh.single_task([=]() { acc[0] += GOLD; });
});
++NumTestCases;

// Test lambda passed to function
foo(deviceQueue, buf, [](int a, int b) { return a + b; });
++NumTestCases;
}
return arr[0];
}
};
} // namespace nm

int main() {
nm::Wrapper w;
int res = w.test();
assert (res == GOLD * NumTestCases && "Wrong result");
}

0 comments on commit 422a5fa

Please sign in to comment.