Skip to content

Commit

Permalink
[SYCL] Generate the offload kernel and SYCL Kernel function.
Browse files Browse the repository at this point in the history
Functions marked with sycl_kernel_entry_point attribute are
used to generate the SYCL kernel caller function, i.e. the
offload kernel. The OutlinedFunctionDecl AST node is used
to generate the arguments and body of this function.
  • Loading branch information
elizabethandrews authored and tahonermann committed Jul 3, 2024
1 parent 59c5aa2 commit de5cea5
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 2 deletions.
5 changes: 5 additions & 0 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11992,6 +11992,11 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
if (!FD->doesThisDeclarationHaveABody())
return FD->doesDeclarationForceExternallyVisibleDefinition();

// SYCL kernel entry point functions are used to generate and emit
// the offload kernel.
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>())
return true;

// Constructors and destructors are required.
if (FD->hasAttr<ConstructorAttr>() || FD->hasAttr<DestructorAttr>())
return true;
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,17 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType,
RequiredArgs::All);
}

/// Arrange the function info for a SYCL kernel caller declaration. The
/// parameter types are derived from \p args, the calling convention is
/// CC_OpenCLKernel, and every argument is required.
const CGFunctionInfo &
CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType,
                                                 const FunctionArgList &args) {
  auto paramTypes = getArgTypesForDeclaration(Context, args);
  const FunctionType::ExtInfo kernelExtInfo(CC_OpenCLKernel);

  return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
                                 paramTypes, kernelExtInfo,
                                 /*paramInfos=*/{}, RequiredArgs::All);
}

/// Arrange a call to a C++ method, passing the given arguments.
///
/// numPrefixArgs is the number of ABI-specific prefix arguments we have. It
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ add_clang_library(clangCodeGen
CodeGenFunction.cpp
CodeGenModule.cpp
CodeGenPGO.cpp
CodeGenSYCL.cpp
CodeGenTBAA.cpp
CodeGenTypes.cpp
ConstantInitBuilder.cpp
Expand Down
16 changes: 14 additions & 2 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3197,8 +3197,20 @@ void CodeGenModule::EmitDeferred() {
if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D))
continue;

// Otherwise, emit the definition and move on to the next one.
EmitGlobalDefinition(D, GV);
// If the Decl corresponds to a SYCL kernel entry point function, generate
// and emit the corresponding SYCL kernel caller function, i.e. the
// offload kernel. Otherwise, emit the definition and move on to the next
// one.
const FunctionDecl *FD = nullptr;
if (LangOpts.SYCLIsDevice &&
(FD = D.getDecl()->getAsFunction()) != nullptr &&
FD->hasAttr<SYCLKernelEntryPointAttr>() &&
FD->isDefined()) {
// Generate and emit the offload kernel
EmitSYCLKernelCaller(FD, getContext());
} else {
EmitGlobalDefinition(D, GV);
}

// If we found out that we need to emit more decls, do that recursively.
// This has the advantage that the decls are emitted in a DFS and related
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -1754,6 +1754,10 @@ class CodeGenModule : public CodeGenTypeCache {
/// .gcda files in a way that persists in .bc files.
void EmitCoverageFile();

/// Generate and emit the SYCL kernel caller function, i.e. the offload
/// kernel, for \p KernelEntryPointFn, a function declared with the
/// sycl_kernel_entry_point attribute. \p Ctx is the AST context used while
/// building the kernel.
void EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn,
ASTContext &Ctx);

/// Determine whether the definition must be emitted; if this returns \c
/// false, the definition can be emitted lazily if it's used.
bool MustBeEmitted(const ValueDecl *D);
Expand Down
60 changes: 60 additions & 0 deletions clang/lib/CodeGen/CodeGenSYCL.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//===--------- CodeGenSYCL.cpp - Code for SYCL kernel generation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code required for SYCL kernel generation.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CodeGenModule.h"

using namespace clang;
using namespace CodeGen;

/// Apply the attributes specific to a SYCL kernel caller to \p Fn: mark it as
/// non-recursive, give it the SPIR kernel calling convention, and add the
/// mustprogress attribute when \p CGF reports that the function must make
/// progress. \p FnInfo is currently unused.
static void SetSYCLKernelAttributes(llvm::Function *Fn,
                                    const CGFunctionInfo &FnInfo,
                                    CodeGenFunction &CGF) {
  Fn->setDoesNotRecurse();
  Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);

  const bool MustProgress = CGF.checkIfFunctionMustProgress();
  if (!MustProgress)
    return;

  Fn->addFnAttr(llvm::Attribute::MustProgress);
}

void CodeGenModule::EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn,
ASTContext &Ctx) {

SYCLKernelCallStmt *KernelCallStmt =
dyn_cast<SYCLKernelCallStmt>(KernelEntryPointFn->getBody());

assert(KernelCallStmt && "SYCLKernelCallStmt must exist");

// Build Kernel Arguments from OutlinedFunctionDecl
FunctionArgList Args;
const OutlinedFunctionDecl *OutlinedFnDecl =
KernelCallStmt->getOutlinedFunctionDecl();
Args.append(OutlinedFnDecl->param_begin(), OutlinedFnDecl->param_end());

// Compute the function info and LLVM Type
const CGFunctionInfo &FnInfo =
getTypes().arrangeSYCLKernelCallerDeclaration(Ctx.VoidTy, Args);
llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo);
// FIXME: Correct naming of generated offload kernels
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalVariable::ExternalLinkage,
"testKernelGen", &getModule());

// Emit the SYCL kernel caller function
CodeGenFunction CGF(*this);
SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, Fn, false);
SetSYCLKernelAttributes(Fn, FnInfo, CGF);
CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, FnInfo, Args,
SourceLocation(), SourceLocation());
CGF.EmitFunctionBody(OutlinedFnDecl->getBody());
setDSOLocal(Fn);
SetLLVMFunctionAttributesForDefinition(cast<Decl>(OutlinedFnDecl), Fn);
CGF.FinishFunction();
}
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CodeGenTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,12 @@ class CodeGenTypes {
const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType,
const CallArgList &args);

/// Arrange the function info for a SYCL kernel caller declaration. A SYCL
/// device kernel function is a free-standing function with the spir_kernel
/// calling convention. \p resultType is the result type of the kernel and
/// \p args is its parameter list.
const CGFunctionInfo &
arrangeSYCLKernelCallerDeclaration(QualType resultType,
const FunctionArgList &args);

/// Objective-C methods are C functions with some implicit parameters.
const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD);
const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
Expand Down
51 changes: 51 additions & 0 deletions clang/test/CodeGenSYCL/kernel-caller-generation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -triple spir64 %s -o - | FileCheck %s

// SYCL kernel entry point function template used as the test input. It is
// marked with sycl_kernel_entry_point(name), where 'name' is the kernel name
// type, and simply invokes the callable it is given. The parameter name
// 'kernelFunc' appears in the expected IR further down, so keep it unchanged.
template <typename name, typename Func>
__attribute__((sycl_kernel_entry_point(name))) void kernel_single_task(const Func kernelFunc) {
kernelFunc();
}

// Instantiates kernel_single_task with the kernel name type 'test_kernel' and
// a lambda that captures 'capture' by copy. The expected IR further down
// refers to this lambda's call operator by its mangled name.
int main() {
int capture;
kernel_single_task<class test_kernel>(
[=]() {
// Reference the captured variable inside the kernel body.
(void) capture;
});
}


// IR for compiler generated SYCL kernel caller function :
// The arguments of the SYCL kernel caller function correspond to either the SYCL Kernel
// Object, or decomposed fields of the SYCL kernel object if special SYCL types are
// captured (not yet supported). In the latter case, the SYCL kernel object is reconstituted
// in the body of the SYCL caller function. The body of the SYCL kernel caller function
// then invokes the SYCL kernel i.e. the operator method of the SYCL kernel object.

// CHECK: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
// FIXME: Kernel name is currently hardcoded.
// FIXME: Pointer Alignment mismatch with syclos must be clarified and confirmed.
// CHECK: define dso_local spir_kernel void @testKernelGen(ptr noundef byval(%class.anon) align 4 %kernelFunc) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4)
// CHECK-NEXT: call spir_func void @_ZZ4mainENKUlvE_clEv
// CHECK-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %kernelFunc.ascast) #[[ATTR1:[0-9]+]]
// CHECK-NEXT: ret void
// CHECK-NEXT:}

// IR for operator method of kernel object:
// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv
// FIXME: Pointer Alignment mismatch with syclos must be clarified and confirmed.
// FIXME: !srcloc metadata present in syclos (with incorrect value?). Why is this not present in llvm.org ?
// CHECK-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %this) #[[ATTR0]] align 2 {
// CHECK-NEXT: entry:
// CHECK-NEXT: %this.addr = alloca ptr addrspace(4), align 8
// CHECK-NEXT: %this.addr.ascast = addrspacecast ptr %this.addr to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) %this, ptr addrspace(4) %this.addr.ascast, align 8
// CHECK-NEXT: %this1 = load ptr addrspace(4), ptr addrspace(4) %this.addr.ascast, align 8
// CHECK-NEXT: %[[CAPTURE:.+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %this1, i32 0, i32 0
// CHECK-NEXT: ret void
// CHECK-NEXT:}

// FIXME: Additional function attribute "sycl-optlevel"="0" is generated by an LLVM pass in syclos but not in llvm.org.
// CHECK: #[[ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: #[[ATTR1]] = { convergent nounwind }

0 comments on commit de5cea5

Please sign in to comment.