Skip to content

Commit

Permalink
[HIP] Support new kernel launching API
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D67947

llvm-svn: 372773
  • Loading branch information
yxsamliu committed Sep 24, 2019
1 parent b3a9320 commit 1282889
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 15 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")

LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")

LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")

LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,9 @@ def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group<Link_Group>,
HelpText<"HIP device library">;
def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">,
Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>;
def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">,
Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">;
def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">;
def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group<i_Group>,
HelpText<"Path to libomptarget-nvptx libraries">;
def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
Expand Down
17 changes: 11 additions & 6 deletions clang/lib/CodeGen/CGCUDANV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,

EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
CudaFeature::CUDA_USES_NEW_LAUNCH))
CudaFeature::CUDA_USES_NEW_LAUNCH) ||
CGF.getLangOpts().HIPUseNewLaunchAPI)
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
Expand Down Expand Up @@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,

llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");

// Lookup cudaLaunchKernel function.
// Lookup cudaLaunchKernel/hipLaunchKernel function.
// cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
// void **args, size_t sharedMem,
// cudaStream_t stream);
// hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
// void **args, size_t sharedMem,
// hipStream_t stream);
TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
auto LaunchKernelName = addPrefixToName("LaunchKernel");
IdentifierInfo &cudaLaunchKernelII =
CGM.getContext().Idents.get("cudaLaunchKernel");
CGM.getContext().Idents.get(LaunchKernelName);
FunctionDecl *cudaLaunchKernelFD = nullptr;
for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
Expand All @@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,

if (cudaLaunchKernelFD == nullptr) {
CGM.Error(CGF.CurFuncDecl->getLocation(),
"Can't find declaration for cudaLaunchKernel()");
"Can't find declaration for " + LaunchKernelName);
return;
}
// Create temporary dim3 grid_dim, block_dim.
Expand All @@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
/*ShmemSize=*/ShmemSize.getType(),
/*Stream=*/Stream.getType()},
/*isVarArg=*/false),
"__cudaPopCallConfiguration");
addUnderscoredPrefixToName("PopCallConfiguration"));

CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
{GridDim.getPointer(), BlockDim.getPointer(),
Expand Down Expand Up @@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
const CGFunctionInfo &FI =
CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
llvm::FunctionCallee cudaLaunchKernelFn =
CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
LaunchKernelArgs);
CGF.EmitBranch(EndBlock);
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4774,6 +4774,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// Forward -cl options to -cc1
RenderOpenCLOptions(Args, CmdArgs);

if (Args.hasFlag(options::OPT_fhip_new_launch_api,
options::OPT_fno_hip_new_launch_api, false))
CmdArgs.push_back("-fhip-new-launch-api");

if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2517,6 +2517,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.CUDADeviceApproxTranscendentals = 1;

Opts.GPURelocatableDeviceCode = Args.hasArg(OPT_fgpu_rdc);
Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api);

if (Opts.ObjC) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaCUDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,8 @@ void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,

std::string Sema::getCudaConfigureFuncName() const {
if (getLangOpts().HIP)
return "hipConfigureCall";
return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
: "hipConfigureCall";

// New CUDA kernel launch sequence.
if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
Expand Down
15 changes: 12 additions & 3 deletions clang/test/CodeGenCUDA/Inputs/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,21 @@ struct dim3 {
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};

typedef struct cudaStream *cudaStream_t;
typedef enum cudaError {} cudaError_t;
#ifdef __HIP__
typedef struct hipStream *hipStream_t;
typedef enum hipError {} hipError_t;
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
cudaStream_t stream = 0);
hipStream_t stream = 0);
extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
size_t sharedSize = 0,
hipStream_t stream = 0);
extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
dim3 blockDim, void **args,
size_t sharedMem,
hipStream_t stream);
#else
typedef struct cudaStream *cudaStream_t;
typedef enum cudaError {} cudaError_t;
extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
size_t sharedSize = 0,
cudaStream_t stream = 0);
Expand Down
14 changes: 9 additions & 5 deletions clang/test/CodeGenCUDA/kernel-call.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@
// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK
// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefixes=HIP,CHECK

// RUN: | FileCheck %s --check-prefixes=HIP-OLD,CHECK
// RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefixes=HIP-NEW,CHECK

#include "Inputs/cuda.h"

// CHECK-LABEL: define{{.*}}g1
// HIP: call{{.*}}hipSetupArgument
// HIP: call{{.*}}hipLaunchByPtr
// HIP-OLD: call{{.*}}hipSetupArgument
// HIP-OLD: call{{.*}}hipLaunchByPtr
// HIP-NEW: call{{.*}}__hipPopCallConfiguration
// HIP-NEW: call{{.*}}hipLaunchKernel
// CUDA-OLD: call{{.*}}cudaSetupArgument
// CUDA-OLD: call{{.*}}cudaLaunch
// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
Expand All @@ -19,7 +22,8 @@ __global__ void g1(int x) {}

// CHECK-LABEL: define{{.*}}main
int main(void) {
// HIP: call{{.*}}hipConfigureCall
// HIP-OLD: call{{.*}}hipConfigureCall
// HIP-NEW: call{{.*}}__hipPushCallConfiguration
// CUDA-OLD: call{{.*}}cudaConfigureCall
// CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
// CHECK: icmp
Expand Down

0 comments on commit 1282889

Please sign in to comment.