Skip to content

Commit

Permalink
[mlir][gpu] Add builder to gpu.launch_func (#95541)
Browse files Browse the repository at this point in the history
This patch adds a builder to `gpu.launch_func` allowing it to be created
using `SymbolRefAttr` instead of `GPUFuncOp`. This allows creating
`launch_func` when only a `gpu.binary` is present, instead of the full
`gpu.module {...}`.
  • Loading branch information
fabianmcg authored Jun 15, 2024
1 parent 0938cdb commit f3b4c00
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
6 changes: 6 additions & 0 deletions mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,12 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
CArg<"Type", "nullptr">:$asyncTokenType,
CArg<"ValueRange", "{}">:$asyncDependencies,
CArg<"std::optional<KernelDim3>", "std::nullopt">:$clusterSize)>,
OpBuilder<(ins "SymbolRefAttr":$kernel, "KernelDim3":$gridSize,
"KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,
"ValueRange":$kernelOperands,
"Type":$asyncTokenType,
CArg<"ValueRange", "{}">:$asyncDependencies,
CArg<"std::optional<KernelDim3>", "std::nullopt">:$clusterSize)>,
OpBuilder<(ins "SymbolRefAttr":$kernel, "KernelDim3":$gridSize,
"KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,
"ValueRange":$kernelOperands,
Expand Down
23 changes: 18 additions & 5 deletions mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1081,11 +1081,13 @@ BlockArgument LaunchOp::addPrivateAttribution(Type type, Location loc) {
//===----------------------------------------------------------------------===//

void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
GPUFuncOp kernelFunc, KernelDim3 gridSize,
SymbolRefAttr kernelSymbol, KernelDim3 gridSize,
KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
ValueRange kernelOperands, Type asyncTokenType,
ValueRange asyncDependencies,
std::optional<KernelDim3> clusterSize) {
assert(kernelSymbol.getNestedReferences().size() == 1 &&
"expected a symbol reference with a single nested reference");
result.addOperands(asyncDependencies);
if (asyncTokenType)
result.types.push_back(builder.getType<AsyncTokenType>());
Expand All @@ -1098,10 +1100,6 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);
result.addOperands(kernelOperands);
auto kernelModule = kernelFunc->getParentOfType<GPUModuleOp>();
auto kernelSymbol =
SymbolRefAttr::get(kernelModule.getNameAttr(),
{SymbolRefAttr::get(kernelFunc.getNameAttr())});

Properties &prop = result.getOrAddProperties<Properties>();
prop.kernel = kernelSymbol;
Expand All @@ -1122,6 +1120,21 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
prop.operandSegmentSizes[segmentSizesLen - 1] = 0;
}

/// Builds a `gpu.launch_func` from a `GPUFuncOp` handle.
///
/// The kernel symbol is assembled from the name of the `GPUModuleOp` that
/// encloses `kernelFunc` (root reference) and the name of `kernelFunc` itself
/// (single nested reference). Every other argument is forwarded unchanged to
/// the `SymbolRefAttr`-based builder, which populates `result`.
///
/// `kernelFunc` must be nested inside a `gpu.module`; this is checked with an
/// assertion because the module name is required to form the symbol.
void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
                         KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
                         ValueRange kernelOperands, Type asyncTokenType,
                         ValueRange asyncDependencies,
                         std::optional<KernelDim3> clusterSize) {
  assert(kernelFunc && "expected a non-null kernel function");
  auto kernelModule = kernelFunc->getParentOfType<GPUModuleOp>();
  // The parent lookup produces a null handle when no enclosing gpu.module
  // exists; fail loudly here instead of dereferencing it below.
  assert(kernelModule &&
         "expected the kernel function to be nested in a gpu.module");
  auto kernelSymbol =
      SymbolRefAttr::get(kernelModule.getNameAttr(),
                         {SymbolRefAttr::get(kernelFunc.getNameAttr())});
  build(builder, result, kernelSymbol, gridSize, getBlockSize,
        dynamicSharedMemorySize, kernelOperands, asyncTokenType,
        asyncDependencies, clusterSize);
}

void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
SymbolRefAttr kernel, KernelDim3 gridSize,
KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
Expand Down

0 comments on commit f3b4c00

Please sign in to comment.