Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make outlined function arguments non-aliasing #1006

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@

#include "iree-amd-aie/Transforms/Passes.h"
#include "iree-amd-aie/Transforms/Utils/AMDAIEUtils.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
#include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/IRMapping.h"

#define DEBUG_TYPE "iree-amdaie-linalg-function-outlining"

Expand All @@ -23,7 +21,8 @@ namespace {
/// Utility to outline the linalg compute op.
static FailureOr<func::FuncOp> outline(IRRewriter &rewriter, ModuleOp moduleOp,
linalg::LinalgOp computeOp,
const std::string &funcName) {
const std::string &funcName,
bool noAliasMemrefArgs) {
// Form outlined FunctionType.
for (const auto &operand : computeOp->getOperands()) {
// Function signatures where the memrefs have layouts (strides / offsets)
Expand Down Expand Up @@ -65,6 +64,16 @@ static FailureOr<func::FuncOp> outline(IRRewriter &rewriter, ModuleOp moduleOp,
// arguments.
Operation *clonedComputeOp = rewriter.clone(*computeOp, operandMap);

if (noAliasMemrefArgs) {
StringRef noAliasAttrName = LLVM::LLVMDialect::getNoAliasAttrName();
ArrayRef<BlockArgument> args = func.getArguments();
for (auto iter : llvm::enumerate(args)) {
if (isa<MemRefType>(iter.value().getType())) {
func.setArgAttr(iter.index(), noAliasAttrName, rewriter.getUnitAttr());
}
}
}

// Create terminator op returning the cloned compute op's results.
rewriter.setInsertionPointToEnd(funcBody);
rewriter.create<func::ReturnOp>(clonedComputeOp->getLoc(), ValueRange({}));
Expand Down Expand Up @@ -143,7 +152,7 @@ class AMDAIELinalgFunctionOutliningPass
}

FailureOr<func::FuncOp> maybeFunc =
outline(rewriter, moduleOp, computeOp, funcName);
outline(rewriter, moduleOp, computeOp, funcName, noAliasMemrefArgs);

if (succeeded(maybeFunc)) {
computeOpToOutlinedFuncMap[computeOp] = maybeFunc.value();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
#include "llvm/Support/Debug.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Iterators.h"
#include "mlir/Pass/PassManager.h"

#define DEBUG_TYPE "iree-amdaie-lower-to-aie"
Expand Down Expand Up @@ -420,6 +419,7 @@ LogicalResult AIEDeviceBuilder::coreFuncCallOpToAIE(
StringRef fnName = oldCallOp.getCallee();
auto fnDecl = dyn_cast_if_present<func::FuncOp>(
SymbolTable::lookupSymbolIn(moduleOp, fnName));

assert(fnDecl && "expected function declaration");
// Check the mapper to see if we've already created a new function declaration
// with the new function type. If not, create the same. We need to create a
Expand All @@ -434,14 +434,20 @@ LogicalResult AIEDeviceBuilder::coreFuncCallOpToAIE(
SymbolTable::setSymbolVisibility(newFnDecl,
SymbolTable::Visibility::Private);
newFnDecl->setAttr("llvm.bareptr", rewriter.getBoolAttr(true));

fnDecl.getBody().cloneInto(&(newFnDecl.getBody()), mapper);
if (ArrayAttr oldAttrs = fnDecl.getAllArgAttrs()) {
newFnDecl.setAllArgAttrs(oldAttrs);
}

mapper.map(fnDecl.getOperation(), newFnDecl.getOperation());
fnDecl = newFnDecl;
}
// Fetch the new function declaration and create the new func.call op.
auto newFnDecl = cast<func::FuncOp>(mapper.lookupOrDefault(fnDecl));
rewriter.create<func::CallOp>(oldCallOp->getLoc(), newFnDecl, newArgs);
toBeErased.push_back(oldCallOp);

return success();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,22 @@ def AMDAIELinalgFunctionOutlining :
"Replace all outlined functions with a function that does nothing, "
"i.e. it just returns. Useful for measuring the performance of data "
"movement to/from the device -- by doing zero compute, all time is spent "
"moving data to/from the AIE cores.">
"moving data to/from the AIE cores.">,
Option<"noAliasMemrefArgs", "no-alias-memref-args", "bool", /*default=*/"true",
"A developer only option. When 'true' (the default), "
"the final memref argument of the outlined function "
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"the final memref argument of the outlined function "
"all memref arguments of the outlined function "

"will have the 'llvm.noalias' attribute attached to "
"it. The motivation for having this attribute is that "
"sometimes the matmul code generated in llvm's opt is "
"much (2x) faster. The motivation for adding it manually "
"without any analysis is that llvm/peano cannot always "
"infer that this attribute can safely be attached, "
"because (I suppose) the analysis of all call sites, "
"i.e. checking that the final argument is not aliased "
"to any other arguments is too complicated/expensive. "
"The addition of this attribute in this pass is exposed "
"as an option, as there is no guarantee that it is valid "
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no guarantee that is valid, and you're saying it's a developer only option, but we're setting it to true by default?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. If by "developer only option" we mean one that we don't guarantee correctness for when changed from default, then I suppose it's not a developer only option. What I meant was that it's an option that you shouldn't be playing with to eke out extra performance.

Alternative approach (1): remove it as option, and do a shallow check that it's valid by checking if outlined payload is a matmul, or do some basic analysis of pointers. My concern is that this can balloon into complex code.

Alternative approach (2): make the option 'false' by default. My concern here is that it should probably be true for every workload we're likely to see.

Copy link
Collaborator

@jtuyls jtuyls Jan 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have been thinking about this and I think we can guarantee the noalias on the function arguments at the point in pipeline when we have amdaie.logicalobjectfifo.access operations to retrieve a memref from a logical objectFifo by checking that we only add noalias when the memref is a result from a amdaie.logicalobjectfifo.access and there is only one access on that logical objectFifo. This happens after function outlining, so I think it would be better to have a separate pass for adding the noalias.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for thinking about this! I think for an analysis to be easier in pass N than pass M (where M<N) either (1) the analysis is done in an intervening pass P (M<P<=N) or (2) an assumption is baked into the IR in an intervening pass.

Either way, I think it makes sense to have a pass to add the noalias attribute. Just wondering what level is best.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you base the analysis on amdaie.logicalobjectfifo.access, it should work on any IR, as long as those ops are available, which means that you could put it anywhere after createAMDAIEDistributeCoresAndObjectFifosPass. Similarly, the LinalgFunctionOutling pass could be moved anywhere as well I think, so no assumptions need to be baked in, but the noalias insertion would only find opportunities if amdaie.logicalobjectfifo.access ops are found.

"-- the final argument could in theory alias another argument. ">
Comment on lines +307 to +308
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"as an option, as there is no guarantee that it is valid "
"-- the final argument could in theory alias another argument. ">
"as an option, as there is no guarantee that it is valid.">

];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ iree_lit_test_suite(
"fold_dma_waits.mlir"
"flatten_logical_objectfifo.mlir"
"linalg_function_outlining.mlir"
"linalg_function_outlining_to_empty.mlir"
"linalg_function_outlining_options.mlir"
"fuse_consumer_into_loop.mlir"
"fuse_fill_into_forall.mlir"
"fuse_pack_into_loop.mlir"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
// Test demonstrating multiple matmuls using different SSAs.

// CHECK-LABEL: func.func private @generic_matmul_0_outlined
// CHECK-SAME: (%[[LHS:.*]]: memref<4x8xbf16>,
// CHECK-SAME: %[[RHS:.*]]: memref<8x4xbf16>,
// CHECK-SAME: %[[OUT:.*]]: memref<4x4xf32>) {
// CHECK-SAME: (%[[LHS:.*]]: memref<4x8xbf16> {llvm.noalias},
// CHECK-SAME: %[[RHS:.*]]: memref<8x4xbf16> {llvm.noalias},
// CHECK-SAME: %[[OUT:.*]]: memref<4x4xf32> {llvm.noalias}) {
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
// CHECK-SAME: outs(%[[OUT]] :
Expand Down Expand Up @@ -76,8 +76,8 @@ func.func @repeated_identical_matmul(%A: memref<4x8xbf16>, %B: memref<8x4xbf16>,
// Test demonstrating different kind of matmul operations being mapped to a
// unique corresponding outlined function.

// CHECK-DAG: func.func private @[[MATMUL_K6:.*]]({{.*}}memref<4x6xbf16>, {{.*}}memref<6x4xbf16>, {{.*}}memref<4x4xf32>)
// CHECK-DAG: func.func private @[[MATMUL_K4:.*]]({{.*}}memref<4x4xbf16>, {{.*}}memref<4x4xbf16>, {{.*}}memref<4x4xf32>)
// CHECK-DAG: func.func private @[[MATMUL_K6:.*]]({{.*}}memref<4x6xbf16> {llvm.noalias}, {{.*}}memref<6x4xbf16> {llvm.noalias}, {{.*}}memref<4x4xf32> {llvm.noalias})
// CHECK-DAG: func.func private @[[MATMUL_K4:.*]]({{.*}}memref<4x4xbf16> {llvm.noalias}, {{.*}}memref<4x4xbf16> {llvm.noalias}, {{.*}}memref<4x4xf32> {llvm.noalias})
// CHECK-NOT: func.func private
// CHECK: func.func @distinct_matmul_shapes(
// CHECK-SAME: %[[A0:.*]]: memref<4x4xbf16>, %[[B0:.*]]: memref<4x4xbf16>,
Expand Down Expand Up @@ -135,7 +135,7 @@ func.func @linalg_fill_copy(%A: memref<4xf32>, %B: memref<4xf32>) {
// a matmul or elementwise operation. Specifically, one which has not been
// 'blacklisted' like linalg.copy has (see test linalg_fill_copy above).
// CHECK: func.func private @generic_0_outlined
// CHECK-SAME: memref<4xbf16>,
// CHECK-SAME: memref<4xbf16>
// CHECK-SAME: memref<bf16>
// CHECK: linalg.generic
// CHECK-SAME: iterator_types = ["reduction"]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Currently this is the default:
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(iree-amdaie-linalg-function-outlining{empty-functions=false no-alias-memref-args=true})" --verify-diagnostics --split-input-file %s | FileCheck %s --check-prefix=CHECK_NOTEMPTY_NOALIAS

// CHECK_NOTEMPTY_NOALIAS: func.func private @
// CHECK_NOTEMPTY_NOALIAS-SAME: memref<4xbf16> {llvm.noalias},
// CHECK_NOTEMPTY_NOALIAS-SAME: memref<bf16> {llvm.noalias}) {
// CHECK_NOTEMPTY_NOALIAS: linalg.generic
// CHECK_NOTEMPTY_NOALIAS: return
// CHECK_NOTEMPTY_NOALIAS: func.func @reduction

// A run to check the option empty-functions=true:
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(iree-amdaie-linalg-function-outlining{empty-functions=true no-alias-memref-args=true})" --verify-diagnostics --split-input-file %s | FileCheck %s --check-prefix=CHECK_EMPTY_NOALIAS

// CHECK_EMPTY_NOALIAS: func.func private @
// CHECK_EMPTY_NOALIAS-SAME: memref<4xbf16> {llvm.noalias},
// CHECK_EMPTY_NOALIAS-SAME: memref<bf16> {llvm.noalias}) {
// CHECK_EMPTY_NOALIAS-NOT: linalg.generic
// CHECK_EMPTY_NOALIAS: return
// CHECK_EMPTY_NOALIAS: func.func @reduction

// A run to check the option no-alias-memref-args=false:
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(iree-amdaie-linalg-function-outlining{empty-functions=false no-alias-memref-args=false})" --verify-diagnostics --split-input-file %s | FileCheck %s --check-prefix=CHECK_NOTEMPTY_ALIAS

// CHECK_NOTEMPTY_ALIAS: func.func private @
// CHECK_NOTEMPTY_ALIAS-SAME: memref<4xbf16>,
// CHECK_NOTEMPTY_ALIAS-SAME: memref<bf16>) {
// CHECK_NOTEMPTY_ALIAS: linalg.generic
// CHECK_NOTEMPTY_ALIAS: return
// CHECK_NOTEMPTY_ALIAS: func.func @reduction

func.func @reduction(%A: memref<4xbf16>, %B: memref<bf16>) {
%c2 = arith.constant 2 : index
%tile = amdaie.tile(%c2, %c2)
%1 = amdaie.core(%tile, in : [], out : []) {
linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
iterator_types = ["reduction"]
} ins(%A: memref<4xbf16>) outs(%B : memref<bf16>) {
^bb0(%in: bf16, %out: bf16):
linalg.yield %in : bf16
}
amdaie.end
}
return
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -897,9 +897,11 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}

// -----


// CHECK: aie.device
// CHECK: func.func private @ukernel_B(memref<i32, 2 : i32>, index, memref<f32, 2 : i32>, index) attributes {llvm.bareptr = true}
// CHECK: func.func private @ukernel_A(memref<i32, 2 : i32>, index) attributes {llvm.bareptr = true}
// CHECK-DAG: func.func private @f_with_arg_attr(%arg0: memref<i32, 2 : i32> {llvm.noalias})
// CHECK-DAG: func.func private @ukernel_B(memref<i32, 2 : i32>, index, memref<f32, 2 : i32>, index) attributes {llvm.bareptr = true}
// CHECK-DAG: func.func private @ukernel_A(memref<i32, 2 : i32>, index) attributes {llvm.bareptr = true}
// CHECK: %[[TILE_0_2:.*]] = aie.tile(0, 2)
// CHECK: %[[BUFFER_0_2:.*]] = aie.buffer(%[[TILE_0_2]]) {sym_name = "buff_0"} : memref<4096xi32, 2 : i32>
// CHECK: %[[LOCK_0_2:.*]] = aie.lock(%[[TILE_0_2]], 0) {init = 1 : i8, sym_name = "lock_0"}
Expand All @@ -925,6 +927,8 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func private @ukernel_A(memref<i32, 2 : i32>, index) attributes {link_with = "/path/to/ukernel.o", llvm.bareptr = true}
func.func private @ukernel_B(memref<i32, 2 : i32>, index, memref<f32, 2 : i32>, index) attributes {link_with = "/path/to/ukernel.o", llvm.bareptr = true}
func.func private @f_with_arg_attr(%0 : memref<i32, 2 : i32> {llvm.noalias}) attributes {llvm.bareptr = true} { return }

func.func @core_ukernel() {
amdaie.workgroup {
%c0 = arith.constant 0 : index
Expand All @@ -945,6 +949,7 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
%base_buffer0, %offset0, %sizes0:2, %strides0:2 = memref.extract_strided_metadata %4 : memref<64x64xf32, 2 : i32> -> memref<f32, 2 : i32>, index, index, index, index, index
func.call @ukernel_A(%base_buffer, %c0) : (memref<i32, 2 : i32>, index) -> ()
func.call @ukernel_B(%base_buffer, %c0, %base_buffer0, %c0) : (memref<i32, 2 : i32>, index, memref<f32, 2 : i32>, index) -> ()
func.call @f_with_arg_attr(%base_buffer) : (memref<i32, 2 : i32>) -> ()
amdaie.use_lock(%lock, Release(1))
amdaie.use_lock(%lock_2, Release(1))
amdaie.end
Expand Down
Loading