-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
incorporate Switch to using transaction binary flow with no control packet (#1517) vendor all passes
- Loading branch information
1 parent
b58f527
commit e201771
Showing
24 changed files
with
4,985 additions
and
170 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
module attributes {hal.device.targets = [#hal.device.target<"amd-aie-direct", [#hal.executable.target<"amd-aie-direct", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = "none"}>]>]} { | ||
hal.executable private @dummy1 { | ||
hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie-direct", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = "none"}>) { | ||
// Export entry for @dummy2 (ordinal 0). The pipeline layout declares no push
// constants and a single read-only storage-buffer binding in set 0.
hal.executable.export public @dummy2 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>]>]>) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>]} { | ||
^bb0(%arg0: !hal.device): | ||
// The workgroup count is taken verbatim from the slice-derived op and returned.
%x, %y, %z = flow.dispatch.workgroup_count_from_slice | ||
hal.return %x, %y, %z : index, index, index | ||
} | ||
builtin.module { | ||
aie.device(npu1_4col) { | ||
// Test body containing three NPU instruction ops (writebd, write32, sync).
// The directive comments interleaved with the ops list 32-bit hex words;
// presumably FileCheck matches them, in order, against the serialized
// transaction -- the tool invocation is not visible in this chunk, confirm
// against the test's run line.
func.func @dummy2(%arg0: memref<16xf32>, %arg1: memref<16xf32>) { | ||
|
||
// TXN header | ||
// CHECK: 06030100 | ||
// CHECK: 00000105 | ||
// CHECK: 00000003 | ||
// CHECK: 00000068 | ||
|
||
// None of the constants below are referenced by the ops in this function.
%c16_i64 = arith.constant 16 : i64 | ||
%c1_i64 = arith.constant 1 : i64 | ||
%c0_i64 = arith.constant 0 : i64 | ||
%c64_i64 = arith.constant 64 : i64 | ||
%c0_i32 = arith.constant 0 : i32 | ||
%c1_i32 = arith.constant 1 : i32 | ||
// Expected words that precede the writebd op below.
// NOTE(review): the mapping from individual words to writebd attributes is
// not derivable from this file alone -- verify against the emitter.
// CHECK: 00000001 | ||
// CHECK: 00000000 | ||
// CHECK: 0601D0C0 | ||
// CHECK: 00000030 | ||
// CHECK: 00000001 | ||
// CHECK: 00000002 | ||
// CHECK: 00000000 | ||
// CHECK: 00600005 | ||
// CHECK: 80800007 | ||
// CHECK: 00000009 | ||
// CHECK: 2CD0000C | ||
// CHECK: 2E107041 | ||
aiex.npu.writebd { bd_id = 6 : i32, | ||
buffer_length = 1 : i32, | ||
buffer_offset = 2 : i32, | ||
enable_packet = 0 : i32, | ||
out_of_order_id = 0 : i32, | ||
packet_id = 0 : i32, | ||
packet_type = 0 : i32, | ||
column = 3 : i32, | ||
row = 0 : i32, | ||
d0_stride = 5 : i32, | ||
d0_size = 6 : i32, | ||
d1_stride = 7 : i32, | ||
d1_size = 8 : i32, | ||
d2_stride = 9 : i32, | ||
ddr_id = 10 : i32, | ||
iteration_current = 11 : i32, | ||
iteration_stride = 12 : i32, | ||
iteration_size = 13 : i32, | ||
lock_acq_enable = 1 : i32, | ||
lock_acq_id = 1 : i32, | ||
lock_acq_val = 2 : i32, | ||
lock_rel_id = 3 : i32, | ||
lock_rel_val = 4 : i32, | ||
next_bd = 5 : i32, | ||
use_next_bd = 1 : i32, | ||
valid_bd = 1 : i32} | ||
// Expected words that precede the write32 op below. 0x06400DEF is consistent
// with (column << 25) | (row << 20) | (address & 0xFFFFF) for column 3, row 4,
// address 0xabc00def, and 0x00000042 equals the written value.
// NOTE(review): encoding inferred from the numbers only -- confirm.
// CHECK: 00000000 | ||
// CHECK: 00000000 | ||
// CHECK: 06400DEF | ||
// CHECK: 00000000 | ||
// CHECK: 00000042 | ||
aiex.npu.write32 { column = 3 : i32, row = 4 : i32, address = 0xabc00def : ui32, value = 0x42 : ui32 } | ||
|
||
// Expected words that precede the sync op below. Read bytewise they are
// consistent with {column=3, row=4, direction=1} and
// {channel=5, column_num=1, row_num=2}.
// NOTE(review): byte layout inferred from the numbers only -- confirm.
// CHECK: 00030401 | ||
// CHECK: 05010200 | ||
aiex.npu.sync { column = 3 : i32, row = 4 : i32, direction = 1 : i32, channel = 5 : i32, column_num = 1 : i32, row_num = 2 : i32 } | ||
return | ||
} | ||
} | ||
} | ||
} | ||
} | ||
// Host-side stub whose only visible purpose (per the comment below) is to
// reach executable serialization: it imports a buffer view, allocates a
// resource, dispatches @dummy1::@amdaie_xclbin_fb::@dummy2 and exports a
// result.
util.func public @dummy3(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = ""}} { | ||
// this is all gibberish just to hit serializeExecutable | ||
%c0 = arith.constant 0 : index | ||
%c1 = arith.constant 1 : index | ||
%element_type_i8 = hal.element_type<i8> : i32 | ||
%dense_row_major = hal.encoding_type<dense_row_major> : i32 | ||
// The asserted shape ([1, 1]) does not match the imported tensor type
// (1024x512xi8); the body is not meant to be semantically meaningful.
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("input0") shape([%c1, %c1]) type(%element_type_i8) encoding(%dense_row_major) | ||
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<1024x512xi8> in !stream.resource<external>{%c1} | ||
%result, %result_timepoint = stream.resource.alloca uninitialized : !stream.resource<external>{%c1} => !stream.timepoint | ||
|
||
// The dispatch passes the imported resource as the single read-only operand,
// which lines up with the one ReadOnly binding declared on the export.
%2 = stream.cmd.execute await(%result_timepoint) => with(%0 as %arg2: !stream.resource<external>{%c1}) { | ||
stream.cmd.dispatch @dummy1::@amdaie_xclbin_fb::@dummy2 { | ||
ro %arg2[%c0 for %c1] : !stream.resource<external>{%c1} | ||
} | ||
} => !stream.timepoint | ||
%3 = stream.timepoint.await %2 => %result : !stream.resource<external>{%c1} | ||
// The exported tensor type (1024x1024xi32) differs from the imported one.
%4 = stream.tensor.export %3 : tensor<1024x1024xi32> in !stream.resource<external>{%c1} -> !hal.buffer_view | ||
util.return %4 : !hal.buffer_view | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 0 additions & 19 deletions
19
compiler/plugins/target/AMD-AIE/aie/AIEAssignBufferAddressesBasic.h
This file was deleted.
Oops, something went wrong.
187 changes: 187 additions & 0 deletions
187
compiler/plugins/target/AMD-AIE/aie/AIEAssignBufferDescriptorIDs.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
// Copyright 2024 The IREE Authors | ||
// | ||
// Licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include <set> | ||
|
||
#include "Passes.h" | ||
#include "aie/Dialect/AIE/IR/AIEDialect.h" | ||
#include "mlir/Pass/Pass.h" | ||
|
||
#define DEBUG_TYPE "aie-assign-bd-ids" | ||
#define EVEN_BD_ID_START 0 | ||
#define ODD_BD_ID_START 24 | ||
|
||
using namespace mlir; | ||
using namespace xilinx; | ||
using namespace xilinx::AIE; | ||
|
||
#define GEN_PASS_DECL_AIEASSIGNBUFFERDESCRIPTORIDS | ||
#include "aie/Dialect/AIE/Transforms/AIEPasses.h.inc" | ||
#undef GEN_PASS_DECL_AIEASSIGNBUFFERDESCRIPTORIDS | ||
|
||
#define GEN_PASS_DEF_AIEASSIGNBUFFERDESCRIPTORIDS | ||
#include "aie/Dialect/AIE/Transforms/AIEPasses.h.inc" | ||
#undef GEN_PASS_DEF_AIEASSIGNBUFFERDESCRIPTORIDS | ||
|
||
struct BdIdGenerator { | ||
BdIdGenerator(int col, int row, const AIETargetModel &targetModel) | ||
: col(col), row(row), isMemTile(targetModel.isMemTile(col, row)) {} | ||
|
||
int32_t nextBdId(int channelIndex) { | ||
int32_t bdId = isMemTile && channelIndex & 1 ? oddBdId++ : evenBdId++; | ||
while (bdIdAlreadyAssigned(bdId)) | ||
bdId = isMemTile && channelIndex & 1 ? oddBdId++ : evenBdId++; | ||
assignBdId(bdId); | ||
return bdId; | ||
} | ||
|
||
void assignBdId(int32_t bdId) { | ||
assert(!alreadyAssigned.count(bdId) && "bdId has already been assigned"); | ||
alreadyAssigned.insert(bdId); | ||
} | ||
|
||
bool bdIdAlreadyAssigned(int32_t bdId) { return alreadyAssigned.count(bdId); } | ||
|
||
int col; | ||
int row; | ||
int oddBdId = ODD_BD_ID_START; | ||
int evenBdId = EVEN_BD_ID_START; | ||
bool isMemTile; | ||
std::set<int32_t> alreadyAssigned; | ||
}; | ||
|
||
namespace mlir::iree_compiler::AMDAIE { | ||
|
||
struct AIEAssignBufferDescriptorIDsPass | ||
: ::impl::AIEAssignBufferDescriptorIDsBase< | ||
AIEAssignBufferDescriptorIDsPass> { | ||
void runOnOperation() override { | ||
DeviceOp targetOp = getOperation(); | ||
const AIETargetModel &targetModel = targetOp.getTargetModel(); | ||
|
||
auto memOps = llvm::to_vector_of<TileElement>(targetOp.getOps<MemOp>()); | ||
llvm::append_range(memOps, targetOp.getOps<MemTileDMAOp>()); | ||
llvm::append_range(memOps, targetOp.getOps<ShimDMAOp>()); | ||
for (TileElement memOp : memOps) { | ||
int col = memOp.getTileID().col; | ||
int row = memOp.getTileID().row; | ||
|
||
BdIdGenerator gen(col, row, targetModel); | ||
memOp->walk<WalkOrder::PreOrder>([&](DMABDOp bd) { | ||
if (bd.getBdId().has_value()) gen.assignBdId(bd.getBdId().value()); | ||
}); | ||
|
||
auto dmaOps = memOp.getOperation()->getRegion(0).getOps<DMAOp>(); | ||
if (!dmaOps.empty()) { | ||
for (auto dmaOp : dmaOps) { | ||
auto bdRegions = dmaOp.getBds(); | ||
for (auto &bdRegion : bdRegions) { | ||
auto &block = bdRegion.getBlocks().front(); | ||
DMABDOp bd = *block.getOps<DMABDOp>().begin(); | ||
if (bd.getBdId().has_value()) | ||
assert( | ||
gen.bdIdAlreadyAssigned(bd.getBdId().value()) && | ||
"bdId assigned by user but not found during previous walk"); | ||
else | ||
bd.setBdId(gen.nextBdId(dmaOp.getChannelIndex())); | ||
} | ||
} | ||
} else { | ||
DenseMap<Block *, int> blockChannelMap; | ||
// Associate with each block the channel index specified by the | ||
// dma_start | ||
for (Block &block : memOp.getOperation()->getRegion(0)) | ||
for (auto op : block.getOps<DMAStartOp>()) { | ||
int chNum = op.getChannelIndex(); | ||
blockChannelMap[&block] = chNum; | ||
Block *dest = op.getDest(); | ||
while (dest) { | ||
blockChannelMap[dest] = chNum; | ||
if (dest->hasNoSuccessors()) break; | ||
dest = dest->getSuccessors()[0]; | ||
if (blockChannelMap.contains(dest)) dest = nullptr; | ||
} | ||
} | ||
|
||
for (Block &block : memOp.getOperation()->getRegion(0)) { | ||
if (block.getOps<DMABDOp>().empty()) continue; | ||
assert(blockChannelMap.count(&block)); | ||
DMABDOp bd = (*block.getOps<DMABDOp>().begin()); | ||
if (bd.getBdId().has_value()) | ||
assert(gen.bdIdAlreadyAssigned(bd.getBdId().value()) && | ||
"bdId assigned by user but not found during previous walk"); | ||
else | ||
bd.setBdId(gen.nextBdId(blockChannelMap[&block])); | ||
} | ||
} | ||
} | ||
for (TileElement memOp : memOps) { | ||
auto dmaOps = memOp.getOperation()->getRegion(0).getOps<DMAOp>(); | ||
if (!dmaOps.empty()) { | ||
for (auto dmaOp : dmaOps) { | ||
auto bdRegions = dmaOp.getBds(); | ||
for (auto *bdRegionIt = bdRegions.begin(); | ||
bdRegionIt != bdRegions.end();) { | ||
auto &block = bdRegionIt->getBlocks().front(); | ||
DMABDOp bd = *block.getOps<DMABDOp>().begin(); | ||
std::optional<int> nextBdId; | ||
if (++bdRegionIt != bdRegions.end()) | ||
nextBdId = | ||
(*bdRegionIt->getBlocks().front().getOps<DMABDOp>().begin()) | ||
.getBdId(); | ||
else if (dmaOp.getLoop()) | ||
nextBdId = (*bdRegions.front() | ||
.getBlocks() | ||
.front() | ||
.getOps<DMABDOp>() | ||
.begin()) | ||
.getBdId(); | ||
bd.setNextBdId(nextBdId); | ||
} | ||
} | ||
} else { | ||
DenseMap<Block *, int> blockBdIdMap; | ||
for (Block &block : memOp.getOperation()->getRegion(0)) { | ||
if (block.getOps<DMABDOp>().empty()) continue; | ||
DMABDOp bd = *block.getOps<DMABDOp>().begin(); | ||
assert(bd.getBdId().has_value() && | ||
"DMABDOp should have bd_id assigned by now"); | ||
blockBdIdMap[&block] = bd.getBdId().value(); | ||
} | ||
|
||
for (Block &block : memOp.getOperation()->getRegion(0)) { | ||
if (block.getOps<DMABDOp>().empty()) continue; | ||
DMABDOp bd = *block.getOps<DMABDOp>().begin(); | ||
std::optional<int> nextBdId; | ||
if (block.getNumSuccessors()) { | ||
assert(llvm::range_size(block.getSuccessors()) == 1 && | ||
"should have only one successor block"); | ||
Block *nextBlock = block.getSuccessor(0); | ||
if (!blockBdIdMap.contains(nextBlock)) | ||
assert(nextBlock->getOperations().size() == 1 && | ||
isa<EndOp>(nextBlock->getOperations().front()) && | ||
"bb that's not in blockMap can only have aie.end"); | ||
else | ||
nextBdId = blockBdIdMap[nextBlock]; | ||
bd.setNextBdId(nextBdId); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
}; | ||
|
||
/// Factory: returns a newly allocated AIEAssignBufferDescriptorIDsPass as a
/// pass operating on DeviceOp.
std::unique_ptr<OperationPass<DeviceOp>>
createAIEAssignBufferDescriptorIDsPass() {
  std::unique_ptr<OperationPass<DeviceOp>> pass =
      std::make_unique<AIEAssignBufferDescriptorIDsPass>();
  return pass;
}
|
||
/// Registers the pass with MLIR by handing `mlir::registerPass` a factory that
/// forwards to createAIEAssignBufferDescriptorIDsPass().
void registerAIEAssignBufferDescriptorIDs() {
  auto makePass = []() -> std::unique_ptr<mlir::Pass> {
    return createAIEAssignBufferDescriptorIDsPass();
  };
  mlir::registerPass(makePass);
}
} // namespace mlir::iree_compiler::AMDAIE |
Oops, something went wrong.