Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
incorporate Switch to using transaction binary flow with no control packet (#1517)

vendor all passes
  • Loading branch information
makslevental committed Jun 24, 2024
1 parent 3f3cd29 commit 4f13b0a
Show file tree
Hide file tree
Showing 24 changed files with 4,985 additions and 170 deletions.
98 changes: 98 additions & 0 deletions build_tools/ci/print_ir_aie2xclbin/npu_instgen.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
module attributes {hal.device.targets = [#hal.device.target<"amd-aie-direct", [#hal.executable.target<"amd-aie-direct", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = "none"}>]>]} {
hal.executable private @dummy1 {
hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie-direct", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = "none"}>) {
hal.executable.export public @dummy2 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>]>]>) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>]} {
^bb0(%arg0: !hal.device):
%x, %y, %z = flow.dispatch.workgroup_count_from_slice
hal.return %x, %y, %z : index, index, index
}
builtin.module {
aie.device(npu1_4col) {
// Test body: three NPU instruction ops (writebd, write32, sync) whose
// serialized transaction words are matched, in file order, by the FileCheck
// directives interleaved below.
func.func @dummy2(%arg0: memref<16xf32>, %arg1: memref<16xf32>) {

// TXN header
// NOTE(review): the third word (3) presumably counts the ops below and the
// fourth (0x68 = 104) presumably is the total size in bytes -- confirm
// against the transaction serializer.
// CHECK: 06030100
// CHECK: 00000105
// CHECK: 00000003
// CHECK: 00000068

// None of these constants is referenced by an op in this function; the
// instruction ops below take all of their operands as attributes.
%c16_i64 = arith.constant 16 : i64
%c1_i64 = arith.constant 1 : i64
%c0_i64 = arith.constant 0 : i64
%c64_i64 = arith.constant 64 : i64
%c0_i32 = arith.constant 0 : i32
%c1_i32 = arith.constant 1 : i32
// Expected words for the writebd op that follows.
// NOTE(review): 0x0601D0C0 is consistent with (column << 25) | (row << 20) |
// (0x1D000 + bd_id * 0x20), and 0x30 with the 48-byte size of this record --
// confirm. The remaining words pack the attributes, e.g.
// 0x00600005 = (d0_size << 20) | d0_stride and
// 0x2CD0000C = (iteration_current << 26) | (iteration_size << 20) |
// iteration_stride.
// CHECK: 00000001
// CHECK: 00000000
// CHECK: 0601D0C0
// CHECK: 00000030
// CHECK: 00000001
// CHECK: 00000002
// CHECK: 00000000
// CHECK: 00600005
// CHECK: 80800007
// CHECK: 00000009
// CHECK: 2CD0000C
// CHECK: 2E107041
aiex.npu.writebd { bd_id = 6 : i32,
buffer_length = 1 : i32,
buffer_offset = 2 : i32,
enable_packet = 0 : i32,
out_of_order_id = 0 : i32,
packet_id = 0 : i32,
packet_type = 0 : i32,
column = 3 : i32,
row = 0 : i32,
d0_stride = 5 : i32,
d0_size = 6 : i32,
d1_stride = 7 : i32,
d1_size = 8 : i32,
d2_stride = 9 : i32,
ddr_id = 10 : i32,
iteration_current = 11 : i32,
iteration_stride = 12 : i32,
iteration_size = 13 : i32,
lock_acq_enable = 1 : i32,
lock_acq_id = 1 : i32,
lock_acq_val = 2 : i32,
lock_rel_id = 3 : i32,
lock_rel_val = 4 : i32,
next_bd = 5 : i32,
use_next_bd = 1 : i32,
valid_bd = 1 : i32}
// Expected words for the write32 op that follows.
// NOTE(review): 0x06400DEF equals (column << 25) | (row << 20) |
// (address & 0xFFFFF) for column = 3, row = 4, address = 0xabc00def, i.e. the
// upper address bits appear to be dropped -- confirm this is intended.
// CHECK: 00000000
// CHECK: 00000000
// CHECK: 06400DEF
// CHECK: 00000000
// CHECK: 00000042
aiex.npu.write32 { column = 3 : i32, row = 4 : i32, address = 0xabc00def : ui32, value = 0x42 : ui32 }

// Expected words for the sync op that follows.
// NOTE(review): these match (column << 16) | (row << 8) | direction and
// (channel << 24) | (column_num << 16) | (row_num << 8) -- confirm.
// CHECK: 00030401
// CHECK: 05010200
aiex.npu.sync { column = 3 : i32, row = 4 : i32, direction = 1 : i32, channel = 5 : i32, column_num = 1 : i32, row_num = 2 : i32 }
return
}
}
}
}
}
// Host-side stub that dispatches @dummy1::@amdaie_xclbin_fb::@dummy2.
// NOTE(review): the shapes and sizes below are not mutually consistent (the
// assert checks a [1, 1] i8 view, the import claims tensor<1024x512xi8>, the
// export claims tensor<1024x1024xi32>, and every resource size is %c1); per
// the comment below this is presumably deliberate -- do not "fix" without
// confirming.
util.func public @dummy3(%arg0: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = ""}} {
// this is all gibberish just to hit serializeExecutable
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%element_type_i8 = hal.element_type<i8> : i32
%dense_row_major = hal.encoding_type<dense_row_major> : i32
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("input0") shape([%c1, %c1]) type(%element_type_i8) encoding(%dense_row_major)
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<1024x512xi8> in !stream.resource<external>{%c1}
%result, %result_timepoint = stream.resource.alloca uninitialized : !stream.resource<external>{%c1} => !stream.timepoint

// Single read-only binding, matching the one storage_buffer binding declared
// on the @dummy2 export.
%2 = stream.cmd.execute await(%result_timepoint) => with(%0 as %arg2: !stream.resource<external>{%c1}) {
stream.cmd.dispatch @dummy1::@amdaie_xclbin_fb::@dummy2 {
ro %arg2[%c0 for %c1] : !stream.resource<external>{%c1}
}
} => !stream.timepoint
%3 = stream.timepoint.await %2 => %result : !stream.resource<external>{%c1}
%4 = stream.tensor.export %3 : tensor<1024x1024xi32> in !stream.resource<external>{%c1} -> !hal.buffer_view
util.return %4 : !hal.buffer_view
}
}
24 changes: 23 additions & 1 deletion build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,6 @@ fi

${FILECHECK_EXE} --input-file ${STDOUT_FULLPATH} $SOURCE_MLIR_FILE


SOURCE_MLIR_FILE="${THIS}/buffers_xclbin.mlir"

IREE_COMPILE_COMMAND="${IREE_COMPILE_EXE} \
Expand All @@ -233,4 +232,27 @@ fi

${FILECHECK_EXE} --input-file ${OUTPUT}/module_dummy1_amdaie_xclbin_fb/kernels.json $SOURCE_MLIR_FILE

# Test case: compile npu_instgen.mlir as a HAL executable for the
# amd-aie-direct backend and FileCheck the emitted NPU instruction text
# (dummy2_0.npu.txt) against the check lines embedded in the .mlir source.
SOURCE_MLIR_FILE="${THIS}/npu_instgen.mlir"

IREE_COMPILE_COMMAND="${IREE_COMPILE_EXE} \
${SOURCE_MLIR_FILE} \
--compile-mode=hal-executable \
--iree-hal-target-backends=amd-aie-direct \
--iree-amd-aie-peano-install-dir=${PEANO} \
--iree-amd-aie-mlir-aie-install-dir=${MLIR_AIE} \
--iree-amd-aie-vitis-install-dir=${VITIS} \
--iree-hal-dump-executable-intermediates-to=${OUTPUT} \
--iree-hal-dump-executable-files-to=${OUTPUT} \
--mlir-disable-threading \
--iree-amd-aie-show-invoked-commands"

echo "Executing command: $IREE_COMPILE_COMMAND"
# stdout is captured to a file, but the FileCheck below reads the dumped
# .npu.txt artifact rather than this capture.
eval $IREE_COMPILE_COMMAND 1> ${STDOUT_FULLPATH}
# NOTE(review): the redirection above creates the file before the command
# runs, so this check presumably only catches a failed redirection; the
# compiler's exit status is not inspected here -- confirm that `set -e` (or
# equivalent) is enabled earlier in the script.
if [ ! -f "${STDOUT_FULLPATH}" ]; then
echo "stdout file was not created: ${STDOUT_FULLPATH}"
exit 1
fi

# Match the generated instruction words against the directives in the source.
${FILECHECK_EXE} --input-file ${OUTPUT}/module_dummy1_amdaie_xclbin_fb/dummy2_0.npu.txt $SOURCE_MLIR_FILE

echo "Test of printing in aie2xclbin passed."
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "AIEAssignBufferAddressesBasic.h"

#include "Passes.h"
#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "llvm/ADT/Twine.h"
#include "mlir/IR/Attributes.h"
Expand All @@ -17,6 +16,7 @@ using namespace mlir;
using namespace xilinx;
using namespace xilinx::AIE;

namespace mlir::iree_compiler::AMDAIE {
struct AIEAssignBufferAddressesPassBasic : mlir::OperationPass<DeviceOp> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
AIEAssignBufferAddressesPassBasic)
Expand Down Expand Up @@ -85,12 +85,13 @@ struct AIEAssignBufferAddressesPassBasic : mlir::OperationPass<DeviceOp> {
};

std::unique_ptr<OperationPass<DeviceOp>>
AIE::createAIEAssignBufferAddressesBasicPass() {
createAIEAssignBufferAddressesBasicPass() {
return std::make_unique<AIEAssignBufferAddressesPassBasic>();
}

void xilinx::AIE::registerAIEAssignBufferAddressesBasic() {
void registerAIEAssignBufferAddressesBasic() {
mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
return xilinx::AIE::createAIEAssignBufferAddressesBasicPass();
return createAIEAssignBufferAddressesBasicPass();
});
}
} // namespace mlir::iree_compiler::AMDAIE

This file was deleted.

187 changes: 187 additions & 0 deletions compiler/plugins/target/AMD-AIE/aie/AIEAssignBufferDescriptorIDs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <set>

#include "Passes.h"
#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "mlir/Pass/Pass.h"

#define DEBUG_TYPE "aie-assign-bd-ids"
#define EVEN_BD_ID_START 0
#define ODD_BD_ID_START 24

using namespace mlir;
using namespace xilinx;
using namespace xilinx::AIE;

#define GEN_PASS_DECL_AIEASSIGNBUFFERDESCRIPTORIDS
#include "aie/Dialect/AIE/Transforms/AIEPasses.h.inc"
#undef GEN_PASS_DECL_AIEASSIGNBUFFERDESCRIPTORIDS

#define GEN_PASS_DEF_AIEASSIGNBUFFERDESCRIPTORIDS
#include "aie/Dialect/AIE/Transforms/AIEPasses.h.inc"
#undef GEN_PASS_DEF_AIEASSIGNBUFFERDESCRIPTORIDS

/// Hands out buffer-descriptor ids for one tile and remembers which ids are
/// already taken.
///
/// Two monotonically increasing counters are kept: one starting at
/// EVEN_BD_ID_START and one starting at ODD_BD_ID_START. The latter is only
/// consulted for mem tiles when the requesting channel index is odd.
struct BdIdGenerator {
  BdIdGenerator(int col, int row, const AIETargetModel &targetModel)
      : col(col), row(row), isMemTile(targetModel.isMemTile(col, row)) {}

  /// Returns the next id that has not been recorded yet for the given channel
  /// and records it as taken.
  int32_t nextBdId(int channelIndex) {
    // Odd channels of a mem tile draw from the "odd" counter; every other
    // request draws from the "even" counter.
    const bool drawFromOdd = isMemTile && ((channelIndex & 1) != 0);
    int &counter = drawFromOdd ? oddBdId : evenBdId;

    // Advance past any ids that were reserved up front (e.g. user-assigned).
    int32_t candidate;
    do {
      candidate = counter++;
    } while (bdIdAlreadyAssigned(candidate));

    assignBdId(candidate);
    return candidate;
  }

  /// Records `bdId` as taken; asserts that it was not recorded before.
  void assignBdId(int32_t bdId) {
    assert(!alreadyAssigned.count(bdId) && "bdId has already been assigned");
    alreadyAssigned.insert(bdId);
  }

  /// True when `bdId` has been recorded through assignBdId.
  bool bdIdAlreadyAssigned(int32_t bdId) {
    return alreadyAssigned.find(bdId) != alreadyAssigned.end();
  }

  int col;
  int row;
  int oddBdId = ODD_BD_ID_START;
  int evenBdId = EVEN_BD_ID_START;
  bool isMemTile;
  std::set<int32_t> alreadyAssigned;
};

namespace mlir::iree_compiler::AMDAIE {

/// Pass over a DeviceOp that (1) gives every DMABDOp lacking a `bd_id` a fresh
/// one and (2) sets each DMABDOp's `next_bd_id` so that consecutive buffer
/// descriptors are chained. Both steps visit every MemOp, MemTileDMAOp and
/// ShimDMAOp of the device and support two layouts of their first region:
/// DMAOp ops with one region per BD, or a block CFG started by DMAStartOp.
struct AIEAssignBufferDescriptorIDsPass
: ::impl::AIEAssignBufferDescriptorIDsBase<
AIEAssignBufferDescriptorIDsPass> {
void runOnOperation() override {
DeviceOp targetOp = getOperation();
const AIETargetModel &targetModel = targetOp.getTargetModel();

// Gather all DMA-carrying tile elements into one list.
auto memOps = llvm::to_vector_of<TileElement>(targetOp.getOps<MemOp>());
llvm::append_range(memOps, targetOp.getOps<MemTileDMAOp>());
llvm::append_range(memOps, targetOp.getOps<ShimDMAOp>());
// Step 1: assign bd ids.
for (TileElement memOp : memOps) {
int col = memOp.getTileID().col;
int row = memOp.getTileID().row;

// One generator per tile element. Ids already present on BDs are reserved
// first so that generated ids never collide with them.
BdIdGenerator gen(col, row, targetModel);
memOp->walk<WalkOrder::PreOrder>([&](DMABDOp bd) {
if (bd.getBdId().has_value()) gen.assignBdId(bd.getBdId().value());
});

auto dmaOps = memOp.getOperation()->getRegion(0).getOps<DMAOp>();
if (!dmaOps.empty()) {
// Region-based form: each DMAOp owns a list of BD regions.
for (auto dmaOp : dmaOps) {
auto bdRegions = dmaOp.getBds();
for (auto &bdRegion : bdRegions) {
auto &block = bdRegion.getBlocks().front();
// NOTE(review): assumes the first block of every BD region contains a
// DMABDOp; begin() is dereferenced without an emptiness check.
DMABDOp bd = *block.getOps<DMABDOp>().begin();
if (bd.getBdId().has_value())
assert(
gen.bdIdAlreadyAssigned(bd.getBdId().value()) &&
"bdId assigned by user but not found during previous walk");
else
bd.setBdId(gen.nextBdId(dmaOp.getChannelIndex()));
}
}
} else {
DenseMap<Block *, int> blockChannelMap;
// Associate with each block the channel index specified by the
// dma_start
for (Block &block : memOp.getOperation()->getRegion(0))
for (auto op : block.getOps<DMAStartOp>()) {
int chNum = op.getChannelIndex();
blockChannelMap[&block] = chNum;
Block *dest = op.getDest();
// Follow the chain of first successors from the dma_start destination,
// stopping at a block without successors or one that is already mapped
// (which also terminates on cycles).
while (dest) {
blockChannelMap[dest] = chNum;
if (dest->hasNoSuccessors()) break;
dest = dest->getSuccessors()[0];
if (blockChannelMap.contains(dest)) dest = nullptr;
}
}

// Assign an id to the first DMABDOp of every block that has one, using the
// channel recorded for that block.
for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
assert(blockChannelMap.count(&block));
DMABDOp bd = (*block.getOps<DMABDOp>().begin());
if (bd.getBdId().has_value())
assert(gen.bdIdAlreadyAssigned(bd.getBdId().value()) &&
"bdId assigned by user but not found during previous walk");
else
bd.setBdId(gen.nextBdId(blockChannelMap[&block]));
}
}
}
// Step 2: chain BDs by setting next_bd_id.
for (TileElement memOp : memOps) {
auto dmaOps = memOp.getOperation()->getRegion(0).getOps<DMAOp>();
if (!dmaOps.empty()) {
for (auto dmaOp : dmaOps) {
auto bdRegions = dmaOp.getBds();
// The iterator is advanced inside the loop body (see `++bdRegionIt`), so
// the for-header has no increment expression.
for (auto *bdRegionIt = bdRegions.begin();
bdRegionIt != bdRegions.end();) {
auto &block = bdRegionIt->getBlocks().front();
DMABDOp bd = *block.getOps<DMABDOp>().begin();
std::optional<int> nextBdId;
// Next BD is the first DMABDOp of the following region; for the last
// region it wraps to the first region only when the DMAOp loops, and is
// otherwise left empty.
if (++bdRegionIt != bdRegions.end())
nextBdId =
(*bdRegionIt->getBlocks().front().getOps<DMABDOp>().begin())
.getBdId();
else if (dmaOp.getLoop())
nextBdId = (*bdRegions.front()
.getBlocks()
.front()
.getOps<DMABDOp>()
.begin())
.getBdId();
bd.setNextBdId(nextBdId);
}
}
} else {
// Block-based form: first record the bd id of every BD-carrying block.
DenseMap<Block *, int> blockBdIdMap;
for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
DMABDOp bd = *block.getOps<DMABDOp>().begin();
assert(bd.getBdId().has_value() &&
"DMABDOp should have bd_id assigned by now");
blockBdIdMap[&block] = bd.getBdId().value();
}

// Then point each BD at the bd id of its block's single successor. Blocks
// without successors are left untouched (setNextBdId is not called).
for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
DMABDOp bd = *block.getOps<DMABDOp>().begin();
std::optional<int> nextBdId;
if (block.getNumSuccessors()) {
assert(llvm::range_size(block.getSuccessors()) == 1 &&
"should have only one successor block");
Block *nextBlock = block.getSuccessor(0);
// A successor that carries no BD is asserted to be a lone aie.end block; in
// that case nextBdId stays empty.
if (!blockBdIdMap.contains(nextBlock))
assert(nextBlock->getOperations().size() == 1 &&
isa<EndOp>(nextBlock->getOperations().front()) &&
"bb that's not in blockMap can only have aie.end");
else
nextBdId = blockBdIdMap[nextBlock];
bd.setNextBdId(nextBdId);
}
}
}
}
}
};

/// Factory: builds a fresh AIEAssignBufferDescriptorIDsPass and returns it to
/// the caller as a DeviceOp operation pass.
std::unique_ptr<OperationPass<DeviceOp>>
createAIEAssignBufferDescriptorIDsPass() {
  std::unique_ptr<OperationPass<DeviceOp>> pass =
      std::make_unique<AIEAssignBufferDescriptorIDsPass>();
  return pass;
}

/// Registers the buffer-descriptor-id assignment pass with MLIR's pass
/// registry, using createAIEAssignBufferDescriptorIDsPass as the allocator.
void registerAIEAssignBufferDescriptorIDs() {
  auto makePass = []() -> std::unique_ptr<mlir::Pass> {
    return createAIEAssignBufferDescriptorIDsPass();
  };
  mlir::registerPass(makePass);
}
} // namespace mlir::iree_compiler::AMDAIE
Loading

0 comments on commit 4f13b0a

Please sign in to comment.