diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp index e739b4868..ed1fd6cae 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AMDAIETargetCDODirect.cpp @@ -118,8 +118,10 @@ LogicalResult configureLocksAndBd(Block &block, const TileLoc &tileLoc, assert(bdOp.getBdId().has_value() && "DMABDOp must have assigned bd_id; did you forget to run " "aie-assign-bd-ids?"); + bool validBd = true; std::optional packetType; std::optional packetID; + bool enablePacket = false; auto maybePacketOps = block.getOps(); if (!maybePacketOps.empty()) { assert(llvm::range_size(maybePacketOps) == 1 && @@ -127,6 +129,7 @@ LogicalResult configureLocksAndBd(Block &block, const TileLoc &tileLoc, auto packetOp = *maybePacketOps.begin(); packetType = packetOp.getPacketType(); packetID = packetOp.getPacketId(); + enablePacket = true; } BufferOp bufferOp = cast(bdOp.getBuffer().getDefiningOp()); @@ -148,16 +151,20 @@ LogicalResult configureLocksAndBd(Block &block, const TileLoc &tileLoc, BDPadLayout{dim.getConstPadBefore(), dim.getConstPadAfter()}); } } - if (failed(configureDMABD(deviceModel, dmaTileBd.value(), tileLoc, - static_cast(*bdOp.getBdId()), - bdOp.getNextBdId().has_value() - ? std::optional{static_cast( - *bdOp.getNextBdId())} - : std::nullopt, - packetType, packetID, *bufferOp.getAddress(), - getLenInBytes(bdOp), getOffsetInBytes(bdOp), + + bool enableNextBd = bdOp.getNextBdId().has_value(); + std::optional nextBdId = + enableNextBd + ? std::optional{static_cast(*bdOp.getNextBdId())} + : std::nullopt; + std::optional maybeIter = std::nullopt; + if (failed(configureDMABD(deviceModel, dmaTileBd.value(), tileLoc, validBd, + static_cast(*bdOp.getBdId()), enableNextBd, + nextBdId, enablePacket, packetType, packetID, + *bufferOp.getAddress(), getLenInBytes(bdOp), + getOffsetInBytes(bdOp), getBufferElementTypeWidthInBytes(bdOp), maybeDims, - maybePadDims))) { + maybePadDims, maybeIter))) { return failure(); } return success(); @@ -233,11 +240,12 @@ LogicalResult addInitConfigToCDO(const AMDAIEDeviceModel &deviceModel, for (auto op : block.getOps()) { DMABDOp bd = *op.getDest()->getOps().begin(); int chNum = op.getChannelIndex(); - auto channelDir = op.getChannelDir(); - if (failed(pushToBdQueueAndEnable( - deviceModel, tileLoc, chNum, - static_cast(channelDir), bd.getBdId().value(), - op.getRepeatCount()))) + auto channelDir = static_cast(op.getChannelDir()); + bool issueToken = tileLoc.row == 0 && channelDir == DMAChannelDir::MM2S; + bool setChannelEnable = true; + if (failed(configurePushToBdQueue( + deviceModel, tileLoc, chNum, channelDir, bd.getBdId().value(), + op.getRepeatCount(), issueToken, setChannelEnable))) return failure(); } } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEControlCodeToTransaction.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEControlCodeToTransaction.cpp index 421900d6a..56edaf27a 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEControlCodeToTransaction.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEControlCodeToTransaction.cpp @@ -8,16 +8,16 @@ #include "iree-amd-aie/Transforms/Passes.h" #include "iree-amd-aie/Transforms/Transforms.h" #include "iree-amd-aie/Transforms/Utils/AMDAIEUtils.h" +#include "iree-amd-aie/aie_runtime/iree_aie_configure.h" +#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h" #include "mlir/IR/AsmState.h" #include "mlir/IR/Iterators.h" #include "mlir/Transforms/DialectConversion.h" #define DEBUG_TYPE "iree-amdaie-controlcode-to-transaction" -#define TXN_OPC_WRITE 0x0 -#define TXN_OPC_BLOCKWRITE 0x1 -#define TXN_OPC_TCT 0x80 -#define TXN_OPC_DDR_PATCH 0x81 +#define TXN_OPC_TCT XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_TCT +#define TXN_OPC_DDR_PATCH XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_DDR_PATCH namespace mlir::iree_compiler::AMDAIE { @@ -29,16 +29,25 @@ class TransactionBuilder { void clearAndInitialize() { instructions.clear(); - llvm::MutableArrayRef words = reserveAndGetTail(4); - // setup txn header - words[0] = 0x06030100; - words[1] = 0x00000105; + // Setup txn header. + TRY_XAIE_API_FATAL_ERROR(XAie_StartTransaction, &deviceModel.devInst, + XAIE_TRANSACTION_DISABLE_AUTO_FLUSH); } size_t getInstructionSize() const { return instructions.size(); } ArrayRef finalizeAndReturnInstructions() { - finalizeHeader(); + std::unique_ptr txn_ptr( + XAie_ExportSerializedTransaction(&deviceModel.devInst, 0, 0), &free); + // Extract transaction size. + auto *hdr = reinterpret_cast(txn_ptr.get()); + size_t sizeInBytes = hdr->TxnSize; + size_t instructionCount = sizeInBytes / sizeof(uint32_t); + // Resize instructions and copy data. + instructions.resize(instructionCount); + memcpy(instructions.data(), txn_ptr.get(), sizeInBytes); + // Clear the transaction. + TRY_XAIE_API_FATAL_ERROR(XAie_ClearTransaction, &deviceModel.devInst); return ArrayRef(instructions.data(), instructions.size()); } @@ -52,139 +61,88 @@ class TransactionBuilder { LogicalResult appendAddressPatch(uint32_t addr, uint32_t argIdx, uint32_t offset) { - llvm::MutableArrayRef words = reserveAndGetTail(12); - words[0] = TXN_OPC_DDR_PATCH; - words[1] = words.size() * sizeof(uint32_t); // Operation Size + std::array words = {0}; - words[6] = addr; + words[4] = addr; + words[5] = 0; + words[6] = argIdx; words[7] = 0; - words[8] = argIdx; + words[8] = offset; words[9] = 0; - words[10] = offset; - words[11] = 0; - instructionCounter++; - return success(); + + uint8_t opCode = static_cast(TXN_OPC_DDR_PATCH); + uint32_t *data = &words[0]; + uint32_t size = words.size() * sizeof(uint32_t); + return configureCustomTxnOp(deviceModel, opCode, data, size); } LogicalResult appendTCTSync(uint32_t col, uint32_t row, uint32_t direction, uint32_t rowNum, uint32_t colNum, uint32_t channel) { - llvm::MutableArrayRef words = reserveAndGetTail(4); - words[0] = TXN_OPC_TCT; - words[1] = words.size() * sizeof(uint32_t); // Operation Size - - words[2] |= direction & 0xff; - words[2] |= (row & 0xff) << 8; - words[2] |= (col & 0xff) << 16; - - words[3] |= (rowNum & 0xff) << 8; - words[3] |= (colNum & 0xff) << 16; - words[3] |= (channel & 0xff) << 24; - instructionCounter++; - return success(); + std::array words = {0}; + + words[0] |= direction & 0xff; + words[0] |= (row & 0xff) << 8; + words[0] |= (col & 0xff) << 16; + + words[1] |= (rowNum & 0xff) << 8; + words[1] |= (colNum & 0xff) << 16; + words[1] |= (channel & 0xff) << 24; + + uint8_t opCode = static_cast(TXN_OPC_TCT); + uint32_t *data = &words[0]; + uint32_t size = words.size() * sizeof(uint32_t); + return configureCustomTxnOp(deviceModel, opCode, data, size); } LogicalResult appendPushToQueueOp(uint32_t col, uint32_t row, AMDAIE::DMAChannelDir direction, uint32_t channel, uint32_t bdId, uint32_t repeatCount, bool issueToken) { - uint32_t colShift = deviceModel.getColumnShift(); - uint32_t rowShift = deviceModel.getRowShift(); - uint32_t addr = - direction == AMDAIE::DMAChannelDir::MM2S ? 0x1D214 : 0x1D204; - if (channel == 1) addr += 0x8; - // TODO(jornt): use aie-rt's transaction serializer instead to avoid these - // indiscrepancies between this file and aie-rt. - addr = ((col & 0xff) << colShift) | ((row & 0xff) << rowShift) | - (addr & 0xFFFFF); - uint32_t value = 0; - value |= bdId & 0xF; - value |= (repeatCount & 0xFF) << 16; - if (issueToken) value |= 0x80000000; - return appendWrite32Op(addr, value); - } - - LogicalResult appendWrite32Op(uint32_t addr, uint32_t value) { - llvm::MutableArrayRef words = reserveAndGetTail(6); - // XAIE_IO_WRITE - words[0] = TXN_OPC_WRITE; - words[1] = 0; - words[2] = addr; - words[3] = 0; - words[4] = value; // Value - words[5] = words.size() * sizeof(uint32_t); // Operation Size - instructionCounter++; - return success(); + // Assume channel is enabled by default. + bool setChannelEnable = false; + auto tileLoc = XAie_TileLoc(col, row); + return configurePushToBdQueue(deviceModel, tileLoc, channel, direction, + bdId, repeatCount, issueToken, + setChannelEnable); } LogicalResult appendWriteBdOp( - uint32_t bdAddr, uint32_t bufferLength, uint32_t bufferOffset, - bool enablePacket, uint32_t outOfOrderId, uint32_t packetId, - uint32_t packetType, uint32_t d0Size, uint32_t d0Stride, uint32_t d1Size, - uint32_t d1Stride, uint32_t d2Stride, uint32_t iterationCurrent, - uint32_t iterationSize, uint32_t iterationStride, uint32_t nextBd, - bool useNextBd, bool validBd, int32_t lockRelVal, uint32_t lockRelId, - bool lockAcqEnable, int32_t lockAcqVal, uint32_t lockAcqId) { - llvm::MutableArrayRef words = reserveAndGetTail(12); - words[0] = TXN_OPC_BLOCKWRITE; - words[1] = 0; - // RegOff - words[2] = bdAddr; // ADDR - words[3] = words.size() * sizeof(uint32_t); // Operation Size - // DMA_BDX_0 - words[4] = bufferLength; - // DMA_BDX_1 - words[5] = bufferOffset; - // DMA_BDX_2 - // En Packet , OoO BD ID , Packet ID , Packet Type - words[6] |= ((int)enablePacket & 0x1) << 30; - words[6] |= (outOfOrderId & 0x3f) << 24; - words[6] |= (packetId & 0x1f) << 19; - words[6] |= (packetType & 0x7) << 16; - // DMA_BDX_3 - // TODO: Secure Access - words[7] |= (d0Size & 0x3ff) << 20; - words[7] |= d0Stride & 0xfffff; - // DMA_BDX_4 - words[8] = 0x80000000; // burst length; - words[8] |= (d1Size & 0x3ff) << 20; - words[8] |= d1Stride & 0xfffff; - // DMA_BDX_5 - // TODO: SIMID, AxCache, AXQoS - words[9] = d2Stride & 0xfffff; - // DMA_BDX_6 - words[10] |= (iterationCurrent & 0x3f) << 26; - words[10] |= (iterationSize & 0x3f) << 20; - words[10] |= iterationStride & 0xfffff; - // DMA_BDX_7 - // TODO: TLAST Suppress - words[11] |= (nextBd & 0xf) << 27; - words[11] |= ((int)useNextBd & 0x1) << 26; - words[11] |= ((int)validBd & 0x1) << 25; - words[11] |= (lockRelVal & 0xef) << 18; - words[11] |= (lockRelId & 0xf) << 13; - words[11] |= ((int)lockAcqEnable & 0x1) << 12; - words[11] |= (lockAcqVal & 0xef) << 5; - words[11] |= lockAcqId & 0xf; - instructionCounter++; - return success(); + uint32_t col, uint32_t row, uint32_t bdId, uint32_t bufferLength, + uint32_t bufferOffset, bool enablePacket, uint32_t packetId, + uint32_t packetType, ArrayRef sizes, ArrayRef strides, + uint32_t iterationCurrent, uint32_t iterationSize, + uint32_t iterationStride, uint32_t nextBd, bool useNextBd, bool validBd, + int32_t lockRelVal, uint32_t lockRelId, bool lockAcqEnable, + int32_t lockAcqVal, uint32_t lockAcqId) { + // Configure DMA Locks. + auto tileLoc = XAie_TileLoc(col, row); + FailureOr dmaTileBd = initDMADesc(deviceModel, tileLoc); + if (failed(dmaTileBd)) return failure(); + if (failed(configureDMALocks(deviceModel, dmaTileBd.value(), tileLoc, + lockAcqVal, lockRelVal, lockAcqId, lockRelId, + lockAcqEnable))) { + return failure(); + } + // Configure DMA BD. + uint32_t minStrideBitWidth = deviceModel.getMinStrideBitWidth(); + uint32_t bufferElementTypeWidthInBytes = minStrideBitWidth / 8; + uint32_t bufferLengthInBytes = bufferLength * bufferElementTypeWidthInBytes; + std::vector dims = { + {static_cast(sizes[0]), static_cast(strides[0])}, + {static_cast(sizes[1]), static_cast(strides[1])}, + {static_cast(sizes[2]), static_cast(strides[2])}}; + std::optional> pads = std::nullopt; + BDIterLayout iter = {iterationStride, static_cast(iterationSize), + static_cast(iterationCurrent)}; + return configureDMABD(deviceModel, dmaTileBd.value(), tileLoc, validBd, + bdId, useNextBd, nextBd, enablePacket, packetType, + packetId, deviceModel.devInst.BaseAddr, + bufferLengthInBytes, bufferOffset, + bufferElementTypeWidthInBytes, dims, pads, iter); } private: - void finalizeHeader() { - // Finalize txn header. - instructions[2] = instructionCounter; - instructions[3] = instructions.size() * sizeof(uint32_t); - } - - llvm::MutableArrayRef reserveAndGetTail(size_t tailSize) { - auto oldSize = instructions.size(); - auto newSize = oldSize + tailSize; - instructions.resize(newSize, 0); - return llvm::MutableArrayRef(instructions.data() + oldSize, - tailSize); - } - size_t instructionCounter{0}; std::vector instructions; }; @@ -223,33 +181,20 @@ LogicalResult convertOp(AMDAIE::NpuWriteBdOp op, TransactionBuilder &builder) { uint32_t col = op.getCol(); uint32_t row = op.getRow(); uint32_t bdId = op.getBdId(); - uint32_t colShift = builder.deviceModel.getColumnShift(); - uint32_t rowShift = builder.deviceModel.getRowShift(); - uint32_t bdAddr = - (col << colShift) | (row << rowShift) | (0x1D000 + bdId * 0x20); ArrayRef sizes = op.getSizes(); - ArrayRef strides = op.getStrides(); + SmallVector strides(op.getStrides()); if (sizes.size() != 3) return op.emitOpError() << "expected 3 sizes"; if (strides.size() != 3) return op.emitOpError() << "expected 3 strides"; - uint32_t d0Size = sizes[sizes.size() - 1]; - uint32_t d1Size = sizes[sizes.size() - 2]; - // Strides and iteration_size are encoded as `actual - 1`, but `0` should stay - // `0` as it's not supported; - uint32_t d0Stride = - std::max((int64_t)strides[strides.size() - 1] - 1, (int64_t)0); - uint32_t d1Stride = - std::max((int64_t)strides[strides.size() - 2] - 1, (int64_t)0); - uint32_t d2Stride = - std::max((int64_t)strides[strides.size() - 3] - 1, (int64_t)0); - uint32_t iterationSize = - std::max((int64_t)op.getIterationSize() - 1, (int64_t)0); - uint32_t iterationStride = - std::max((int64_t)op.getIterationStride() - 1, (int64_t)0); + // Strides and iteration_size will be encoded as `actual - 1`, so we need to + // ensure they are at least 1. + std::for_each(strides.begin(), strides.end(), + [](int32_t &stride) { stride = std::max(stride, int32_t(1)); }); + uint32_t iterationSize = std::max(op.getIterationSize(), uint32_t(1)); + uint32_t iterationStride = std::max(op.getIterationStride(), uint32_t(1)); if (failed(builder.appendWriteBdOp( - bdAddr, op.getBufferLength(), op.getBufferOffset(), - op.getEnablePacket(), op.getOutOfOrderId(), op.getPacketId(), - op.getPacketType(), d0Size, d0Stride, d1Size, d1Stride, d2Stride, - op.getIterationCurrent(), iterationSize, iterationStride, + col, row, bdId, op.getBufferLength(), op.getBufferOffset(), + op.getEnablePacket(), op.getPacketId(), op.getPacketType(), sizes, + strides, op.getIterationCurrent(), iterationSize, iterationStride, op.getNextBd(), op.getUseNextBd(), op.getValidBd(), op.getLockRelVal(), op.getLockRelId(), op.getLockAcqEnable(), op.getLockAcqVal(), op.getLockAcqId()))) { diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/controlcode_to_transaction.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/controlcode_to_transaction.mlir index 057ffebd5..92cc61d46 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/controlcode_to_transaction.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/controlcode_to_transaction.mlir @@ -15,7 +15,7 @@ module { // ----- // CHECK: 0x06030100 -// CHECK: 0x00000105 +// CHECK: 0x00000104 // CHECK: 0x00000000 // CHECK: 0x00000010 // CHECK-LABEL: @no_ops @@ -35,7 +35,7 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // ----- // CHECK: 0x06030100 -// CHECK: 0x00000105 +// CHECK: 0x00000104 // CHECK: 0x00000001 // CHECK: 0x00000040 // CHECK: 0x00000081 @@ -68,10 +68,10 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // ----- // CHECK: 0x06030100 -// CHECK: 0x00000105 +// CHECK: 0x00000104 // CHECK: 0x00000001 // CHECK: 0x00000028 -// CHECK: 0x00000000 +// CHECK: 0x00140000 // CHECK: 0x00000000 // CHECK: 0x0001D214 // CHECK: 0x00000000 @@ -95,10 +95,10 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // ----- // CHECK: 0x06030100 -// CHECK: 0x00000105 +// CHECK: 0x00000104 // CHECK: 0x00000001 // CHECK: 0x00000028 -// CHECK: 0x00000000 +// CHECK: 0x001C0000 // CHECK: 0x00000000 // CHECK: 0x0601D21C // CHECK: 0x00000000 @@ -123,10 +123,10 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // `tct_sync` on the single column. // CHECK: 0x06030100 -// CHECK: 0x00000105 +// CHECK: 0x00000104 // CHECK: 0x00000002 // CHECK: 0x00000038 -// CHECK: 0x00000000 +// CHECK: 0x00140000 // CHECK: 0x00000000 // CHECK: 0x0401D214 // CHECK: 0x00000000 @@ -156,28 +156,28 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // Expect one `tct_sync` to cover four columns, with same channel, direction, and row. // CHECK: 0x06030100 -// CHECK: 0x00000105 +// CHECK: 0x00000104 // CHECK: 0x00000005 // CHECK: 0x00000080 -// CHECK: 0x00000000 +// CHECK: 0x00140000 // CHECK: 0x00000000 // CHECK: 0x0001D214 // CHECK: 0x00000000 // CHECK: 0x80000000 // CHECK: 0x00000018 -// CHECK: 0x00000000 +// CHECK: 0x00140000 // CHECK: 0x00000000 // CHECK: 0x0601D214 // CHECK: 0x00000000 // CHECK: 0x80000000 // CHECK: 0x00000018 -// CHECK: 0x00000000 +// CHECK: 0x00140000 // CHECK: 0x00000000 // CHECK: 0x0401D214 // CHECK: 0x00000000 // CHECK: 0x80000000 // CHECK: 0x00000018 -// CHECK: 0x00000000 +// CHECK: 0x00140000 // CHECK: 0x00000000 // CHECK: 0x0201D214 // CHECK: 0x00000000 @@ -209,7 +209,7 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // ----- // CHECK: 0x06030100 -// CHECK: 0x00000105 +// CHECK: 0x00000104 // CHECK: 0x00000001 // CHECK: 0x00000040 // CHECK: 0x00000001 @@ -242,10 +242,10 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} // ----- // CHECK: 0x06030100 -// CHECK: 0x00000105 +// CHECK: 0x00000104 // CHECK: 0x00000001 // CHECK: 0x00000040 -// CHECK: 0x00000001 +// CHECK: 0x00400001 // CHECK: 0x00000000 // CHECK: 0x0201D040 // CHECK: 0x00000030 diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc index cae09da23..b89346b46 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.cc @@ -53,12 +53,14 @@ LogicalResult configureDMALocks(const AMDAIEDeviceModel &deviceModel, LogicalResult configureDMABD( const AMDAIEDeviceModel &deviceModel, XAie_DmaDesc &dmaDesc, - const TileLoc &tileLoc, uint8_t bdId, std::optional nextBdId, + const TileLoc &tileLoc, bool validBd, uint8_t bdId, bool enableNextBd, + std::optional nextBdId, bool enablePacket, std::optional packetType, std::optional packetId, uint64_t baseAddr, uint64_t lenInBytes, uint64_t offsetInBytes, uint32_t bufferElementTypeWidthInBytes, const std::optional> &maybeDims, - const std::optional> &maybePadDims) { + const std::optional> &maybePadDims, + const std::optional &maybeIter) { assert(dmaDesc.IsReady == XAIE_COMPONENT_IS_READY && "XAie_DmaDescs need to be created using initDMADesc"); if (deviceModel.isShimNOCTile(tileLoc.col, tileLoc.row)) { @@ -162,14 +164,19 @@ LogicalResult configureDMABD( TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaSetPadding, &dmaDesc, &dmaPadTensor); } + if (maybeIter.has_value()) { + BDIterLayout iter = maybeIter.value(); + TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaSetBdIteration, &dmaDesc, iter.stride, + iter.size, iter.current); + } + if (nextBdId) { - auto enableNextBd = 1; TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaSetNextBd, &dmaDesc, nextBdId.value(), enableNextBd); } - if (packetId) { - if (!packetType) { + if (enablePacket) { + if (!packetId || !packetType) { llvm::errs() << "must have packetType with packetId"; return failure(); } @@ -184,19 +191,25 @@ LogicalResult configureDMABD( XAie_DmaSetPkt, &dmaDesc, XAie_PacketInit(packetId.value(), packetType.value())); } - TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaEnableBd, &dmaDesc); + + if (validBd) { + TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaEnableBd, &dmaDesc); + } else { + TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaDisableBd, &dmaDesc); + } + auto devInst = const_cast(&deviceModel.devInst); TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaWriteBd, devInst, &dmaDesc, tileLoc, bdId); return success(); } -LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel, +LogicalResult configurePushToBdQueue(const AMDAIEDeviceModel &deviceModel, const TileLoc &tileLoc, uint8_t chNum, const DMAChannelDir &channelDir, - uint8_t bdId, uint32_t repeatCount) { + uint8_t bdId, uint32_t repeatCount, + bool enTokenIssue, bool setChannelEnable) { XAie_DmaDirection direction = static_cast(channelDir); - auto enTokenIssue = tileLoc.row == 0 && direction == DMA_S2MM; // in english repeat_count==0 means "do it once" and don't repeat but // libxaie treats repeat_count=1 as do it once. repeatCount += 1; @@ -204,8 +217,18 @@ LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel, TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaChannelSetStartQueue, devInst, tileLoc, chNum, direction, bdId, repeatCount, enTokenIssue); - TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaChannelEnable, devInst, tileLoc, chNum, - direction); + if (setChannelEnable) { + TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaChannelEnable, devInst, tileLoc, chNum, + direction); + } + return success(); +} + +LogicalResult configureCustomTxnOp(const AMDAIEDeviceModel &deviceModel, + uint8_t opCode, uint32_t *data, + uint32_t size) { + auto devInst = const_cast(&deviceModel.devInst); + TRY_XAIE_API_LOGICAL_RESULT(XAie_AddCustomTxnOp, devInst, opCode, data, size); return success(); } @@ -235,7 +258,8 @@ LogicalResult initializeLock(const AMDAIEDeviceModel &deviceModel, const Lock &lock) { auto devInst = const_cast(&deviceModel.devInst); auto locInit = XAie_LockInit(lock.id, lock.init); - TRY_XAIE_API_FATAL_ERROR(XAie_LockSetValue, devInst, lock.tileLoc, locInit); + TRY_XAIE_API_LOGICAL_RESULT(XAie_LockSetValue, devInst, lock.tileLoc, + locInit); return success(); } diff --git a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h index 92dbf3a37..ca1556d7d 100644 --- a/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h +++ b/runtime/src/iree-amd-aie/aie_runtime/iree_aie_configure.h @@ -36,6 +36,13 @@ struct BDPadLayout { }; ASSERT_STANDARD_LAYOUT(BDPadLayout); +struct BDIterLayout { + uint32_t stride; + uint8_t size; + uint8_t current; +}; +ASSERT_STANDARD_LAYOUT(BDIterLayout); + /// Metadata necessary for configuring/setting a lock (actually semaphore). struct Lock { enum class Action : uint32_t { @@ -146,12 +153,14 @@ FailureOr initDMADesc(const AMDAIEDeviceModel &deviceModel, /// Configures/sets up a buffer descriptor (bd) associated with a dma. LogicalResult configureDMABD( const AMDAIEDeviceModel &deviceModel, XAie_DmaDesc &dmaDesc, - const TileLoc &tileLoc, uint8_t bdId, std::optional nextBdId, + const TileLoc &tileLoc, bool validBd, uint8_t bdId, bool enableNextBd, + std::optional nextBdId, bool enablePacket, std::optional packetType, std::optional packetId, uint64_t baseAddr, uint64_t lenInBytes, uint64_t offsetInBytes, uint32_t bufferElementTypeWidthInBytes, const std::optional> &maybeDims, - const std::optional> &maybePadDims); + const std::optional> &maybePadDims, + const std::optional &maybeIter); /// Configures/sets up locks associated with a dma (actually the bd...). LogicalResult configureDMALocks(const AMDAIEDeviceModel &deviceModel, @@ -166,10 +175,16 @@ LogicalResult configureDMALocks(const AMDAIEDeviceModel &deviceModel, /// once". /// TODO(max): revisit this and change it back to being like how most people /// understand. -LogicalResult pushToBdQueueAndEnable(const AMDAIEDeviceModel &deviceModel, +LogicalResult configurePushToBdQueue(const AMDAIEDeviceModel &deviceModel, const TileLoc &tileLoc, uint8_t chNum, const DMAChannelDir &channelDir, - uint8_t bdId, uint32_t repeatCount); + uint8_t bdId, uint32_t repeatCount, + bool issueToken, + bool configureChannelEnable); + +LogicalResult configureCustomTxnOp(const AMDAIEDeviceModel &deviceModel, + uint8_t opCode, uint32_t *data, + uint32_t size); LogicalResult configureStreamSwitch(const AMDAIEDeviceModel &deviceModel, const TileLoc &tileLoc,