diff --git a/compiler/plugins/target/AMD-AIE/aie/AIEDmaToNpu.cpp b/compiler/plugins/target/AMD-AIE/aie/AIEDmaToNpu.cpp index 5200e23bc..30377caf4 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AIEDmaToNpu.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AIEDmaToNpu.cpp @@ -69,54 +69,6 @@ struct ShimDMAllocationGetter { }; } // namespace -struct RtpToNpuPattern : OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - RtpToNpuPattern(MLIRContext *context, PatternBenefit benefit = 1) - : OpConversionPattern(context, benefit) {} - - LogicalResult - matchAndRewrite(NpuWriteRTPOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto *ctx = op->getContext(); - auto i32ty = IntegerType::get(ctx, 32); - auto ui32ty = - IntegerType::get(ctx, 32, IntegerType::SignednessSemantics::Unsigned); - auto device = op->getParentOfType(); - - uint32_t rtp_buffer_addr = UINT_MAX; - int c = op.getCol(); - int r = op.getRow(); - uint32_t v = op.getValue(); - uint32_t idx = op.getIndex(); - - if (auto buffer = device.lookupSymbol(op.getBufferSymName())) - if (AIE::TileOp tile = buffer.getTileOp(); - tile.colIndex() == c && tile.rowIndex() == r) { - assert(buffer.getAddress().has_value() && - "buffer must have address assigned"); - rtp_buffer_addr = static_cast(buffer.getAddress().value()); - } - - if (rtp_buffer_addr == UINT_MAX) { - return op->emitOpError("RTP buffer address cannot be found. Has " - "an RTP buffer been allocated?"); - } - - rtp_buffer_addr += idx * sizeof(uint32_t); - - IntegerAttr column = IntegerAttr::get(i32ty, c); - IntegerAttr row = IntegerAttr::get(i32ty, r); - IntegerAttr address = IntegerAttr::get(ui32ty, rtp_buffer_addr); - IntegerAttr value = IntegerAttr::get(i32ty, v); - rewriter.create(op->getLoc(), address.getUInt(), - value.getInt(), column, row); - - rewriter.eraseOp(op); - return success(); - } -}; - struct PushToNpuPattern : OpConversionPattern { public: @@ -416,7 +368,6 @@ struct AIEDmaToNpuPass : xilinx::AIEX::impl::AIEDmaToNpuBase { patterns.insert(&getContext(), cachingGetter); patterns.insert(&getContext(), cachingGetter); patterns.insert(&getContext()); - patterns.insert(&getContext()); if (failed(applyPartialConversion(device, target, std::move(patterns)))) signalPassFailure(); diff --git a/compiler/plugins/target/AMD-AIE/aie/AIEPass.cpp b/compiler/plugins/target/AMD-AIE/aie/AIEPass.cpp index c662be392..9b526adde 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AIEPass.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AIEPass.cpp @@ -37,7 +37,6 @@ #include "mlir/IR/Iterators.h" #include "mlir/IR/Location.h" #include "mlir/IR/PatternMatch.h" -#include "mlir/InitAllExtensions.h" #include "mlir/Pass/Pass.h" #include "mlir/Tools/mlir-translate/MlirTranslateMain.h" #include "mlir/Transforms/DialectConversion.h" @@ -199,6 +198,7 @@ void xilinx::AIE::registerAIEAssignBufferAddressesBasic() { return xilinx::AIE::createAIEAssignBufferAddressesBasicPass(); }); } + //===- AIEAssignBufferDescriptorIDs.cpp -------------------------*- C++ -*-===// // // This file is licensed under the Apache License v2.0 with LLVM Exceptions. @@ -258,100 +258,58 @@ struct AIEAssignBufferDescriptorIDsPass if (bd.getBdId().has_value()) gen.assignBdId(bd.getBdId().value()); }); - auto dmaOps = memOp.getOperation()->getRegion(0).getOps(); - if (!dmaOps.empty()) { - for (auto dmaOp : dmaOps) { - auto bdRegions = dmaOp.getBds(); - for (auto &bdRegion : bdRegions) { - auto &block = bdRegion.getBlocks().front(); - DMABDOp bd = *block.getOps().begin(); - if (bd.getBdId().has_value()) - assert( - gen.bdIdAlreadyAssigned(bd.getBdId().value()) && - "bdId assigned by user but not found during previous walk"); - else - bd.setBdId(gen.nextBdId(dmaOp.getChannelIndex())); + DenseMap blockChannelMap; + // Associate with each block the channel index specified by the + // dma_start + for (Block &block : memOp.getOperation()->getRegion(0)) + for (auto op : block.getOps()) { + int chNum = op.getChannelIndex(); + blockChannelMap[&block] = chNum; + Block *dest = op.getDest(); + while (dest) { + blockChannelMap[dest] = chNum; + if (dest->hasNoSuccessors()) break; + dest = dest->getSuccessors()[0]; + if (blockChannelMap.contains(dest)) dest = nullptr; } } - } else { - DenseMap blockChannelMap; - // Associate with each block the channel index specified by the - // dma_start - for (Block &block : memOp.getOperation()->getRegion(0)) - for (auto op : block.getOps()) { - int chNum = op.getChannelIndex(); - blockChannelMap[&block] = chNum; - Block *dest = op.getDest(); - while (dest) { - blockChannelMap[dest] = chNum; - if (dest->hasNoSuccessors()) break; - dest = dest->getSuccessors()[0]; - if (blockChannelMap.contains(dest)) dest = nullptr; - } - } - for (Block &block : memOp.getOperation()->getRegion(0)) { - if (block.getOps().empty()) continue; - assert(blockChannelMap.count(&block)); - DMABDOp bd = (*block.getOps().begin()); - if (bd.getBdId().has_value()) - assert(gen.bdIdAlreadyAssigned(bd.getBdId().value()) && - "bdId assigned by user but not found during previous walk"); - else - bd.setBdId(gen.nextBdId(blockChannelMap[&block])); - } + for (Block &block : memOp.getOperation()->getRegion(0)) { + if (block.getOps().empty()) continue; + assert(blockChannelMap.count(&block)); + DMABDOp bd = (*block.getOps().begin()); + if (bd.getBdId().has_value()) + assert(gen.bdIdAlreadyAssigned(bd.getBdId().value()) && + "bdId assigned by user but not found during previous walk"); + else + bd.setBdId(gen.nextBdId(blockChannelMap[&block])); } } for (TileElement memOp : memOps) { - auto dmaOps = memOp.getOperation()->getRegion(0).getOps(); - if (!dmaOps.empty()) { - for (auto dmaOp : dmaOps) { - auto bdRegions = dmaOp.getBds(); - for (auto *bdRegionIt = bdRegions.begin(); - bdRegionIt != bdRegions.end();) { - auto &block = bdRegionIt->getBlocks().front(); - DMABDOp bd = *block.getOps().begin(); - std::optional nextBdId; - if (++bdRegionIt != bdRegions.end()) - nextBdId = - (*bdRegionIt->getBlocks().front().getOps().begin()) - .getBdId(); - else if (dmaOp.getLoop()) - nextBdId = (*bdRegions.front() - .getBlocks() - .front() - .getOps() - .begin()) - .getBdId(); - bd.setNextBdId(nextBdId); - } - } - } else { - DenseMap blockBdIdMap; - for (Block &block : memOp.getOperation()->getRegion(0)) { - if (block.getOps().empty()) continue; - DMABDOp bd = *block.getOps().begin(); - assert(bd.getBdId().has_value() && - "DMABDOp should have bd_id assigned by now"); - blockBdIdMap[&block] = bd.getBdId().value(); - } + DenseMap blockBdIdMap; + for (Block &block : memOp.getOperation()->getRegion(0)) { + if (block.getOps().empty()) continue; + DMABDOp bd = *block.getOps().begin(); + assert(bd.getBdId().has_value() && + "DMABDOp should have bd_id assigned by now"); + blockBdIdMap[&block] = bd.getBdId().value(); + } - for (Block &block : memOp.getOperation()->getRegion(0)) { - if (block.getOps().empty()) continue; - DMABDOp bd = *block.getOps().begin(); - std::optional nextBdId; - if (block.getNumSuccessors()) { - assert(llvm::range_size(block.getSuccessors()) == 1 && - "should have only one successor block"); - Block *nextBlock = block.getSuccessor(0); - if (!blockBdIdMap.contains(nextBlock)) - assert(nextBlock->getOperations().size() == 1 && - isa(nextBlock->getOperations().front()) && - "bb that's not in blockMap can only have aie.end"); - else - nextBdId = blockBdIdMap[nextBlock]; - bd.setNextBdId(nextBdId); - } + for (Block &block : memOp.getOperation()->getRegion(0)) { + if (block.getOps().empty()) continue; + DMABDOp bd = *block.getOps().begin(); + std::optional nextBdId; + if (block.getNumSuccessors()) { + assert(llvm::range_size(block.getSuccessors()) == 1 && + "should have only one successor block"); + Block *nextBlock = block.getSuccessor(0); + if (!blockBdIdMap.contains(nextBlock)) + assert(nextBlock->getOperations().size() == 1 && + isa(nextBlock->getOperations().front()) && + "bb that's not in blockMap can only have aie.end"); + else + nextBdId = blockBdIdMap[nextBlock]; + bd.setNextBdId(nextBdId); } } } @@ -471,60 +429,12 @@ using namespace mlir::vector; using namespace xilinx; using namespace xilinx::AIE; -static StringRef getArchIntrinsicString(AIEArch arch) { - switch (arch) { - case AIEArch::AIE1: - return "aie"; - case AIEArch::AIE2: - return "aie2"; - } - llvm::report_fatal_error("unsupported arch"); -} +static StringRef getArchIntrinsicString(AIEArch arch) { return "aie2"; } typedef std::tuple, std::vector> IntrinsicDecl; typedef std::vector IntrinsicDecls; -static auto getAIE1Intrinsics(OpBuilder &builder) { - Type int32Type = IntegerType::get(builder.getContext(), 32); - Type int128Type = IntegerType::get(builder.getContext(), 128); - Type int384Type = IntegerType::get(builder.getContext(), 384); - Type floatType = FloatType::getF32(builder.getContext()); - - // Note that not all of these are valid for a particular design, or needed. - // For right now, we will just accept the noise. - IntrinsicDecls functions = { - {"debug_i32", {int32Type}, {}}, - {"llvm.aie.event0", {}, {}}, - {"llvm.aie.event1", {}, {}}, - {"llvm.aie.put.ms", - {int32Type, int32Type}, - {}}, //(%channel, %value) -> () - {"llvm.aie.put.wms", - {int32Type, int128Type}, - {}}, //(%channel, %value) -> () - {"llvm.aie.put.fms", - {int32Type, floatType}, - {}}, //(%channel, %value) -> () - {"llvm.aie.get.ss", {int32Type}, {int32Type}}, //(%channel, %value) -> () - {"llvm.aie.get.wss", - {int32Type}, - {int128Type}}, //(%channel, %value) -> () - {"llvm.aie.get.fss", - {int32Type}, - {floatType}}, //(%channel, %value) -> () - {"llvm.aie.put.mcd", {int384Type}, {}}, - {"llvm.aie.get.scd", {}, {int384Type}}, - {"llvm.aie.lock.acquire.reg", - {int32Type, int32Type}, - {}}, //(%lock_id, %lock_val) -> () - {"llvm.aie.lock.release.reg", - {int32Type, int32Type}, - {}}, //(%lock_id, %lock_val) -> () - }; - return functions; -} - static auto getAIE2Intrinsics(OpBuilder &builder) { Type int32Type = IntegerType::get(builder.getContext(), 32); Type accType = VectorType::get({16}, int32Type); @@ -563,15 +473,7 @@ static void declareAIEIntrinsics(AIEArch arch, OpBuilder &builder) { .setPrivate(); } }; - switch (arch) { - case AIEArch::AIE1: - registerIntrinsics(getAIE1Intrinsics(builder)); - return; - case AIEArch::AIE2: - registerIntrinsics(getAIE2Intrinsics(builder)); - return; - } - llvm::report_fatal_error("unsupported arch"); + registerIntrinsics(getAIE2Intrinsics(builder)); } template @@ -626,13 +528,8 @@ struct AIEPutStreamToStdLowering : OpConversionPattern { LogicalResult matchAndRewrite( PutStreamOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto device = op->getParentOfType(); - const auto &targetModel = device.getTargetModel(); std::string funcName; - if (targetModel.getTargetArch() == AIEArch::AIE1) - funcName = "llvm.aie.put."; - else - funcName = "llvm.aie2.put."; + funcName = "llvm.aie2.put."; if (op.isWideStream()) funcName += "wms"; @@ -646,15 +543,10 @@ struct AIEPutStreamToStdLowering : OpConversionPattern { return op.emitOpError("Could not find the intrinsic function ") << funcName; SmallVector args; - if (targetModel.getTargetArch() == AIEArch::AIE1) { - args.push_back(op.getChannel()); - args.push_back(op.getStreamValue()); - } else { - args.push_back(op.getStreamValue()); - args.push_back(rewriter.create( - op.getLoc(), IntegerType::get(rewriter.getContext(), 32), - rewriter.getI32IntegerAttr(0))); // tlast - } + args.push_back(op.getStreamValue()); + args.push_back(rewriter.create( + op.getLoc(), IntegerType::get(rewriter.getContext(), 32), + rewriter.getI32IntegerAttr(0))); // tlast rewriter.create(rewriter.getUnknownLoc(), putMSFunc, args); rewriter.eraseOp(op); return success(); @@ -672,13 +564,8 @@ struct AIEGetStreamToStdLowering : OpConversionPattern { LogicalResult matchAndRewrite( GetStreamOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto device = op->getParentOfType(); - const auto &targetModel = device.getTargetModel(); std::string funcName; - if (targetModel.getTargetArch() == AIEArch::AIE1) - funcName = "llvm.aie.get."; - else - funcName = "llvm.aie2.get."; + funcName = "llvm.aie2.get."; if (op.isWideStream()) funcName += "wss"; @@ -692,8 +579,6 @@ struct AIEGetStreamToStdLowering : OpConversionPattern { return op.emitOpError("Could not find the intrinsic function ") << funcName; SmallVector args; - if (targetModel.getTargetArch() == AIEArch::AIE1) - args.push_back(op.getChannel()); auto getSSCall = rewriter.create(rewriter.getUnknownLoc(), getSSFunc, args); rewriter.replaceOp(op, getSSCall.getResult(0)); @@ -713,23 +598,17 @@ struct AIEPutCascadeToStdLowering : OpConversionPattern { LogicalResult matchAndRewrite( PutCascadeOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto device = op->getParentOfType(); - const auto &targetModel = device.getTargetModel(); std::string funcName; - if (targetModel.getTargetArch() == AIEArch::AIE1) - funcName = "llvm.aie.put.mcd"; - else - funcName = "llvm.aie2.mcd.write.vec"; + funcName = "llvm.aie2.mcd.write.vec"; auto putMCDFunc = module.lookupSymbol(funcName); if (!putMCDFunc) return op.emitOpError("Could not find the intrinsic function ") << funcName; SmallVector args; args.push_back(op.getCascadeValue()); - if (targetModel.getTargetArch() == AIEArch::AIE2) - args.push_back(rewriter.create( - op.getLoc(), IntegerType::get(rewriter.getContext(), 32), - rewriter.getI32IntegerAttr(1))); // enable + args.push_back(rewriter.create( + op.getLoc(), IntegerType::get(rewriter.getContext(), 32), + rewriter.getI32IntegerAttr(1))); // enable rewriter.create(rewriter.getUnknownLoc(), putMCDFunc, args); rewriter.eraseOp(op); @@ -748,22 +627,16 @@ struct AIEGetCascadeToStdLowering : OpConversionPattern { LogicalResult matchAndRewrite( GetCascadeOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto device = op->getParentOfType(); - const auto &targetModel = device.getTargetModel(); std::string funcName; - if (targetModel.getTargetArch() == AIEArch::AIE1) - funcName = "llvm.aie.get.scd"; - else - funcName = "llvm.aie2.scd.read.vec"; + funcName = "llvm.aie2.scd.read.vec"; auto getSCDFunc = module.lookupSymbol(funcName); if (!getSCDFunc) return op.emitOpError("Could not find the intrinsic function ") << funcName; SmallVector args; - if (targetModel.getTargetArch() == AIEArch::AIE2) - args.push_back(rewriter.create( - op.getLoc(), IntegerType::get(rewriter.getContext(), 32), - rewriter.getI32IntegerAttr(1))); // enable + args.push_back(rewriter.create( + op.getLoc(), IntegerType::get(rewriter.getContext(), 32), + rewriter.getI32IntegerAttr(1))); // enable auto getSCDCall = rewriter.create(rewriter.getUnknownLoc(), getSCDFunc, args); @@ -787,19 +660,14 @@ struct AIEUseLockToStdLowering : OpConversionPattern { if (!device) { return module.emitOpError("Device Not found!"); } - const auto &targetModel = device.getTargetModel(); // Generate the intrinsic name std::string funcName; - if (targetModel.getTargetArch() == AIEArch::AIE1) - funcName = "llvm.aie.lock."; - else - funcName = "llvm.aie2."; + funcName = "llvm.aie2."; if (useLock.acquire() || useLock.acquireGE()) funcName += "acquire"; else if (useLock.release()) funcName += "release"; - if (targetModel.getTargetArch() == AIEArch::AIE1) funcName += ".reg"; auto useLockFunc = module.lookupSymbol(funcName); if (!useLockFunc) @@ -1508,7 +1376,6 @@ struct AIELocalizeLocksPass int dstRow = tile.rowIndex(); int cardinalMemOffset = 0; - const auto &targetModel = getTargetModel(tile); int numLocks = targetModel.getNumLocks(dstCol, dstRow); for (auto user : tile.getResult().getUsers()) if (auto lock = dyn_cast(user)) { @@ -1783,45 +1650,25 @@ struct AIEObjectFifoStatefulTransformPass ObjectFifoCreateOp op, int numElem, TileOp creation_tile) { std::vector locks; - auto dev = op->getParentOfType(); - auto &target = dev.getTargetModel(); if (creation_tile.isShimTile()) numElem = externalBuffersPerFifo[op].size(); - if (target.getTargetArch() == AIEArch::AIE1) { - int of_elem_index = - 0; // used to give objectFifo elements a symbolic name - for (int i = 0; i < numElem; i++) { - // create corresponding aie1 locks - int lockID = lockAnalysis.getLockID(creation_tile); - assert(lockID >= 0 && "No more locks to allocate!"); - auto lock = builder.create(builder.getUnknownLoc(), - creation_tile, lockID, 0); - lock.getOperation()->setAttr( - SymbolTable::getSymbolAttrName(), - builder.getStringAttr(op.name().str() + "_lock_" + - std::to_string(of_elem_index))); - locks.push_back(lock); - of_elem_index++; - } - } else { - // create corresponding aie2 locks - int prodLockID = lockAnalysis.getLockID(creation_tile); - assert(prodLockID >= 0 && "No more locks to allocate!"); - auto prodLock = builder.create( - builder.getUnknownLoc(), creation_tile, prodLockID, numElem); - prodLock.getOperation()->setAttr( - SymbolTable::getSymbolAttrName(), - builder.getStringAttr(op.name().str() + "_prod_lock")); - locks.push_back(prodLock); - - int consLockID = lockAnalysis.getLockID(creation_tile); - assert(consLockID >= 0 && "No more locks to allocate!"); - auto consLock = builder.create(builder.getUnknownLoc(), - creation_tile, consLockID, 0); - consLock.getOperation()->setAttr( - SymbolTable::getSymbolAttrName(), - builder.getStringAttr(op.name().str() + "_cons_lock")); - locks.push_back(consLock); - } + // create corresponding aie2 locks + int prodLockID = lockAnalysis.getLockID(creation_tile); + assert(prodLockID >= 0 && "No more locks to allocate!"); + auto prodLock = builder.create(builder.getUnknownLoc(), + creation_tile, prodLockID, numElem); + prodLock.getOperation()->setAttr( + SymbolTable::getSymbolAttrName(), + builder.getStringAttr(op.name().str() + "_prod_lock")); + locks.push_back(prodLock); + + int consLockID = lockAnalysis.getLockID(creation_tile); + assert(consLockID >= 0 && "No more locks to allocate!"); + auto consLock = builder.create(builder.getUnknownLoc(), + creation_tile, consLockID, 0); + consLock.getOperation()->setAttr( + SymbolTable::getSymbolAttrName(), + builder.getStringAttr(op.name().str() + "_cons_lock")); + locks.push_back(consLock); return locks; } @@ -1955,22 +1802,13 @@ struct AIEObjectFifoStatefulTransformPass int acqMode = 1; int relMode = 1; auto acqLockAction = LockAction::Acquire; - auto dev = op->getParentOfType(); - if (auto &target = dev.getTargetModel(); - target.getTargetArch() == AIEArch::AIE1) { - acqMode = lockMode == 0 ? 1 : 0; - relMode = lockMode == 0 ? 0 : 1; - acqLock = locksPerFifo[op][blockIndex]; - relLock = locksPerFifo[op][blockIndex]; - } else { - acqMode = acqNum; - relMode = relNum; - acqLockAction = LockAction::AcquireGreaterEqual; - acqLock = channelDir == DMAChannelDir::S2MM ? locksPerFifo[op][0] - : locksPerFifo[op][1]; - relLock = channelDir == DMAChannelDir::S2MM ? locksPerFifo[op][1] - : locksPerFifo[op][0]; - } + acqMode = acqNum; + relMode = relNum; + acqLockAction = LockAction::AcquireGreaterEqual; + acqLock = channelDir == DMAChannelDir::S2MM ? locksPerFifo[op][0] + : locksPerFifo[op][1]; + relLock = channelDir == DMAChannelDir::S2MM ? locksPerFifo[op][1] + : locksPerFifo[op][0]; createBd(builder, acqLock, acqMode, acqLockAction, relLock, relMode, buff, offset, len, succ, dims); } @@ -2340,44 +2178,25 @@ struct AIEObjectFifoStatefulTransformPass if (objFifoLinks.find(*linkOp) != objFifoLinks.end()) target = objFifoLinks[*linkOp]; - auto dev = op->getParentOfType(); - if (auto &targetArch = dev.getTargetModel(); - targetArch.getTargetArch() == AIEArch::AIE1) { - int lockMode = 0; - if ((port == ObjectFifoPort::Produce && - lockAction == LockAction::Release) || - (port == ObjectFifoPort::Consume && - lockAction == LockAction::Acquire)) - lockMode = 1; - for (int i = 0; i < numLocks; i++) { - int lockID = acc[{op, portNum}]; - builder.create(builder.getUnknownLoc(), - locksPerFifo[target][lockID], lockAction, - lockMode); - acc[{op, portNum}] = - (lockID + 1) % op.size(); // update to next objFifo elem - } + if (numLocks == 0) return; + // search for the correct lock based on the port of the acq/rel + // operation e.g. acq as consumer is the read lock (second) + LockOp lock; + if (lockAction == LockAction::AcquireGreaterEqual) { + if (port == ObjectFifoPort::Produce) + lock = locksPerFifo[target][0]; + else + lock = locksPerFifo[target][1]; } else { - if (numLocks == 0) return; - // search for the correct lock based on the port of the acq/rel - // operation e.g. acq as consumer is the read lock (second) - LockOp lock; - if (lockAction == LockAction::AcquireGreaterEqual) { - if (port == ObjectFifoPort::Produce) - lock = locksPerFifo[target][0]; - else - lock = locksPerFifo[target][1]; - } else { - if (port == ObjectFifoPort::Produce) - lock = locksPerFifo[target][1]; - else - lock = locksPerFifo[target][0]; - } - builder.create(builder.getUnknownLoc(), lock, lockAction, - numLocks); - acc[{op, portNum}] = (acc[{op, portNum}] + numLocks) % - op.size(); // update to next objFifo elem + if (port == ObjectFifoPort::Produce) + lock = locksPerFifo[target][1]; + else + lock = locksPerFifo[target][0]; } + builder.create(builder.getUnknownLoc(), lock, lockAction, + numLocks); + acc[{op, portNum}] = (acc[{op, portNum}] + numLocks) % + op.size(); // update to next objFifo elem } /// Function used to check whether op is already contained in map. @@ -2839,14 +2658,8 @@ struct AIEObjectFifoStatefulTransformPass else numCreate = 0; - auto dev = op->getParentOfType(); - if (auto &targetArch = dev.getTargetModel(); - targetArch.getTargetArch() == AIEArch::AIE1) - createUseLocks(builder, op, port, acqPerFifo, numCreate, - LockAction::Acquire); - else - createUseLocks(builder, op, port, acqPerFifo, numCreate, - LockAction::AcquireGreaterEqual); + createUseLocks(builder, op, port, acqPerFifo, numCreate, + LockAction::AcquireGreaterEqual); // if objFifo was linked with others, find which objFifos // elements to use diff --git a/compiler/plugins/target/AMD-AIE/aie/AIETargetCDODirect.cpp b/compiler/plugins/target/AMD-AIE/aie/AIETargetCDODirect.cpp index a7f54cadf..42c3bafde 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AIETargetCDODirect.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AIETargetCDODirect.cpp @@ -500,46 +500,23 @@ struct AIEControl { XAie_LocType tileLoc = XAie_TileLoc(col, row); // handle DMA ops separately - auto dmaOps = llvm::to_vector_of( - memOp.getOperation()->getRegion(0).getOps()); - if (!dmaOps.empty()) { - for (auto dmaOp : dmaOps) - for (auto &bdRegion : dmaOp.getBds()) { - Block &block = bdRegion.getBlocks().front(); - if (failed( - configureLocksAndBd(devInst, block, tileLoc, targetModel))) - return failure(); - } - } else { - for (Block &block : memOp.getOperation()->getRegion(0)) { - if (block.getOps().empty()) continue; - if (failed(configureLocksAndBd(devInst, block, tileLoc, targetModel))) - return failure(); - } + for (Block &block : memOp.getOperation()->getRegion(0)) { + if (block.getOps().empty()) continue; + if (failed(configureLocksAndBd(devInst, block, tileLoc, targetModel))) + return failure(); } - if (!dmaOps.empty()) - for (auto dmaOp : dmaOps) { - auto &block = dmaOp.getBds().front().getBlocks().front(); - DMABDOp bd = *block.getOps().begin(); + for (Block &block : memOp.getOperation()->getRegion(0)) { + for (auto op : block.getOps()) { + DMABDOp bd = *op.getDest()->getOps().begin(); + int chNum = op.getChannelIndex(); + auto channelDir = op.getChannelDir(); if (failed(pushToBdQueueAndEnable( - devInst, *dmaOp.getOperation(), tileLoc, - dmaOp.getChannelIndex(), dmaOp.getChannelDir(), - bd.getBdId().value(), dmaOp.getRepeatCount()))) + devInst, *bd.getOperation(), tileLoc, chNum, channelDir, + bd.getBdId().value(), op.getRepeatCount()))) return failure(); } - else - for (Block &block : memOp.getOperation()->getRegion(0)) { - for (auto op : block.getOps()) { - DMABDOp bd = *op.getDest()->getOps().begin(); - int chNum = op.getChannelIndex(); - auto channelDir = op.getChannelDir(); - if (failed(pushToBdQueueAndEnable( - devInst, *bd.getOperation(), tileLoc, chNum, channelDir, - bd.getBdId().value(), op.getRepeatCount()))) - return failure(); - } - } + } } // StreamSwitch (switchbox) configuration @@ -647,17 +624,15 @@ struct AIEControl { } // Cascade configuration - if (targetModel.getTargetArch() == AIEArch::AIE2) { - for (auto configOp : targetOp.getOps()) { - TileOp tile = cast(configOp.getTile().getDefiningOp()); - auto tileLoc = XAie_TileLoc(tile.getCol(), tile.getRow()); - TRY_XAIE_API_EMIT_ERROR( - targetOp, XAie_CoreConfigAccumulatorControl, &devInst, tileLoc, - WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at( - static_cast(configOp.getInputDir())), - WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at( - static_cast(configOp.getOutputDir()))); - } + for (auto configOp : targetOp.getOps()) { + TileOp tile = cast(configOp.getTile().getDefiningOp()); + auto tileLoc = XAie_TileLoc(tile.getCol(), tile.getRow()); + TRY_XAIE_API_EMIT_ERROR( + targetOp, XAie_CoreConfigAccumulatorControl, &devInst, tileLoc, + WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at( + static_cast(configOp.getInputDir())), + WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at( + static_cast(configOp.getOutputDir()))); } return success(); diff --git a/compiler/plugins/target/AMD-AIE/aie/AIETargets.cpp b/compiler/plugins/target/AMD-AIE/aie/AIETargets.cpp index 9af7e2ee3..97ac3a72b 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AIETargets.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AIETargets.cpp @@ -53,10 +53,8 @@ LogicalResult AIETranslateToBCF(ModuleOp module, raw_ostream &output, output << "_entry_point _main_init\n"; output << "_symbol " << corefunc << " _after _main_init\n"; output << "_symbol _main_init 0\n"; - std::string initReserved = (targetModel.getTargetArch() == AIEArch::AIE2) - ? "0x40000" - : "0x20000"; - output << "_reserved DMb 0x00000 " << initReserved + int dataMemoryStart = targetModel.getMemSouthBaseAddress(); + output << "_reserved DMb 0x00000 " << utohexstr(dataMemoryStart) << " // Don't put data in code memory\n"; int stacksize = 0; @@ -116,14 +114,12 @@ LogicalResult AIETranslateToBCF(ModuleOp module, raw_ostream &output, doBuffer(targetModel.getMemEast(srcCoord), targetModel.getMemEastBaseAddress(), std::string("east")); output << "// end mapping neighbors tile memory\n\n"; - - if (targetModel.getTargetArch() == AIEArch::AIE2) { - output << "_reserved DMb 0x80000 0x80000 // And everything else " - "the core can't see\n"; - } else { - output << "_reserved DMb 0x40000 0xc0000 // And everything else " - "the core can't see\n"; - } + int addressSpaceSize = 0x100000; + int dataMemoryEnd = targetModel.getMemEastBaseAddress() + + targetModel.getLocalMemorySize(); + output << "_reserved DMb " << utohexstr(dataMemoryEnd) << " " + << utohexstr(addressSpaceSize - dataMemoryEnd) + << " // And everything else the core can't see\n"; if (tile.getCoreOp() && tile.getCoreOp().getLinkWith()) output << "_include _file " << tile.getCoreOp().getLinkWith().value().str() << "\n"; diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/base_test_AIE1.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/base_test_AIE1.mlir index 365a17837..ea5dd1ad0 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/base_test_AIE1.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/base_test_AIE1.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @of1_cons : memref<16xi32> // CHECK: memref.global "public" @of1 : memref<16xi32> // CHECK: memref.global "public" @of0 : memref<16xi32> @@ -10,32 +10,30 @@ // CHECK: %[[TILE_3_3:.*]] = aie.tile(3, 3) // CHECK: %[[OF1_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "of1_cons_buff_0"} : memref<16xi32> // CHECK: %[[OF1_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "of1_cons_buff_1"} : memref<16xi32> -// CHECK: %[[OF1_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 0 : i32, sym_name = "of1_cons_lock_0"} -// CHECK: %[[OF1_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "of1_cons_lock_1"} +// CHECK: %[[OF1_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 2 : i32, sym_name = "of1_cons_prod_lock"} +// CHECK: %[[OF1_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "of1_cons_cons_lock"} // CHECK: %[[OF1_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of1_buff_0"} : memref<16xi32> // CHECK: %[[OF1_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of1_buff_1"} : memref<16xi32> -// CHECK: %[[OF1_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 4) {init = 0 : i32, sym_name = "of1_lock_0"} -// CHECK: %[[OF1_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 5) {init = 0 : i32, sym_name = "of1_lock_1"} +// CHECK: %[[OF1_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 2) {init = 2 : i32, sym_name = "of1_prod_lock"} +// CHECK: %[[OF1_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 0 : i32, sym_name = "of1_cons_lock"} // CHECK: %[[OF0_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_0"} : memref<16xi32> // CHECK: %[[OF0_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_1"} : memref<16xi32> // CHECK: %[[OF0_BUFF_2:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_2"} : memref<16xi32> // CHECK: %[[OF0_BUFF_3:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of0_buff_3"} : memref<16xi32> -// CHECK: %[[OF0_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "of0_lock_0"} -// CHECK: %[[OF0_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of0_lock_1"} -// CHECK: %[[OF0_LOCK_2:.*]] = aie.lock(%[[TILE_1_2]], 2) {init = 0 : i32, sym_name = "of0_lock_2"} -// CHECK: %[[OF0_LOCK_3:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 0 : i32, sym_name = "of0_lock_3"} +// CHECK: %[[OF0_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 4 : i32, sym_name = "of0_prod_lock"} +// CHECK: %[[OF0_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of0_cons_lock"} // CHECK: aie.flow(%[[TILE_1_2]], DMA : 0, %[[TILE_3_3]], DMA : 0) // CHECK: %[[MEM_1_2:.*]] = aie.mem(%[[TILE_1_2]]) { // CHECK: %[[VAL_0:.*]] = aie.dma_start(MM2S, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OF1_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF1_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OF1_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF1_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -43,14 +41,14 @@ // CHECK: %[[MEM_3_3:.*]] = aie.mem(%[[TILE_3_3]]) { // CHECK: %[[VAL_1:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF1_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF1_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -58,7 +56,7 @@ // CHECK: } module @elementGenerationAIE1 { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) %tile33 = aie.tile(3, 3) diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/basic.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/basic.mlir index ec29ec08a..ac8edc0d8 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/basic.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/basic.mlir @@ -1,141 +1,5 @@ -// RUN: iree-opt --aie-assign-bd-ids --split-input-file %s | FileCheck %s - -// CHECK-LABEL: aie.device(npu1_4col) { -// CHECK: %[[TILE_0_0:.*]] = aie.tile(0, 0) -// CHECK: %[[TILE_0_1:.*]] = aie.tile(0, 1) -// CHECK: %[[TILE_0_2:.*]] = aie.tile(0, 2) -// CHECK: %[[DOUBLE_BUFFER:.*]] = aie.buffer(%[[TILE_0_2]]) {sym_name = "double_buffer"} : memref<32xi32> -// CHECK: %[[BUFFER_0_1:.*]] = aie.buffer(%[[TILE_0_1]]) : memref<32xi32> -// CHECK: %[[LOCK_X:.*]] = aie.lock(%[[TILE_0_2]]) {init = 1 : i32, sym_name = "lock_X"} -// CHECK: %[[LOCK_Y:.*]] = aie.lock(%[[TILE_0_2]]) {init = 0 : i32, sym_name = "lock_Y"} -// CHECK: %[[MEM_0_2:.*]] = aie.mem(%[[TILE_0_2]]) { -// CHECK: %[[PLAYER_A:.*]] = aie.dma(S2MM, 0) {sym_name = "player_a"} [{ -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire, 0) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>, 0) {bd_id = 0 : i32, next_bd_id = 1 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 0) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_X]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 1 : i32, next_bd_id = 2 : i32} -// CHECK: aie.use_lock(%[[LOCK_X]], Release, -1) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire) {acq_en = false} -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 2 : i32, next_bd_id = 0 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 1) -// CHECK: }] -// CHECK: %[[PLAYER_B:.*]] = aie.dma(S2MM, 1) {sym_name = "player_b"} [{ -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>, 0) {bd_id = 3 : i32, next_bd_id = 4 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 0) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_X]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 4 : i32, next_bd_id = 5 : i32} -// CHECK: aie.use_lock(%[[LOCK_X]], Release, -1) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire) {acq_en = false} -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 5 : i32, next_bd_id = 3 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, -1) -// CHECK: }] -// CHECK: aie.end -// CHECK: } -// CHECK: %[[MEMTILE_DMA_0_1:.*]] = aie.memtile_dma(%[[TILE_0_1]]) { -// CHECK: %[[LOCK_0_1:.*]] = aie.lock(%[[TILE_0_1]]) {init = 1 : i32} -// CHECK: %[[LOCK_0_1_0:.*]] = aie.lock(%[[TILE_0_1]]) {init = 0 : i32} -// CHECK: %[[VAL_0:.*]] = aie.dma(S2MM, 0) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 0 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_0]], Release) -// CHECK: }] -// CHECK: %[[VAL_1:.*]] = aie.dma(MM2S, 0) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_0]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 1 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1]], Release) -// CHECK: }] -// CHECK: %[[LOCK_0_1_1:.*]] = aie.lock(%[[TILE_0_1]]) {init = 1 : i32} -// CHECK: %[[LOCK_0_1_2:.*]] = aie.lock(%[[TILE_0_1]]) {init = 0 : i32} -// CHECK: %[[VAL_2:.*]] = aie.dma(S2MM, 1) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_1]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 24 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_2]], Release) -// CHECK: }] -// CHECK: %[[VAL_3:.*]] = aie.dma(MM2S, 1) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_2]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 25 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_1]], Release) -// CHECK: }] -// CHECK: aie.end -// CHECK: } -// CHECK: } - -module { - aie.device(npu1_4col) { - %tile_0_0 = aie.tile(0, 0) - %tile_0_1 = aie.tile(0, 1) - %tile_0_2 = aie.tile(0, 2) - %double_buffer = aie.buffer(%tile_0_2) {sym_name = "double_buffer"} : memref<32xi32> - %buffer_0_1 = aie.buffer(%tile_0_1) : memref<32xi32> - %lock_X = aie.lock(%tile_0_2) {init = 1 : i32, sym_name = "lock_X"} - %lock_Y = aie.lock(%tile_0_2) {init = 0 : i32, sym_name = "lock_Y"} - %mem_0_2 = aie.mem(%tile_0_2) { - %player_a = aie.dma(S2MM, 0) {sym_name = "player_a"} [{ - aie.use_lock(%lock_Y, Acquire, 0) - aie.dma_bd(%double_buffer : memref<32xi32>, 0) - aie.use_lock(%lock_Y, Release, 0) - }, { - aie.use_lock(%lock_X, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>) - aie.use_lock(%lock_X, Release, -1) - }, { - aie.use_lock(%lock_Y, Acquire) {acq_en = false} - aie.dma_bd(%double_buffer : memref<32xi32>) - aie.use_lock(%lock_Y, Release, 1) - }] - %player_b = aie.dma(S2MM, 1) {sym_name = "player_b"} [{ - aie.use_lock(%lock_Y, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>, 0) - aie.use_lock(%lock_Y, Release, 0) - }, { - aie.use_lock(%lock_X, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>) - aie.use_lock(%lock_X, Release, -1) - }, { - aie.use_lock(%lock_Y, Acquire) {acq_en = false} - aie.dma_bd(%double_buffer : memref<32xi32>) - aie.use_lock(%lock_Y, Release, -1) - }] - aie.end - } - %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { - %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} - %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} - %0 = aie.dma(S2MM, 0) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1_0, Release) - }] - %1 = aie.dma(MM2S, 0) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_0, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1, Release) - }] - %lock_0_1_1 = aie.lock(%tile_0_1) {init = 1 : i32} - %lock_0_1_2 = aie.lock(%tile_0_1) {init = 0 : i32} - %2 = aie.dma(S2MM, 1) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_1, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1_2, Release) - }] - %3 = aie.dma(MM2S, 1) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_2, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1_1, Release) - }] - aie.end - } - } -} - -// ----- +// RUN: iree-opt --aie-assign-bd-ids %s | FileCheck %s // CHECK-LABEL: aie.device(xcve2302) { // CHECK: %[[TILE_2_1:.*]] = aie.tile(2, 1) diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/broadcast_test.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/broadcast_test.mlir index 33cf0de9d..9406bdb6b 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/broadcast_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/broadcast_test.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @broadcast_of_0_cons : memref<16xi32> // CHECK: memref.global "public" @broadcast_of_1_cons : memref<16xi32> // CHECK: memref.global "public" @broadcast_of_2_cons : memref<16xi32> @@ -14,32 +14,28 @@ // CHECK: %[[TILE_3_3:.*]] = aie.tile(3, 3) // CHECK: %[[BROADCAST_OF_0_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "broadcast_of_0_cons_buff_0"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_0_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "broadcast_of_0_cons_buff_1"} : memref<16xi32> -// CHECK: %[[BROADCAST_OF_0_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "broadcast_of_0_cons_lock_0"} -// CHECK: %[[BROADCAST_OF_0_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "broadcast_of_0_cons_lock_1"} +// CHECK: %[[BROADCAST_OF_0_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 2 : i32, sym_name = "broadcast_of_0_cons_prod_lock"} +// CHECK: %[[BROADCAST_OF_0_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "broadcast_of_0_cons_cons_lock"} // CHECK: %[[BROADCAST_OF_1_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_1_4]]) {sym_name = "broadcast_of_1_cons_buff_0"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_1_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_1_4]]) {sym_name = "broadcast_of_1_cons_buff_1"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_1_CONS_BUFF_2:.*]] = aie.buffer(%[[TILE_1_4]]) {sym_name = "broadcast_of_1_cons_buff_2"} : memref<16xi32> -// CHECK: %[[BROADCAST_OF_1_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_1_4]], 0) {init = 0 : i32, sym_name = "broadcast_of_1_cons_lock_0"} -// CHECK: %[[BROADCAST_OF_1_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_1_4]], 1) {init = 0 : i32, sym_name = "broadcast_of_1_cons_lock_1"} -// CHECK: %[[BROADCAST_OF_1_CONS_LOCK_2:.*]] = aie.lock(%[[TILE_1_4]], 2) {init = 0 : i32, sym_name = "broadcast_of_1_cons_lock_2"} +// CHECK: %[[BROADCAST_OF_1_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_4]], 0) {init = 3 : i32, sym_name = "broadcast_of_1_cons_prod_lock"} +// CHECK: %[[BROADCAST_OF_1_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_4]], 1) {init = 0 : i32, sym_name = "broadcast_of_1_cons_cons_lock"} // CHECK: %[[BROADCAST_OF_2_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "broadcast_of_2_cons_buff_0"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_2_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "broadcast_of_2_cons_buff_1"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_2_CONS_BUFF_2:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "broadcast_of_2_cons_buff_2"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_2_CONS_BUFF_3:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "broadcast_of_2_cons_buff_3"} : memref<16xi32> -// CHECK: %[[BROADCAST_OF_2_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_3_2]], 0) {init = 0 : i32, sym_name = "broadcast_of_2_cons_lock_0"} -// CHECK: %[[BROADCAST_OF_2_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_3_2]], 1) {init = 0 : i32, sym_name = "broadcast_of_2_cons_lock_1"} -// CHECK: %[[BROADCAST_OF_2_CONS_LOCK_2:.*]] = aie.lock(%[[TILE_3_2]], 2) {init = 0 : i32, sym_name = "broadcast_of_2_cons_lock_2"} -// CHECK: %[[BROADCAST_OF_2_CONS_LOCK_3:.*]] = aie.lock(%[[TILE_3_2]], 3) {init = 0 : i32, sym_name = "broadcast_of_2_cons_lock_3"} +// CHECK: %[[BROADCAST_OF_2_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_2]], 0) {init = 4 : i32, sym_name = "broadcast_of_2_cons_prod_lock"} +// CHECK: %[[BROADCAST_OF_2_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_2]], 1) {init = 0 : i32, sym_name = "broadcast_of_2_cons_cons_lock"} // CHECK: %[[BROADCAST_OF_3_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "broadcast_of_3_cons_buff_0"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_3_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "broadcast_of_3_cons_buff_1"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_3_CONS_BUFF_2:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "broadcast_of_3_cons_buff_2"} : memref<16xi32> -// CHECK: %[[BROADCAST_OF_3_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 0 : i32, sym_name = "broadcast_of_3_cons_lock_0"} -// CHECK: %[[BROADCAST_OF_3_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "broadcast_of_3_cons_lock_1"} -// CHECK: %[[BROADCAST_OF_3_CONS_LOCK_2:.*]] = aie.lock(%[[TILE_3_3]], 2) {init = 0 : i32, sym_name = "broadcast_of_3_cons_lock_2"} +// CHECK: %[[BROADCAST_OF_3_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 3 : i32, sym_name = "broadcast_of_3_cons_prod_lock"} +// CHECK: %[[BROADCAST_OF_3_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "broadcast_of_3_cons_cons_lock"} // CHECK: %[[BROADCAST_OF_BUFF_0:.*]] = aie.buffer(%[[TILE_1_3]]) {sym_name = "broadcast_of_buff_0"} : memref<16xi32> // CHECK: %[[BROADCAST_OF_BUFF_1:.*]] = aie.buffer(%[[TILE_1_3]]) {sym_name = "broadcast_of_buff_1"} : memref<16xi32> -// CHECK: %[[BROADCAST_OF_LOCK_0:.*]] = aie.lock(%[[TILE_1_3]], 0) {init = 0 : i32, sym_name = "broadcast_of_lock_0"} -// CHECK: %[[BROADCAST_OF_LOCK_1:.*]] = aie.lock(%[[TILE_1_3]], 1) {init = 0 : i32, sym_name = "broadcast_of_lock_1"} +// CHECK: %[[BROADCAST_OF_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_3]], 0) {init = 2 : i32, sym_name = "broadcast_of_prod_lock"} +// CHECK: %[[BROADCAST_OF_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_3]], 1) {init = 0 : i32, sym_name = "broadcast_of_cons_lock"} // CHECK: aie.flow(%[[TILE_1_3]], DMA : 0, %[[TILE_3_3]], DMA : 0) // CHECK: aie.flow(%[[TILE_1_3]], DMA : 0, %[[TILE_3_2]], DMA : 0) // CHECK: aie.flow(%[[TILE_1_3]], DMA : 0, %[[TILE_1_4]], DMA : 0) @@ -53,12 +49,12 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C2]] { -// CHECK: aie.use_lock(%[[BROADCAST_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[BROADCAST_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_CONS_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } @@ -68,12 +64,12 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C2]] { -// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_0_CONS_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_0_CONS_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_PROD_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } @@ -83,24 +79,18 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C3:.*]] = arith.constant 3 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C3]] { -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_0]], Acquire, 1) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[BROADCAST_OF_1_CONS_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_1_CONS_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_1]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_2]], Acquire, 1) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_PROD_LOCK]], Release, 2) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[BROADCAST_OF_1_CONS_BUFF_2]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_1_CONS_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_2]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_1]], Acquire, 1) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_2]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_PROD_LOCK]], Release, 2) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[BROADCAST_OF_1_CONS_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_1_CONS_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_1]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_2]], Release, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_PROD_LOCK]], Release, 2) // CHECK: } // CHECK: aie.end // CHECK: } @@ -110,28 +100,26 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C4:.*]] = arith.constant 4 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C4]] { -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_0]], Acquire, 1) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_1]], Acquire, 1) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_2]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_CONS_LOCK]], AcquireGreaterEqual, 3) // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_3]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_2]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_3]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_1]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_2]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_3]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_2]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_3]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_2_CONS_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_3]], Release, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_PROD_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } @@ -141,33 +129,32 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C3:.*]] = arith.constant 3 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C3]] { -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_0]], Acquire, 1) -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[BROADCAST_OF_3_CONS_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_3_CONS_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_2]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_3_CONS_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_3_CONS_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_1]], Release, 0) -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[BROADCAST_OF_3_CONS_BUFF_2]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[BROADCAST_OF_3_CONS_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_2]], Release, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_PROD_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } // CHECK: %[[MEM_1_3:.*]] = aie.mem(%[[TILE_1_3]]) { // CHECK: %[[VAL_0:.*]] = aie.dma_start(MM2S, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[BROADCAST_OF_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[BROADCAST_OF_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -175,14 +162,14 @@ // CHECK: %[[MEM_1_2:.*]] = aie.mem(%[[TILE_1_2]]) { // CHECK: %[[VAL_1:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_0_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_0_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_0_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -190,19 +177,19 @@ // CHECK: %[[MEM_1_4:.*]] = aie.mem(%[[TILE_1_4]]) { // CHECK: %[[VAL_2:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb4) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_1_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_1_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb3 // CHECK: ^bb3: -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_1_CONS_BUFF_2]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_1_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb4: // CHECK: aie.end @@ -210,24 +197,24 @@ // CHECK: %[[MEM_3_2:.*]] = aie.mem(%[[TILE_3_2]]) { // CHECK: %[[VAL_3:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb5) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_2_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_2_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb3 // CHECK: ^bb3: -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_2_CONS_BUFF_2]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb4 // CHECK: ^bb4: -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_3]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_2_CONS_BUFF_3]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_LOCK_3]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_2_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb5: // CHECK: aie.end @@ -235,19 +222,19 @@ // CHECK: %[[MEM_3_3:.*]] = aie.mem(%[[TILE_3_3]]) { // CHECK: %[[VAL_4:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb4) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_3_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_3_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb3 // CHECK: ^bb3: -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[BROADCAST_OF_3_CONS_BUFF_2]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[BROADCAST_OF_3_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb4: // CHECK: aie.end @@ -255,7 +242,7 @@ // CHECK: } module @broadcast { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) %tile14 = aie.tile(1, 4) diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/link_test_AIE1.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/link_test_AIE1.mlir index b13d09df3..b6eff0457 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/link_test_AIE1.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/link_test_AIE1.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @of2_cons : memref<16xi32> // CHECK: memref.global "public" @of2 : memref<16xi32> // CHECK: memref.global "public" @of1_cons : memref<16xi32> @@ -11,13 +11,14 @@ // CHECK: %[[TILE_2_2:.*]] = aie.tile(2, 2) // CHECK: %[[OF2_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_2_2]]) {sym_name = "of2_cons_buff_0"} : memref<16xi32> // CHECK: %[[OF2_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_2_2]]) {sym_name = "of2_cons_buff_1"} : memref<16xi32> -// CHECK: %[[OF2_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_2_2]], 0) {init = 0 : i32, sym_name = "of2_cons_lock_0"} -// CHECK: %[[OF2_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_2_2]], 1) {init = 0 : i32, sym_name = "of2_cons_lock_1"} +// CHECK: %[[OF2_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_2_2]], 0) {init = 2 : i32, sym_name = "of2_cons_prod_lock"} +// CHECK: %[[OF2_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_2_2]], 1) {init = 0 : i32, sym_name = "of2_cons_cons_lock"} // CHECK: %[[OF1_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of1_cons_buff_0"} : memref<16xi32> // CHECK: %[[OF1_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of1_cons_buff_1"} : memref<16xi32> -// CHECK: %[[OF1_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "of1_cons_lock_0"} -// CHECK: %[[OF1_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of1_cons_lock_1"} -// CHECK: %[[OF1_LOCK_0:.*]] = aie.lock(%[[TILE_2_0]], 0) {init = 0 : i32, sym_name = "of1_lock_0"} +// CHECK: %[[OF1_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 2 : i32, sym_name = "of1_cons_prod_lock"} +// CHECK: %[[OF1_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of1_cons_cons_lock"} +// CHECK: %[[OF1_PROD_LOCK:.*]] = aie.lock(%[[TILE_2_0]], 0) {init = 1 : i32, sym_name = "of1_prod_lock"} +// CHECK: %[[OF1_CONS_LOCK:.*]] = aie.lock(%[[TILE_2_0]], 1) {init = 0 : i32, sym_name = "of1_cons_lock"} // CHECK: aie.flow(%[[TILE_2_0]], DMA : 0, %[[TILE_1_2]], DMA : 0) // CHECK: aie.flow(%[[TILE_1_2]], DMA : 0, %[[TILE_2_2]], DMA : 0) // CHECK: %[[EXT_BUFF_IN:.*]] = aie.external_buffer {sym_name = "ext_buff_in"} : memref<16xi32> @@ -25,9 +26,9 @@ // CHECK: %[[SHIM_DMA_2_0:.*]] = aie.shim_dma(%[[TILE_2_0]]) { // CHECK: %[[VAL_0:.*]] = aie.dma_start(MM2S, 0, ^bb1, ^bb2) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OF1_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[EXT_BUFF_IN]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[OF1_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb2: // CHECK: aie.end @@ -35,26 +36,26 @@ // CHECK: %[[MEM_1_2:.*]] = aie.mem(%[[TILE_1_2]]) { // CHECK: %[[VAL_1:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF1_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF1_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: %[[VAL_2:.*]] = aie.dma_start(MM2S, 0, ^bb4, ^bb6) // CHECK: ^bb4: -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF1_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[OF1_CONS_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb5 // CHECK: ^bb5: -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF1_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF1_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF1_CONS_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[OF1_CONS_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb4 // CHECK: ^bb6: // CHECK: aie.end @@ -62,14 +63,14 @@ // CHECK: %[[MEM_2_2:.*]] = aie.mem(%[[TILE_2_2]]) { // CHECK: %[[VAL_3:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF2_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF2_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[OF2_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF2_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OF2_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OF2_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OF2_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -77,7 +78,7 @@ // CHECK: } module @link_AIE1 { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile20 = aie.tile(2, 0) %tile12 = aie.tile(1, 2) %tile22 = aie.tile(2, 2) diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/local_locks.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/local_locks.mlir index 2775f4d92..e83d29ac5 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/local_locks.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/local_locks.mlir @@ -4,15 +4,15 @@ // CHECK: %[[C56:.*]] = arith.constant 56 : index // CHECK: %[[VAL_0:.*]] = arith.index_cast %[[C56]] : index to i32 // CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 -// CHECK: call @llvm.aie.lock.acquire.reg(%[[VAL_0]], %[[C0_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.acquire(%[[VAL_0]], %[[C0_I32]]) : (i32, i32) -> () // CHECK: %[[VAL_1:.*]] = arith.index_cast %[[C56]] : index to i32 // CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 -// CHECK: call @llvm.aie.lock.release.reg(%[[VAL_1]], %[[C1_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.release(%[[VAL_1]], %[[C1_I32]]) : (i32, i32) -> () // CHECK: return // CHECK: } module @local_locks { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %3 = aie.tile(3, 3) %11 = aie.core(%3) { %c56 = arith.constant 56 : index diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/loop_test.aie.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/loop_test.aie.mlir index f2159fa4a..a1dbcd9e8 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/loop_test.aie.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/loop_test.aie.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @loop_of : memref<16xi32> // CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) // CHECK: %[[TILE_1_3:.*]] = aie.tile(1, 3) @@ -9,10 +9,8 @@ // CHECK: %[[LOOP_OF_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "loop_of_buff_1"} : memref<16xi32> // CHECK: %[[LOOP_OF_BUFF_2:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "loop_of_buff_2"} : memref<16xi32> // CHECK: %[[LOOP_OF_BUFF_3:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "loop_of_buff_3"} : memref<16xi32> -// CHECK: %[[LOOP_OF_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "loop_of_lock_0"} -// CHECK: %[[LOOP_OF_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "loop_of_lock_1"} -// CHECK: %[[LOOP_OF_LOCK_2:.*]] = aie.lock(%[[TILE_1_2]], 2) {init = 0 : i32, sym_name = "loop_of_lock_2"} -// CHECK: %[[LOOP_OF_LOCK_3:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 0 : i32, sym_name = "loop_of_lock_3"} +// CHECK: %[[LOOP_OF_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 4 : i32, sym_name = "loop_of_prod_lock"} +// CHECK: %[[LOOP_OF_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "loop_of_cons_lock"} // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: index) { // CHECK: return // CHECK: } @@ -22,75 +20,75 @@ // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: %[[C4:.*]] = arith.constant 4 : index // CHECK: %[[C21:.*]] = arith.constant 21 : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_0]], %[[C0]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C17:.*]] = arith.constant 17 : index // CHECK: %[[C8:.*]] = arith.constant 8 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C1]] to %[[C17]] step %[[C8]] { -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_1]], %[[ARG0]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C1_2:.*]] = arith.constant 1 : index // CHECK: %[[VAL_0:.*]] = arith.muli %[[C2]], %[[C1_2]] : index // CHECK: %[[VAL_1:.*]] = arith.addi %[[ARG0]], %[[VAL_0]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_2]], %[[VAL_1]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C2_3:.*]] = arith.constant 2 : index // CHECK: %[[VAL_2:.*]] = arith.muli %[[C2]], %[[C2_3]] : index // CHECK: %[[VAL_3:.*]] = arith.addi %[[ARG0]], %[[VAL_2]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_3]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_3]], %[[VAL_3]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_3]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C3:.*]] = arith.constant 3 : index // CHECK: %[[VAL_4:.*]] = arith.muli %[[C2]], %[[C3]] : index // CHECK: %[[VAL_5:.*]] = arith.addi %[[ARG0]], %[[VAL_4]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_0]], %[[VAL_5]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: } // CHECK: scf.for %[[ARG0:.*]] = %[[C17]] to %[[C21]] step %[[C2]] { -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_1]], %[[ARG0]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: } // CHECK: %[[C1_0:.*]] = arith.constant 1 : index // CHECK: %[[C4_1:.*]] = arith.constant 4 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C1]] to %[[C1_0]] step %[[C4_1]] { -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_2]], %[[ARG0]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C1_2:.*]] = arith.constant 1 : index // CHECK: %[[VAL_6:.*]] = arith.muli %[[C1]], %[[C1_2]] : index // CHECK: %[[VAL_7:.*]] = arith.addi %[[ARG0]], %[[VAL_6]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_3]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_3]], %[[VAL_7]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_3]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C2_3:.*]] = arith.constant 2 : index // CHECK: %[[VAL_8:.*]] = arith.muli %[[C1]], %[[C2_3]] : index // CHECK: %[[VAL_9:.*]] = arith.addi %[[ARG0]], %[[VAL_8]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_0]], %[[VAL_9]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C3:.*]] = arith.constant 3 : index // CHECK: %[[VAL_10:.*]] = arith.muli %[[C1]], %[[C3]] : index // CHECK: %[[VAL_11:.*]] = arith.addi %[[ARG0]], %[[VAL_10]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_1]], %[[VAL_11]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: } // CHECK: scf.for %[[ARG0:.*]] = %[[C1_0]] to %[[C4]] step %[[C1]] { -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[LOOP_OF_BUFF_2]], %[[ARG0]]) : (memref<16xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } // CHECK: } module { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) aie.objectfifo @loop_of (%tile12, {%tile13}, 4 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/loop_test_nested.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/loop_test_nested.mlir index 3ea2174d5..781a65135 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/loop_test_nested.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/loop_test_nested.mlir @@ -1,14 +1,14 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @loop_of : memref<16xi32> // CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) // CHECK: %[[TILE_1_3:.*]] = aie.tile(1, 3) // CHECK: %[[LOOP_OF_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "loop_of_buff_0"} : memref<16xi32> // CHECK: %[[LOOP_OF_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "loop_of_buff_1"} : memref<16xi32> -// CHECK: %[[LOOP_OF_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "loop_of_lock_0"} -// CHECK: %[[LOOP_OF_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "loop_of_lock_1"} +// CHECK: %[[LOOP_OF_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 2 : i32, sym_name = "loop_of_prod_lock"} +// CHECK: %[[LOOP_OF_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "loop_of_cons_lock"} // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<4x4xi32>, %[[ARG1:.*]]: index) { // CHECK: return // CHECK: } @@ -22,79 +22,79 @@ // CHECK: %[[C4294967294:.*]] = arith.constant 4294967294 : index // CHECK: %[[C2_0:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C4294967294]] step %[[C2_0]] { -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_3:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_0]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_3]], %[[C0]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C2_4:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG1:.*]] = %[[C1]] to %[[C21]] step %[[C2_4]] { -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_9:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_1]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_9]], %[[ARG1]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C1_10:.*]] = arith.constant 1 : index // CHECK: %[[VAL_0:.*]] = arith.muli %[[C1]], %[[C1_10]] : index // CHECK: %[[VAL_1:.*]] = arith.addi %[[ARG1]], %[[VAL_0]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_11:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_0]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_11]], %[[VAL_1]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: } -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_5:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_1]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_5]], %[[C0]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_6:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_0]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_6]], %[[C0]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C2_7:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG1:.*]] = %[[C1]] to %[[C21]] step %[[C2_7]] { -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_9:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_1]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_9]], %[[ARG1]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C1_10:.*]] = arith.constant 1 : index // CHECK: %[[VAL_2:.*]] = arith.muli %[[C1]], %[[C1_10]] : index // CHECK: %[[VAL_3:.*]] = arith.addi %[[ARG1]], %[[VAL_2]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_11:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_0]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_11]], %[[VAL_3]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: } -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_8:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_1]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_8]], %[[C0]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: } -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_0]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST]], %[[C0]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C2_1:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C1]] to %[[C21]] step %[[C2_1]] { -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_3:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_1]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_3]], %[[ARG0]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: %[[C1_4:.*]] = arith.constant 1 : index // CHECK: %[[VAL_4:.*]] = arith.muli %[[C1]], %[[C1_4]] : index // CHECK: %[[VAL_5:.*]] = arith.addi %[[ARG0]], %[[VAL_4]] : index -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_5:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_0]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_5]], %[[VAL_5]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: } -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[LOOP_OF_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: %[[REINTERPRET_CAST_2:.*]] = memref.reinterpret_cast %[[LOOP_OF_BUFF_1]] to offset: [0], sizes: [4, 4], strides: [4, 1] : memref<16xi32> to memref<4x4xi32> // CHECK: func.call @some_work(%[[REINTERPRET_CAST_2]], %[[C0]]) : (memref<4x4xi32>, index) -> () -// CHECK: aie.use_lock(%[[LOOP_OF_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[LOOP_OF_CONS_LOCK]], Release, 1) // CHECK: aie.end // CHECK: } // CHECK: } module { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) aie.objectfifo @loop_of (%tile12, {%tile13}, 2 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_buffer_and_lock.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_buffer_and_lock.mlir index ff3b8df19..2db98fed3 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_buffer_and_lock.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_buffer_and_lock.mlir @@ -1,40 +1,26 @@ // RUN: iree-opt --aie-localize-locks --aie-standard-lowering %s | FileCheck %s // CHECK-LABEL: memref.global "public" @a : memref<256xi32> -// CHECK: func.func private @debug_i32(i32) -// CHECK: func.func private @llvm.aie.event0() -// CHECK: func.func private @llvm.aie.event1() -// CHECK: func.func private @llvm.aie.put.ms(i32, i32) -// CHECK: func.func private @llvm.aie.put.wms(i32, i128) -// CHECK: func.func private @llvm.aie.put.fms(i32, f32) -// CHECK: func.func private @llvm.aie.get.ss(i32) -> i32 -// CHECK: func.func private @llvm.aie.get.wss(i32) -> i128 -// CHECK: func.func private @llvm.aie.get.fss(i32) -> f32 -// CHECK: func.func private @llvm.aie.put.mcd(i384) -// CHECK: func.func private @llvm.aie.get.scd() -> i384 -// CHECK: func.func private @llvm.aie.lock.acquire.reg(i32, i32) -// CHECK: func.func private @llvm.aie.lock.release.reg(i32, i32) - -// CHECK-LABEL: func.func @core_1_2() { +// CHECK-LABEL: func.func @core_1_3() { // CHECK: %[[C8:.*]] = arith.constant 8 : index // CHECK: %[[VAL_0:.*]] = arith.index_cast %[[C8]] : index to i32 // CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 -// CHECK: call @llvm.aie.lock.acquire.reg(%[[VAL_0]], %[[C1_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.acquire(%[[VAL_0]], %[[C1_I32]]) : (i32, i32) -> () // CHECK: %[[C16:.*]] = arith.constant 16 : index // CHECK: %[[VAL_1:.*]] = memref.get_global @a : memref<256xi32> // CHECK: memref.assume_alignment %[[VAL_1]], 32 : memref<256xi32> // CHECK: %[[VAL_2:.*]] = memref.load %[[VAL_1]]{{\[}}%[[C16]]] : memref<256xi32> // CHECK: %[[VAL_3:.*]] = arith.index_cast %[[C8]] : index to i32 // CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 -// CHECK: call @llvm.aie.lock.release.reg(%[[VAL_3]], %[[C0_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.release(%[[VAL_3]], %[[C0_I32]]) : (i32, i32) -> () // CHECK: return // CHECK: } -// CHECK-LABEL: func.func @core_1_1() { +// CHECK-LABEL: func.func @core_1_2() { // CHECK: %[[C56:.*]] = arith.constant 56 : index // CHECK: %[[VAL_0:.*]] = arith.index_cast %[[C56]] : index to i32 // CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 -// CHECK: call @llvm.aie.lock.acquire.reg(%[[VAL_0]], %[[C0_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.acquire(%[[VAL_0]], %[[C0_I32]]) : (i32, i32) -> () // CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 // CHECK: %[[C16:.*]] = arith.constant 16 : index // CHECK: %[[VAL_1:.*]] = memref.get_global @a : memref<256xi32> @@ -42,32 +28,32 @@ // CHECK: memref.store %[[C1_I32]], %[[VAL_1]]{{\[}}%[[C16]]] : memref<256xi32> // CHECK: %[[VAL_2:.*]] = arith.index_cast %[[C56]] : index to i32 // CHECK: %[[C1_I32_0:.*]] = arith.constant 1 : i32 -// CHECK: call @llvm.aie.lock.release.reg(%[[VAL_2]], %[[C1_I32_0]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.release(%[[VAL_2]], %[[C1_I32_0]]) : (i32, i32) -> () // CHECK: return // CHECK: } module @test_core_llvm1 { - aie.device(xcvc1902) { - %tile11 = aie.tile(1, 1) + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) + %tile13 = aie.tile(1, 3) - %lock11_8 = aie.lock(%tile11, 8) - %buf11_0 = aie.buffer(%tile11) { sym_name = "a" } : memref<256xi32> + %lock12_8 = aie.lock(%tile12, 8) + %buf12_0 = aie.buffer(%tile12) { sym_name = "a" } : memref<256xi32> - %core11 = aie.core(%tile11) { - aie.use_lock(%lock11_8, Acquire, 0) + %core12 = aie.core(%tile12) { + aie.use_lock(%lock12_8, Acquire, 0) %0 = arith.constant 1 : i32 %i = arith.constant 16 : index - memref.store %0, %buf11_0[%i] : memref<256xi32> - aie.use_lock(%lock11_8, Release, 1) + memref.store %0, %buf12_0[%i] : memref<256xi32> + aie.use_lock(%lock12_8, Release, 1) aie.end } - %core12 = aie.core(%tile12) { - aie.use_lock(%lock11_8, Acquire, 1) + %core13 = aie.core(%tile13) { + aie.use_lock(%lock12_8, Acquire, 1) %i = arith.constant 16 : index - %0 = memref.load %buf11_0[%i] : memref<256xi32> - aie.use_lock(%lock11_8, Release, 0) + %0 = memref.load %buf12_0[%i] : memref<256xi32> + aie.use_lock(%lock12_8, Release, 0) aie.end } } diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_dma.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_dma.mlir index c46b64ccf..afe7bfeb9 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_dma.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_dma.mlir @@ -1,39 +1,39 @@ // RUN: iree-opt --aie-localize-locks --aie-standard-lowering %s | FileCheck %s -// CHECK-LABEL: func.func @core_4_3() { +// CHECK-LABEL: func.func @core_2_3() { // CHECK: %[[C48:.*]] = arith.constant 48 : index -// CHECK: %[[C16:.*]] = arith.constant 16 : index -// CHECK: %[[C17:.*]] = arith.constant 17 : index // CHECK: %[[VAL_0:.*]] = arith.index_cast %[[C48]] : index to i32 // CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 -// CHECK: call @llvm.aie.lock.acquire.reg(%[[VAL_0]], %[[C1_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.acquire(%[[VAL_0]], %[[C1_I32]]) : (i32, i32) -> () // CHECK: %[[VAL_1:.*]] = arith.index_cast %[[C48]] : index to i32 // CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 -// CHECK: call @llvm.aie.lock.release.reg(%[[VAL_1]], %[[C0_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.release(%[[VAL_1]], %[[C0_I32]]) : (i32, i32) -> () // CHECK: return // CHECK: } // CHECK-LABEL: func.func @core_3_3() { +// CHECK: %[[C16:.*]] = arith.constant 16 : index // CHECK: %[[C48:.*]] = arith.constant 48 : index // CHECK: %[[C49:.*]] = arith.constant 49 : index // CHECK: %[[VAL_0:.*]] = arith.index_cast %[[C48]] : index to i32 // CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 -// CHECK: call @llvm.aie.lock.acquire.reg(%[[VAL_0]], %[[C0_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.acquire(%[[VAL_0]], %[[C0_I32]]) : (i32, i32) -> () // CHECK: %[[C16_I32:.*]] = arith.constant 16 : i32 // CHECK: %[[C0_I32_0:.*]] = arith.constant 0 : i32 -// CHECK: call @llvm.aie.put.ms(%[[C0_I32_0]], %[[C16_I32]]) : (i32, i32) -> () -// CHECK: %[[VAL_1:.*]] = call @llvm.aie.get.wss(%[[C0_I32_0]]) : (i32) -> i128 -// CHECK: %[[C1_I384:.*]] = arith.constant 1 : i384 -// CHECK: call @llvm.aie.put.mcd(%[[C1_I384]]) : (i384) -> () -// CHECK: %[[VAL_2:.*]] = arith.index_cast %[[C48]] : index to i32 +// CHECK: %[[C0_I32_1:.*]] = arith.constant 0 : i32 +// CHECK: call @llvm.aie2.put.ms(%[[C16_I32]], %[[C0_I32_1]]) : (i32, i32) -> () +// CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<16xi32> // CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 -// CHECK: call @llvm.aie.lock.release.reg(%[[VAL_2]], %[[C1_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.mcd.write.vec(%[[CST]], %[[C1_I32]]) : (vector<16xi32>, i32) -> () +// CHECK: %[[VAL_1:.*]] = arith.index_cast %[[C48]] : index to i32 +// CHECK: %[[C1_I32_2:.*]] = arith.constant 1 : i32 +// CHECK: call @llvm.aie2.release(%[[VAL_1]], %[[C1_I32_2]]) : (i32, i32) -> () // CHECK: return // CHECK: } module @example0 { - aie.device(xcvc1902) { + aie.device(npu1_4col) { // Odd AIE rows: DMem on the East // Even AIE rows: DMem on the West @@ -44,14 +44,14 @@ module @example0 { %t11 = aie.tile(1, 1) %t33 = aie.tile(3, 3) - %t43 = aie.tile(4, 3) + %t23 = aie.tile(2, 3) %l33_0 = aie.lock(%t33, 0) %l33_1 = aie.lock(%t33, 1) - %l43_0 = aie.lock(%t43, 0) + %l23_0 = aie.lock(%t23, 0) %buf33 = aie.buffer(%t33) { sym_name = "a" } : memref<256xi32> - %buf43 = aie.buffer(%t43) { sym_name = "b" } : memref<256xi32> + %buf23 = aie.buffer(%t23) { sym_name = "b" } : memref<256xi32> %m33 = aie.mem(%t33) { %dmaSt0 = aie.dma_start(MM2S, 0, ^bd0, ^end) @@ -64,12 +64,12 @@ module @example0 { aie.end } - %m43 = aie.mem(%t43) { + %m23 = aie.mem(%t23) { %dmaSt = aie.dma_start(S2MM, 0, ^bd0, ^end) ^bd0: - aie.use_lock(%l43_0, Acquire, 0) - aie.dma_bd(%buf43 : memref<256xi32>, 0, 256) - aie.use_lock(%l43_0, Release, 1) + aie.use_lock(%l23_0, Acquire, 0) + aie.dma_bd(%buf23 : memref<256xi32>, 0, 256) + aie.use_lock(%l23_0, Release, 1) aie.next_bd ^end ^end: aie.end @@ -79,7 +79,7 @@ module @example0 { aie.connect } - %s43 = aie.switchbox(%t43) { + %s23 = aie.switchbox(%t23) { aie.connect } @@ -89,19 +89,19 @@ module @example0 { %val0 = arith.constant 16 : i32 %0 = arith.constant 0 : i32 aie.put_stream(%0 : i32, %val0 : i32) - %val1 = aie.get_stream(%0 : i32) : i128 - %val2 = arith.constant 1 : i384 - aie.put_cascade(%val2: i384) + // %val1 = aie.get_stream(%0 : i32) : i128 + %val2 = arith.constant dense<0> : vector<16xi32> + aie.put_cascade(%val2: vector<16xi32>) aie.use_lock(%l33_0, Release, 1) aie.end } - %c43 = aie.core(%t43) { - aie.use_lock(%l43_0, Acquire, 1) + %c23 = aie.core(%t23) { + aie.use_lock(%l23_0, Acquire, 1) // code - aie.use_lock(%l43_0, Release, 0) + aie.use_lock(%l23_0, Release, 0) aie.end } } diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_event.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_event.mlir index d73165604..35ffaf28e 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_event.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_event.mlir @@ -1,4 +1,6 @@ // RUN: iree-opt --aie-standard-lowering %s | FileCheck %s +// XFAIL: * +// event not supported for aie2? // CHECK-LABEL: func.func @core_1_1() { // CHECK: call @llvm.aie.event0() : () -> () @@ -7,9 +9,9 @@ // CHECK: } module @test { - aie.device(xcvc1902) { - %tile11 = aie.tile(1, 1) - %core11 = aie.core(%tile11) { + aie.device(npu1_4col) { + %tile12 = aie.tile(1, 2) + %core12 = aie.core(%tile12) { aie.event(0) aie.event(1) aie.end diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_stream.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_stream.mlir index f6cca50f0..02f686cd8 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_stream.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/lower_stream.mlir @@ -1,54 +1,29 @@ -// RUN: iree-opt --aie-standard-lowering %s | FileCheck %s -// CHECK-LABEL: func.func @core_2_1() { -// CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 -// CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 -// CHECK: %[[VAL_0:.*]] = call @llvm.aie.get.ss(%[[C0_I32]]) : (i32) -> i32 -// CHECK: %[[VAL_1:.*]] = call @llvm.aie.get.ss(%[[C1_I32]]) : (i32) -> i32 -// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32 -// CHECK: %[[VAL_3:.*]] = call @llvm.aie.get.scd() : () -> i384 -// CHECK: return -// CHECK: } +// RUN: iree-opt --aie-standard-lowering %s | FileCheck %s -// CHECK-LABEL: func.func @core_1_1() { +// CHECK-LABEL: func.func @core_2_2() { // CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 // CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 -// CHECK: %[[C16_I32:.*]] = arith.constant 16 : i32 -// CHECK: %[[C32_I128:.*]] = arith.constant 32 : i128 -// CHECK: call @llvm.aie.put.ms(%[[C0_I32]], %[[C16_I32]]) : (i32, i32) -> () -// CHECK: call @llvm.aie.put.wms(%[[C1_I32]], %[[C32_I128]]) : (i32, i128) -> () -// CHECK: %[[C64_I384:.*]] = arith.constant 64 : i384 -// CHECK: call @llvm.aie.put.mcd(%[[C64_I384]]) : (i384) -> () +// CHECK: %[[VAL_0:.*]]:2 = call @llvm.aie2.get.ss() : () -> (i32, i32) +// CHECK: %[[VAL_1:.*]]:2 = call @llvm.aie2.get.ss() : () -> (i32, i32) +// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]]#0, %[[VAL_1]]#0 : i32 +// CHECK: %[[C1_I32_0:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_3:.*]] = call @llvm.aie2.scd.read.vec(%[[C1_I32_0]]) : (i32) -> vector<16xi32> // CHECK: return // CHECK: } module @test_core_llvm0 { - aie.device(xcvc1902) { - %tile11 = aie.tile(1, 1) - %tile21 = aie.tile(2, 1) - - %core11 = aie.core(%tile11) { - %0 = arith.constant 0 : i32 - %1 = arith.constant 1 : i32 - %val0 = arith.constant 16 : i32 - %val1 = arith.constant 32 : i128 - aie.put_stream(%0 : i32, %val0 : i32) - aie.put_stream(%1 : i32, %val1 : i128) - %val2 = arith.constant 64 : i384 - aie.put_cascade(%val2 : i384) - aie.end - } - - %core21 = aie.core(%tile21) { + aie.device(npu1_4col) { + %tile22 = aie.tile(2, 2) + %core22 = aie.core(%tile22) { %0 = arith.constant 0 : i32 %1 = arith.constant 1 : i32 //%val0 = aie.get_stream(0) : i32 %val0 = aie.get_stream(%0 : i32) : i32 %val1 = aie.get_stream(%1 : i32) : i32 %2 = arith.addi %val0, %val1 : i32 - %3 = aie.get_cascade() : i384 + %3 = aie.get_cascade() : i512 aie.end } - } } diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/non_adjacency_test_1.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/non_adjacency_test_1.mlir index e49219f4a..496a2b8c5 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/non_adjacency_test_1.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/non_adjacency_test_1.mlir @@ -1,19 +1,19 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @objfifo_cons : memref<16xi32> // CHECK: memref.global "public" @objfifo : memref<16xi32> // CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) // CHECK: %[[TILE_3_3:.*]] = aie.tile(3, 3) // CHECK: %[[OBJFIFO_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_0"} : memref<16xi32> // CHECK: %[[OBJFIFO_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_1"} : memref<16xi32> -// CHECK: %[[OBJFIFO_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 0 : i32, sym_name = "objfifo_cons_lock_0"} -// CHECK: %[[OBJFIFO_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "objfifo_cons_lock_1"} +// CHECK: %[[OBJFIFO_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 2 : i32, sym_name = "objfifo_cons_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "objfifo_cons_cons_lock"} // CHECK: %[[OBJFIFO_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_0"} : memref<16xi32> // CHECK: %[[OBJFIFO_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_1"} : memref<16xi32> -// CHECK: %[[OBJFIFO_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "objfifo_lock_0"} -// CHECK: %[[OBJFIFO_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "objfifo_lock_1"} +// CHECK: %[[OBJFIFO_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 2 : i32, sym_name = "objfifo_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "objfifo_cons_lock"} // CHECK: aie.flow(%[[TILE_1_2]], DMA : 0, %[[TILE_3_3]], DMA : 0) // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<16xi32>) { // CHECK: return @@ -24,12 +24,12 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C2]] { -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } @@ -39,26 +39,26 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C2]] { -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_CONS_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_CONS_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } // CHECK: %[[MEM_1_2:.*]] = aie.mem(%[[TILE_1_2]]) { // CHECK: %[[VAL_0:.*]] = aie.dma_start(MM2S, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -66,14 +66,14 @@ // CHECK: %[[MEM_3_3:.*]] = aie.mem(%[[TILE_3_3]]) { // CHECK: %[[VAL_1:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -81,7 +81,7 @@ // CHECK: } module @non_adjacency { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile33 = aie.tile(3, 3) aie.objectfifo @objfifo (%tile12, {%tile33}, 2 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/non_adjacency_test_2.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/non_adjacency_test_2.mlir index 8b9387735..78e927e61 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/non_adjacency_test_2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/non_adjacency_test_2.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @objfifo_cons : memref<16xi32> // CHECK: memref.global "public" @objfifo : memref<16xi32> // CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) @@ -10,14 +10,12 @@ // CHECK: %[[OBJFIFO_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_1"} : memref<16xi32> // CHECK: %[[OBJFIFO_CONS_BUFF_2:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_2"} : memref<16xi32> // CHECK: %[[OBJFIFO_CONS_BUFF_3:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_3"} : memref<16xi32> -// CHECK: %[[OBJFIFO_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 0 : i32, sym_name = "objfifo_cons_lock_0"} -// CHECK: %[[OBJFIFO_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "objfifo_cons_lock_1"} -// CHECK: %[[OBJFIFO_CONS_LOCK_2:.*]] = aie.lock(%[[TILE_3_3]], 2) {init = 0 : i32, sym_name = "objfifo_cons_lock_2"} -// CHECK: %[[OBJFIFO_CONS_LOCK_3:.*]] = aie.lock(%[[TILE_3_3]], 3) {init = 0 : i32, sym_name = "objfifo_cons_lock_3"} +// CHECK: %[[OBJFIFO_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 4 : i32, sym_name = "objfifo_cons_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "objfifo_cons_cons_lock"} // CHECK: %[[OBJFIFO_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_0"} : memref<16xi32> // CHECK: %[[OBJFIFO_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_1"} : memref<16xi32> -// CHECK: %[[OBJFIFO_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "objfifo_lock_0"} -// CHECK: %[[OBJFIFO_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "objfifo_lock_1"} +// CHECK: %[[OBJFIFO_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 2 : i32, sym_name = "objfifo_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "objfifo_cons_lock"} // CHECK: aie.flow(%[[TILE_1_2]], DMA : 0, %[[TILE_3_3]], DMA : 0) // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<16xi32>) { // CHECK: return @@ -28,12 +26,12 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C2]] { -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } @@ -43,34 +41,32 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C4:.*]] = arith.constant 4 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C4]] { -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Acquire, 1) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Acquire, 1) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_2]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], AcquireGreaterEqual, 3) // CHECK: func.call @some_work(%[[OBJFIFO_CONS_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_3]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_CONS_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Release, 0) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_CONS_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_2]], Release, 0) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_CONS_BUFF_3]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_3]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } // CHECK: %[[MEM_1_2:.*]] = aie.mem(%[[TILE_1_2]]) { // CHECK: %[[VAL_0:.*]] = aie.dma_start(MM2S, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -78,24 +74,24 @@ // CHECK: %[[MEM_3_3:.*]] = aie.mem(%[[TILE_3_3]]) { // CHECK: %[[VAL_1:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb5) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb3 // CHECK: ^bb3: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_2]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb4 // CHECK: ^bb4: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_3]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_3]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_3]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb5: // CHECK: aie.end @@ -103,7 +99,7 @@ // CHECK: } module @non_adjacency { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile33 = aie.tile(3, 3) aie.objectfifo @objfifo (%tile12, {%tile33}, 2 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/register_external_buffers_test.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/register_external_buffers_test.mlir index c626427c8..56f3ea883 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/register_external_buffers_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/register_external_buffers_test.mlir @@ -1,60 +1,59 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @ext_of_cons : memref<16xi32> // CHECK: memref.global "public" @ext_of : memref<16xi32> -// CHECK: %[[TILE_7_1:.*]] = aie.tile(7, 1) -// CHECK: %[[TILE_7_0:.*]] = aie.tile(7, 0) -// CHECK: %[[EXT_OF_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_7_1]]) {sym_name = "ext_of_cons_buff_0"} : memref<16xi32> -// CHECK: %[[EXT_OF_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_7_1]]) {sym_name = "ext_of_cons_buff_1"} : memref<16xi32> -// CHECK: %[[EXT_OF_CONS_BUFF_2:.*]] = aie.buffer(%[[TILE_7_1]]) {sym_name = "ext_of_cons_buff_2"} : memref<16xi32> -// CHECK: %[[EXT_OF_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_7_1]], 0) {init = 0 : i32, sym_name = "ext_of_cons_lock_0"} -// CHECK: %[[EXT_OF_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_7_1]], 1) {init = 0 : i32, sym_name = "ext_of_cons_lock_1"} -// CHECK: %[[EXT_OF_CONS_LOCK_2:.*]] = aie.lock(%[[TILE_7_1]], 2) {init = 0 : i32, sym_name = "ext_of_cons_lock_2"} -// CHECK: %[[EXT_OF_LOCK_0:.*]] = aie.lock(%[[TILE_7_0]], 0) {init = 0 : i32, sym_name = "ext_of_lock_0"} -// CHECK: aie.flow(%[[TILE_7_0]], DMA : 0, %[[TILE_7_1]], DMA : 0) +// CHECK: %[[TILE_3_2:.*]] = aie.tile(3, 2) +// CHECK: %[[TILE_3_0:.*]] = aie.tile(3, 0) +// CHECK: %[[EXT_OF_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "ext_of_cons_buff_0"} : memref<16xi32> +// CHECK: %[[EXT_OF_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "ext_of_cons_buff_1"} : memref<16xi32> +// CHECK: %[[EXT_OF_CONS_BUFF_2:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "ext_of_cons_buff_2"} : memref<16xi32> +// CHECK: %[[EXT_OF_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_2]], 0) {init = 3 : i32, sym_name = "ext_of_cons_prod_lock"} +// CHECK: %[[EXT_OF_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_2]], 1) {init = 0 : i32, sym_name = "ext_of_cons_cons_lock"} +// CHECK: %[[EXT_OF_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_0]], 0) {init = 1 : i32, sym_name = "ext_of_prod_lock"} +// CHECK: %[[EXT_OF_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_0]], 1) {init = 0 : i32, sym_name = "ext_of_cons_lock"} +// CHECK: aie.flow(%[[TILE_3_0]], DMA : 0, %[[TILE_3_2]], DMA : 0) // CHECK: %[[EXT_BUFFER_IN:.*]] = aie.external_buffer {sym_name = "ext_buffer_in"} : memref<64xi32> // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) { // CHECK: return // CHECK: } -// CHECK: %[[CORE_7_1:.*]] = aie.core(%[[TILE_7_1]]) { +// CHECK: %[[CORE_3_2:.*]] = aie.core(%[[TILE_3_2]]) { // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[C12:.*]] = arith.constant 12 : index -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_0]], Acquire, 1) -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[EXT_OF_CONS_BUFF_0]], %[[EXT_OF_CONS_BUFF_1]]) : (memref<16xi32>, memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_PROD_LOCK]], Release, 1) // CHECK: aie.end // CHECK: } -// CHECK: aie.shim_dma_allocation @ext_of(MM2S, 0, 7) -// CHECK: %[[SHIM_DMA_7_0:.*]] = aie.shim_dma(%[[TILE_7_0]]) { +// CHECK: aie.shim_dma_allocation @ext_of(MM2S, 0, 3) +// CHECK: %[[SHIM_DMA_3_0:.*]] = aie.shim_dma(%[[TILE_3_0]]) { // CHECK: %[[VAL_0:.*]] = aie.dma_start(MM2S, 0, ^bb1, ^bb2) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[EXT_OF_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[EXT_BUFFER_IN]] : memref<64xi32>, 0, 64) -// CHECK: aie.use_lock(%[[EXT_OF_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[EXT_OF_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb2: // CHECK: aie.end // CHECK: } -// CHECK: %[[MEM_7_1:.*]] = aie.mem(%[[TILE_7_1]]) { +// CHECK: %[[MEM_3_2:.*]] = aie.mem(%[[TILE_3_2]]) { // CHECK: %[[VAL_1:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb4) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[EXT_OF_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[EXT_OF_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb3 // CHECK: ^bb3: -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[EXT_OF_CONS_BUFF_2]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[EXT_OF_CONS_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[EXT_OF_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb4: // CHECK: aie.end @@ -62,16 +61,16 @@ // CHECK: } module @register_external_buffers { - aie.device(xcvc1902) { - %tile71 = aie.tile(7, 1) - %tile70 = aie.tile(7, 0) - aie.objectfifo @ext_of (%tile70, {%tile71}, 3 : i32) : !aie.objectfifo> + aie.device(npu1_4col) { + %tile32 = aie.tile(3, 2) + %tile30 = aie.tile(3, 0) + aie.objectfifo @ext_of (%tile30, {%tile32}, 3 : i32) : !aie.objectfifo> %ext_buffer_in = aie.external_buffer {sym_name = "ext_buffer_in"}: memref<64xi32> - aie.objectfifo.register_external_buffers @ext_of (%tile70, {%ext_buffer_in}) : (memref<64xi32>) + aie.objectfifo.register_external_buffers @ext_of (%tile30, {%ext_buffer_in}) : (memref<64xi32>) func.func @some_work(%a : memref<16xi32>, %b : memref<16xi32>) -> () { return } - %core71 = aie.core(%tile71) { + %core71 = aie.core(%tile32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %height = arith.constant 12 : index diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/shimRow_mem_test.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/shimRow_mem_test.mlir index 989f72170..89b67bb3b 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/shimRow_mem_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/shimRow_mem_test.mlir @@ -1,60 +1,59 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @objfifo_cons : memref<16xi32> // CHECK: memref.global "public" @objfifo : memref<16xi32> -// CHECK: %[[TILE_7_1:.*]] = aie.tile(7, 1) -// CHECK: %[[TILE_7_0:.*]] = aie.tile(7, 0) -// CHECK: %[[OBJFIFO_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_7_1]]) {sym_name = "objfifo_cons_buff_0"} : memref<16xi32> -// CHECK: %[[OBJFIFO_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_7_1]]) {sym_name = "objfifo_cons_buff_1"} : memref<16xi32> -// CHECK: %[[OBJFIFO_CONS_BUFF_2:.*]] = aie.buffer(%[[TILE_7_1]]) {sym_name = "objfifo_cons_buff_2"} : memref<16xi32> -// CHECK: %[[OBJFIFO_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_7_1]], 0) {init = 0 : i32, sym_name = "objfifo_cons_lock_0"} -// CHECK: %[[OBJFIFO_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_7_1]], 1) {init = 0 : i32, sym_name = "objfifo_cons_lock_1"} -// CHECK: %[[OBJFIFO_CONS_LOCK_2:.*]] = aie.lock(%[[TILE_7_1]], 2) {init = 0 : i32, sym_name = "objfifo_cons_lock_2"} -// CHECK: %[[OBJFIFO_LOCK_0:.*]] = aie.lock(%[[TILE_7_0]], 0) {init = 0 : i32, sym_name = "objfifo_lock_0"} -// CHECK: aie.flow(%[[TILE_7_0]], DMA : 0, %[[TILE_7_1]], DMA : 0) +// CHECK: %[[TILE_3_2:.*]] = aie.tile(3, 2) +// CHECK: %[[TILE_3_0:.*]] = aie.tile(3, 0) +// CHECK: %[[OBJFIFO_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "objfifo_cons_buff_0"} : memref<16xi32> +// CHECK: %[[OBJFIFO_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "objfifo_cons_buff_1"} : memref<16xi32> +// CHECK: %[[OBJFIFO_CONS_BUFF_2:.*]] = aie.buffer(%[[TILE_3_2]]) {sym_name = "objfifo_cons_buff_2"} : memref<16xi32> +// CHECK: %[[OBJFIFO_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_2]], 0) {init = 3 : i32, sym_name = "objfifo_cons_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_2]], 1) {init = 0 : i32, sym_name = "objfifo_cons_cons_lock"} +// CHECK: %[[OBJFIFO_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_0]], 0) {init = 1 : i32, sym_name = "objfifo_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_0]], 1) {init = 0 : i32, sym_name = "objfifo_cons_lock"} +// CHECK: aie.flow(%[[TILE_3_0]], DMA : 0, %[[TILE_3_2]], DMA : 0) // CHECK: %[[EXT_BUFFER_IN:.*]] = aie.external_buffer {sym_name = "ext_buffer_in"} : memref<64xi32> // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<16xi32>, %[[ARG1:.*]]: memref<16xi32>) { // CHECK: return // CHECK: } -// CHECK: %[[CORE_7_1:.*]] = aie.core(%[[TILE_7_1]]) { +// CHECK: %[[CORE_3_2:.*]] = aie.core(%[[TILE_3_2]]) { // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[C12:.*]] = arith.constant 12 : index -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Acquire, 1) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OBJFIFO_CONS_BUFF_0]], %[[OBJFIFO_CONS_BUFF_1]]) : (memref<16xi32>, memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], Release, 1) // CHECK: aie.end // CHECK: } -// CHECK: aie.shim_dma_allocation @objfifo(MM2S, 0, 7) -// CHECK: %[[SHIM_DMA_7_0:.*]] = aie.shim_dma(%[[TILE_7_0]]) { +// CHECK: aie.shim_dma_allocation @objfifo(MM2S, 0, 3) +// CHECK: %[[SHIM_DMA_3_0:.*]] = aie.shim_dma(%[[TILE_3_0]]) { // CHECK: %[[VAL_0:.*]] = aie.dma_start(MM2S, 0, ^bb1, ^bb2) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[EXT_BUFFER_IN]] : memref<64xi32>, 0, 64) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb2: // CHECK: aie.end // CHECK: } -// CHECK: %[[MEM_7_1:.*]] = aie.mem(%[[TILE_7_1]]) { +// CHECK: %[[MEM_3_2:.*]] = aie.mem(%[[TILE_3_2]]) { // CHECK: %[[VAL_1:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb4) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb3 // CHECK: ^bb3: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_2]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_2]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb4: // CHECK: aie.end @@ -62,16 +61,16 @@ // CHECK: } module @shimRow_mem { - aie.device(xcvc1902) { - %tile71 = aie.tile(7, 1) - %tile70 = aie.tile(7, 0) - aie.objectfifo @objfifo (%tile70, {%tile71}, 3 : i32) : !aie.objectfifo> + aie.device(npu1_4col) { + %tile32 = aie.tile(3, 2) + %tile30 = aie.tile(3, 0) + aie.objectfifo @objfifo (%tile30, {%tile32}, 3 : i32) : !aie.objectfifo> %ext_buffer_in = aie.external_buffer {sym_name = "ext_buffer_in"}: memref<64xi32> - aie.objectfifo.register_external_buffers @objfifo (%tile70, {%ext_buffer_in}) : (memref<64xi32>) + aie.objectfifo.register_external_buffers @objfifo (%tile30, {%ext_buffer_in}) : (memref<64xi32>) func.func @some_work(%a : memref<16xi32>, %b : memref<16xi32>) -> () { return } - %core71 = aie.core(%tile71) { + %core71 = aie.core(%tile32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %height = arith.constant 12 : index diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_1.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_1.mlir index b636207e3..eda95990b 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_1.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_1.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @objfifo : memref<16xi32> // CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) // CHECK: %[[TILE_1_3:.*]] = aie.tile(1, 3) @@ -9,25 +9,22 @@ // CHECK: %[[OBJFIFO_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_1"} : memref<16xi32> // CHECK: %[[OBJFIFO_BUFF_2:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_2"} : memref<16xi32> // CHECK: %[[OBJFIFO_BUFF_3:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_3"} : memref<16xi32> -// CHECK: %[[OBJFIFO_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "objfifo_lock_0"} -// CHECK: %[[OBJFIFO_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "objfifo_lock_1"} -// CHECK: %[[OBJFIFO_LOCK_2:.*]] = aie.lock(%[[TILE_1_2]], 2) {init = 0 : i32, sym_name = "objfifo_lock_2"} -// CHECK: %[[OBJFIFO_LOCK_3:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 0 : i32, sym_name = "objfifo_lock_3"} +// CHECK: %[[OBJFIFO_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 4 : i32, sym_name = "objfifo_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "objfifo_cons_lock"} // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<16xi32>) { // CHECK: return // CHECK: } // CHECK: %[[CORE_1_2:.*]] = aie.core(%[[TILE_1_2]]) { -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Acquire, 0) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Release, 1) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_3]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_2]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_3]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_2]]) : (memref<16xi32>) -> () @@ -37,7 +34,7 @@ // CHECK: } module @singleFifo { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) aie.objectfifo @objfifo (%tile12, {%tile13}, 4 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_2.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_2.mlir index 6efe3f59b..d2804e51a 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_2.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @of2 : memref<16xi32> // CHECK: memref.global "public" @of : memref<16xi32> // CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) @@ -9,66 +9,54 @@ // CHECK: %[[OF2_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of2_buff_0"} : memref<16xi32> // CHECK: %[[OF2_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of2_buff_1"} : memref<16xi32> // CHECK: %[[OF2_BUFF_2:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of2_buff_2"} : memref<16xi32> -// CHECK: %[[OF2_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 4) {init = 0 : i32, sym_name = "of2_lock_0"} -// CHECK: %[[OF2_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 5) {init = 0 : i32, sym_name = "of2_lock_1"} -// CHECK: %[[OF2_LOCK_2:.*]] = aie.lock(%[[TILE_1_2]], 6) {init = 0 : i32, sym_name = "of2_lock_2"} +// CHECK: %[[OF2_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 2) {init = 3 : i32, sym_name = "of2_prod_lock"} +// CHECK: %[[OF2_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 0 : i32, sym_name = "of2_cons_lock"} // CHECK: %[[OF_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of_buff_0"} : memref<16xi32> // CHECK: %[[OF_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of_buff_1"} : memref<16xi32> // CHECK: %[[OF_BUFF_2:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of_buff_2"} : memref<16xi32> // CHECK: %[[OF_BUFF_3:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of_buff_3"} : memref<16xi32> -// CHECK: %[[OF_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "of_lock_0"} -// CHECK: %[[OF_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of_lock_1"} -// CHECK: %[[OF_LOCK_2:.*]] = aie.lock(%[[TILE_1_2]], 2) {init = 0 : i32, sym_name = "of_lock_2"} -// CHECK: %[[OF_LOCK_3:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 0 : i32, sym_name = "of_lock_3"} +// CHECK: %[[OF_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 4 : i32, sym_name = "of_prod_lock"} +// CHECK: %[[OF_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of_cons_lock"} // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<16xi32>) { // CHECK: return // CHECK: } // CHECK: %[[CORE_1_2:.*]] = aie.core(%[[TILE_1_2]]) { -// CHECK: aie.use_lock(%[[OF_LOCK_0]], Acquire, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF_PROD_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF2_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OF2_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_2]], Acquire, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_3]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OF_PROD_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF_BUFF_2]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF_BUFF_3]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF_LOCK_1]], Release, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_2]], Release, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_3]], Release, 1) -// CHECK: aie.use_lock(%[[OF2_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[OF2_LOCK_1]], Acquire, 0) -// CHECK: aie.use_lock(%[[OF2_LOCK_2]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF_CONS_LOCK]], Release, 3) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF2_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF2_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF2_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK]], Release, 1) // CHECK: aie.end // CHECK: } // CHECK: %[[CORE_1_3:.*]] = aie.core(%[[TILE_1_3]]) { -// CHECK: aie.use_lock(%[[OF_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OF_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF2_LOCK_0]], Acquire, 1) -// CHECK: aie.use_lock(%[[OF2_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF2_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF2_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF2_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[OF2_LOCK_1]], Release, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_1]], Acquire, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_2]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK]], Release, 2) +// CHECK: aie.use_lock(%[[OF_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OF_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF_LOCK_1]], Release, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_2]], Release, 0) +// CHECK: aie.use_lock(%[[OF_PROD_LOCK]], Release, 2) // CHECK: aie.end // CHECK: } // CHECK: } module @multiFifo { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) aie.objectfifo @of (%tile12, {%tile13}, 4 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_3.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_3.mlir index 07aa6ace1..01fe48051 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_3.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/subview_test_3.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @of2 : memref<16xi32> // CHECK: memref.global "public" @of : memref<16xi32> // CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) @@ -9,66 +9,54 @@ // CHECK: %[[OF2_BUFF_0:.*]] = aie.buffer(%[[TILE_1_3]]) {sym_name = "of2_buff_0"} : memref<16xi32> // CHECK: %[[OF2_BUFF_1:.*]] = aie.buffer(%[[TILE_1_3]]) {sym_name = "of2_buff_1"} : memref<16xi32> // CHECK: %[[OF2_BUFF_2:.*]] = aie.buffer(%[[TILE_1_3]]) {sym_name = "of2_buff_2"} : memref<16xi32> -// CHECK: %[[OF2_LOCK_0:.*]] = aie.lock(%[[TILE_1_3]], 0) {init = 0 : i32, sym_name = "of2_lock_0"} -// CHECK: %[[OF2_LOCK_1:.*]] = aie.lock(%[[TILE_1_3]], 1) {init = 0 : i32, sym_name = "of2_lock_1"} -// CHECK: %[[OF2_LOCK_2:.*]] = aie.lock(%[[TILE_1_3]], 2) {init = 0 : i32, sym_name = "of2_lock_2"} +// CHECK: %[[OF2_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_3]], 0) {init = 3 : i32, sym_name = "of2_prod_lock"} +// CHECK: %[[OF2_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_3]], 1) {init = 0 : i32, sym_name = "of2_cons_lock"} // CHECK: %[[OF_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of_buff_0"} : memref<16xi32> // CHECK: %[[OF_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of_buff_1"} : memref<16xi32> // CHECK: %[[OF_BUFF_2:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of_buff_2"} : memref<16xi32> // CHECK: %[[OF_BUFF_3:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "of_buff_3"} : memref<16xi32> -// CHECK: %[[OF_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 0 : i32, sym_name = "of_lock_0"} -// CHECK: %[[OF_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of_lock_1"} -// CHECK: %[[OF_LOCK_2:.*]] = aie.lock(%[[TILE_1_2]], 2) {init = 0 : i32, sym_name = "of_lock_2"} -// CHECK: %[[OF_LOCK_3:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 0 : i32, sym_name = "of_lock_3"} +// CHECK: %[[OF_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 0) {init = 4 : i32, sym_name = "of_prod_lock"} +// CHECK: %[[OF_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 1) {init = 0 : i32, sym_name = "of_cons_lock"} // CHECK: func.func @some_work(%[[ARG0:.*]]: memref<16xi32>) { // CHECK: return // CHECK: } // CHECK: %[[CORE_1_2:.*]] = aie.core(%[[TILE_1_2]]) { -// CHECK: aie.use_lock(%[[OF_LOCK_0]], Acquire, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF_PROD_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF2_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OF2_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_2]], Acquire, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_3]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OF_PROD_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF_BUFF_2]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF_BUFF_3]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF_LOCK_1]], Release, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_2]], Release, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_3]], Release, 1) -// CHECK: aie.use_lock(%[[OF2_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[OF2_LOCK_1]], Acquire, 1) -// CHECK: aie.use_lock(%[[OF2_LOCK_2]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF_CONS_LOCK]], Release, 3) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF2_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF2_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF2_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK]], Release, 1) // CHECK: aie.end // CHECK: } // CHECK: %[[CORE_1_3:.*]] = aie.core(%[[TILE_1_3]]) { -// CHECK: aie.use_lock(%[[OF_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OF_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF2_LOCK_0]], Acquire, 0) -// CHECK: aie.use_lock(%[[OF2_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OF2_PROD_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF2_BUFF_0]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF2_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF2_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[OF2_LOCK_1]], Release, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_0]], Release, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_1]], Acquire, 1) -// CHECK: aie.use_lock(%[[OF_LOCK_2]], Acquire, 1) +// CHECK: aie.use_lock(%[[OF2_CONS_LOCK]], Release, 2) +// CHECK: aie.use_lock(%[[OF_PROD_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OF_CONS_LOCK]], AcquireGreaterEqual, 2) // CHECK: func.call @some_work(%[[OF_BUFF_1]]) : (memref<16xi32>) -> () // CHECK: func.call @some_work(%[[OF_BUFF_2]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OF_LOCK_1]], Release, 0) -// CHECK: aie.use_lock(%[[OF_LOCK_2]], Release, 0) +// CHECK: aie.use_lock(%[[OF_PROD_LOCK]], Release, 2) // CHECK: aie.end // CHECK: } // CHECK: } module @multiCoreMixedFifo { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) aie.objectfifo @of (%tile12, {%tile13}, 4 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/tileDMA_test.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/tileDMA_test.mlir index a436d61be..808018e5a 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/tileDMA_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/tileDMA_test.mlir @@ -1,19 +1,19 @@ // RUN: iree-opt --aie-objectFifo-stateful-transform %s | FileCheck %s -// CHECK-LABEL: aie.device(xcvc1902) { +// CHECK-LABEL: aie.device(npu1_4col) { // CHECK: memref.global "public" @objfifo_cons : memref<16xi32> // CHECK: memref.global "public" @objfifo : memref<16xi32> // CHECK: %[[TILE_1_2:.*]] = aie.tile(1, 2) // CHECK: %[[TILE_3_3:.*]] = aie.tile(3, 3) // CHECK: %[[OBJFIFO_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_0"} : memref<16xi32> // CHECK: %[[OBJFIFO_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_1"} : memref<16xi32> -// CHECK: %[[OBJFIFO_CONS_LOCK_0:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 0 : i32, sym_name = "objfifo_cons_lock_0"} -// CHECK: %[[OBJFIFO_CONS_LOCK_1:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "objfifo_cons_lock_1"} +// CHECK: %[[OBJFIFO_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 2 : i32, sym_name = "objfifo_cons_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i32, sym_name = "objfifo_cons_cons_lock"} // CHECK: %[[OBJFIFO_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_0"} : memref<16xi32> // CHECK: %[[OBJFIFO_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_1"} : memref<16xi32> -// CHECK: %[[OBJFIFO_LOCK_0:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 0 : i32, sym_name = "objfifo_lock_0"} -// CHECK: %[[OBJFIFO_LOCK_1:.*]] = aie.lock(%[[TILE_1_2]], 4) {init = 0 : i32, sym_name = "objfifo_lock_1"} +// CHECK: %[[OBJFIFO_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 2 : i32, sym_name = "objfifo_prod_lock"} +// CHECK: %[[OBJFIFO_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 4) {init = 0 : i32, sym_name = "objfifo_cons_lock"} // CHECK: %[[BUFFER_1_2:.*]] = aie.buffer(%[[TILE_1_2]]) : memref<16xi32> // CHECK: %[[LOCK_1_2:.*]] = aie.lock(%[[TILE_1_2]], 0) // CHECK: %[[BUFFER_1_2_0:.*]] = aie.buffer(%[[TILE_1_2]]) : memref<16xi32> @@ -30,12 +30,12 @@ // CHECK: %[[C12:.*]] = arith.constant 12 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C2]] { -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_0]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Release, 1) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: func.call @some_work(%[[OBJFIFO_BUFF_1]]) : (memref<16xi32>) -> () -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1) // CHECK: } // CHECK: aie.end // CHECK: } @@ -61,14 +61,14 @@ // CHECK: ^bb5: // CHECK: %[[VAL_2:.*]] = aie.dma_start(MM2S, 1, ^bb6, ^bb8) // CHECK: ^bb6: -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_0]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb7 // CHECK: ^bb7: -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Acquire, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_LOCK_1]], Release, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb6 // CHECK: ^bb8: // CHECK: aie.end @@ -76,14 +76,14 @@ // CHECK: %[[MEM_3_3:.*]] = aie.mem(%[[TILE_3_3]]) { // CHECK: %[[VAL_3:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb3) // CHECK: ^bb1: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_0]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_0]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Acquire, 0) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) // CHECK: aie.dma_bd(%[[OBJFIFO_CONS_BUFF_1]] : memref<16xi32>, 0, 16) -// CHECK: aie.use_lock(%[[OBJFIFO_CONS_LOCK_1]], Release, 1) +// CHECK: aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1) // CHECK: aie.next_bd ^bb1 // CHECK: ^bb3: // CHECK: aie.end @@ -91,7 +91,7 @@ // CHECK: } module @tileDMA_channels { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile33 = aie.tile(3, 3) %buff0 = aie.buffer(%tile12) : memref<16xi32> diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/useLock_in_func.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/useLock_in_func.mlir index 4ecbc0dc8..90846efc7 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/useLock_in_func.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/useLock_in_func.mlir @@ -1,11 +1,11 @@ // RUN: iree-opt --aie-localize-locks --aie-standard-lowering %s | FileCheck %s -// CHECK-LABEL: module @test attributes {llvm.target_triple = "aie"} { +// CHECK-LABEL: module @test attributes {llvm.target_triple = "aie2"} { // CHECK-LABEL: func.func private @kernel( // CHECK-SAME: %[[ARG0:.*]]: index) { // CHECK: %[[VAL_0:.*]] = arith.index_cast %[[ARG0]] : index to i32 // CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32 -// CHECK: call @llvm.aie.lock.acquire.reg(%[[VAL_0]], %[[C0_I32]]) : (i32, i32) -> () +// CHECK: call @llvm.aie2.acquire(%[[VAL_0]], %[[C0_I32]]) : (i32, i32) -> () // CHECK: return // CHECK: } @@ -16,7 +16,7 @@ // CHECK: } module @test { - aie.device(xcvc1902) { + aie.device(npu1_4col) { %tile13 = aie.tile(1, 3) %lock13_3 = aie.lock(%tile13, 0) diff --git a/compiler/plugins/target/AMD-AIE/aie/aie_passes/user_assigned.mlir b/compiler/plugins/target/AMD-AIE/aie/aie_passes/user_assigned.mlir index 2a08f9f11..79e08f0cb 100644 --- a/compiler/plugins/target/AMD-AIE/aie/aie_passes/user_assigned.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/aie_passes/user_assigned.mlir @@ -1,142 +1,6 @@ // RUN: iree-opt --aie-assign-bd-ids --split-input-file %s | FileCheck %s -// CHECK-LABEL: aie.device(npu1_4col) { -// CHECK: %[[TILE_0_0:.*]] = aie.tile(0, 0) -// CHECK: %[[TILE_0_1:.*]] = aie.tile(0, 1) -// CHECK: %[[TILE_0_2:.*]] = aie.tile(0, 2) -// CHECK: %[[DOUBLE_BUFFER:.*]] = aie.buffer(%[[TILE_0_2]]) {sym_name = "double_buffer"} : memref<32xi32> -// CHECK: %[[BUFFER_0_1:.*]] = aie.buffer(%[[TILE_0_1]]) : memref<32xi32> -// CHECK: %[[LOCK_X:.*]] = aie.lock(%[[TILE_0_2]]) {init = 1 : i32, sym_name = "lock_X"} -// CHECK: %[[LOCK_Y:.*]] = aie.lock(%[[TILE_0_2]]) {init = 0 : i32, sym_name = "lock_Y"} -// CHECK: %[[MEM_0_2:.*]] = aie.mem(%[[TILE_0_2]]) { -// CHECK: %[[PLAYER_A:.*]] = aie.dma(S2MM, 0) {sym_name = "player_a"} [{ -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire, 0) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>, 0) {bd_id = 0 : i32, next_bd_id = 1 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 0) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_X]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 1 : i32, next_bd_id = 2 : i32} -// CHECK: aie.use_lock(%[[LOCK_X]], Release, -1) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire) {acq_en = false} -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 2 : i32, next_bd_id = 0 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 1) -// CHECK: }] -// CHECK: %[[PLAYER_B:.*]] = aie.dma(S2MM, 1) {sym_name = "player_b"} [{ -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>, 0) {bd_id = 3 : i32, next_bd_id = 4 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 0) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_X]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 4 : i32, next_bd_id = 5 : i32} -// CHECK: aie.use_lock(%[[LOCK_X]], Release, -1) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire) {acq_en = false} -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 5 : i32, next_bd_id = 3 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, -1) -// CHECK: }] -// CHECK: aie.end -// CHECK: } -// CHECK: %[[MEMTILE_DMA_0_1:.*]] = aie.memtile_dma(%[[TILE_0_1]]) { -// CHECK: %[[LOCK_0_1:.*]] = aie.lock(%[[TILE_0_1]]) {init = 1 : i32} -// CHECK: %[[LOCK_0_1_0:.*]] = aie.lock(%[[TILE_0_1]]) {init = 0 : i32} -// CHECK: %[[VAL_0:.*]] = aie.dma(S2MM, 0) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 0 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_0]], Release) -// CHECK: }] -// CHECK: %[[VAL_1:.*]] = aie.dma(MM2S, 0) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_0]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 1 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1]], Release) -// CHECK: }] -// CHECK: %[[LOCK_0_1_1:.*]] = aie.lock(%[[TILE_0_1]]) {init = 1 : i32} -// CHECK: %[[LOCK_0_1_2:.*]] = aie.lock(%[[TILE_0_1]]) {init = 0 : i32} -// CHECK: %[[VAL_2:.*]] = aie.dma(S2MM, 1) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_1]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 24 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_2]], Release) -// CHECK: }] -// CHECK: %[[VAL_3:.*]] = aie.dma(MM2S, 1) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_2]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 25 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_1]], Release) -// CHECK: }] -// CHECK: aie.end -// CHECK: } -// CHECK: } - -module { - aie.device(npu1_4col) { - %tile_0_0 = aie.tile(0, 0) - %tile_0_1 = aie.tile(0, 1) - %tile_0_2 = aie.tile(0, 2) - %double_buffer = aie.buffer(%tile_0_2) {sym_name = "double_buffer"} : memref<32xi32> - %buffer_0_1 = aie.buffer(%tile_0_1) : memref<32xi32> - %lock_X = aie.lock(%tile_0_2) {init = 1 : i32, sym_name = "lock_X"} - %lock_Y = aie.lock(%tile_0_2) {init = 0 : i32, sym_name = "lock_Y"} - %mem_0_2 = aie.mem(%tile_0_2) { - %player_a = aie.dma(S2MM, 0) {sym_name = "player_a"} [{ - aie.use_lock(%lock_Y, Acquire, 0) - aie.dma_bd(%double_buffer : memref<32xi32>, 0) {bd_id = 0 : i32} - aie.use_lock(%lock_Y, Release, 0) - }, { - aie.use_lock(%lock_X, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>) {bd_id = 1 : i32} - aie.use_lock(%lock_X, Release, -1) - }, { - aie.use_lock(%lock_Y, Acquire) {acq_en = false} - aie.dma_bd(%double_buffer : memref<32xi32>) - aie.use_lock(%lock_Y, Release, 1) - }] - %player_b = aie.dma(S2MM, 1) {sym_name = "player_b"} [{ - aie.use_lock(%lock_Y, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>, 0) - aie.use_lock(%lock_Y, Release, 0) - }, { - aie.use_lock(%lock_X, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>) {bd_id = 4 : i32} - aie.use_lock(%lock_X, Release, -1) - }, { - aie.use_lock(%lock_Y, Acquire) {acq_en = false} - aie.dma_bd(%double_buffer : memref<32xi32>) - aie.use_lock(%lock_Y, Release, -1) - }] - aie.end - } - %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { - %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} - %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} - %0 = aie.dma(S2MM, 0) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1_0, Release) - }] - %1 = aie.dma(MM2S, 0) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_0, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1, Release) - }] - %lock_0_1_1 = aie.lock(%tile_0_1) {init = 1 : i32} - %lock_0_1_2 = aie.lock(%tile_0_1) {init = 0 : i32} - %2 = aie.dma(S2MM, 1) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_1, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) {bd_id = 24 : i32} - aie.use_lock(%lock_0_1_2, Release) - }] - %3 = aie.dma(MM2S, 1) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_2, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1_1, Release) - }] - aie.end - } - } -} - -// ----- - // CHECK-LABEL: aie.device(xcve2302) { // CHECK: %[[TILE_2_1:.*]] = aie.tile(2, 1) // CHECK: %[[IN:.*]] = aie.buffer(%[[TILE_2_1]]) {address = 8192 : i32, sym_name = "in"} : memref<16xi32> @@ -202,7 +66,7 @@ module @aie_module { aie.next_bd ^bd0 ^bd1: aie.use_lock(%l01_1, "AcquireGreaterEqual", 1) - aie.dma_bd(%buf01_0 : memref<16xi32>, 0, 16) {bd_id = 24 : i32} + aie.dma_bd(%buf01_0 : memref<16xi32>, 0, 16) aie.use_lock(%l01_0, "Release", 1) aie.next_bd ^bd1 ^bd2: @@ -212,7 +76,7 @@ module @aie_module { aie.next_bd ^bd2 ^bd3: aie.use_lock(%l01_3, "AcquireGreaterEqual", 1) - aie.dma_bd(%buf01_1 : memref<16xi32>, 0, 16) {bd_id = 1 : i32} + aie.dma_bd(%buf01_1 : memref<16xi32>, 0, 16) aie.use_lock(%l01_2, "Release", 1) aie.next_bd ^bd3 ^end: @@ -220,139 +84,3 @@ module @aie_module { } } } - -// ----- - -// CHECK-LABEL: aie.device(npu1_4col) { -// CHECK: %[[TILE_0_0:.*]] = aie.tile(0, 0) -// CHECK: %[[TILE_0_1:.*]] = aie.tile(0, 1) -// CHECK: %[[TILE_0_2:.*]] = aie.tile(0, 2) -// CHECK: %[[DOUBLE_BUFFER:.*]] = aie.buffer(%[[TILE_0_2]]) {sym_name = "double_buffer"} : memref<32xi32> -// CHECK: %[[BUFFER_0_1:.*]] = aie.buffer(%[[TILE_0_1]]) : memref<32xi32> -// CHECK: %[[LOCK_X:.*]] = aie.lock(%[[TILE_0_2]]) {init = 1 : i32, sym_name = "lock_X"} -// CHECK: %[[LOCK_Y:.*]] = aie.lock(%[[TILE_0_2]]) {init = 0 : i32, sym_name = "lock_Y"} -// CHECK: %[[MEM_0_2:.*]] = aie.mem(%[[TILE_0_2]]) { -// CHECK: %[[PLAYER_A:.*]] = aie.dma(S2MM, 0) {sym_name = "player_a"} [{ -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire, 0) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>, 0) {bd_id = 5 : i32, next_bd_id = 4 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 0) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_X]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 4 : i32, next_bd_id = 3 : i32} -// CHECK: aie.use_lock(%[[LOCK_X]], Release, -1) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire) {acq_en = false} -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 3 : i32, next_bd_id = 5 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 1) -// CHECK: }] -// CHECK: %[[PLAYER_B:.*]] = aie.dma(S2MM, 1) {sym_name = "player_b"} [{ -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>, 0) {bd_id = 2 : i32, next_bd_id = 1 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, 0) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_X]], Acquire, 1) -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 1 : i32, next_bd_id = 0 : i32} -// CHECK: aie.use_lock(%[[LOCK_X]], Release, -1) -// CHECK: }, { -// CHECK: aie.use_lock(%[[LOCK_Y]], Acquire) {acq_en = false} -// CHECK: aie.dma_bd(%[[DOUBLE_BUFFER]] : memref<32xi32>) {bd_id = 0 : i32, next_bd_id = 2 : i32} -// CHECK: aie.use_lock(%[[LOCK_Y]], Release, -1) -// CHECK: }] -// CHECK: aie.end -// CHECK: } -// CHECK: %[[MEMTILE_DMA_0_1:.*]] = aie.memtile_dma(%[[TILE_0_1]]) { -// CHECK: %[[LOCK_0_1:.*]] = aie.lock(%[[TILE_0_1]]) {init = 1 : i32} -// CHECK: %[[LOCK_0_1_0:.*]] = aie.lock(%[[TILE_0_1]]) {init = 0 : i32} -// CHECK: %[[VAL_0:.*]] = aie.dma(S2MM, 0) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 0 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_0]], Release) -// CHECK: }] -// CHECK: %[[VAL_1:.*]] = aie.dma(MM2S, 0) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_0]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 1 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1]], Release) -// CHECK: }] -// CHECK: %[[LOCK_0_1_1:.*]] = aie.lock(%[[TILE_0_1]]) {init = 1 : i32} -// CHECK: %[[LOCK_0_1_2:.*]] = aie.lock(%[[TILE_0_1]]) {init = 0 : i32} -// CHECK: %[[VAL_2:.*]] = aie.dma(S2MM, 1) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_1]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 24 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_2]], Release) -// CHECK: }] -// CHECK: %[[VAL_3:.*]] = aie.dma(MM2S, 1) {loop = false, repeat_count = 10 : i32} [{ -// CHECK: aie.use_lock(%[[LOCK_0_1_2]], AcquireGreaterEqual) -// CHECK: aie.dma_bd(%[[BUFFER_0_1]] : memref<32xi32>) {bd_id = 25 : i32} -// CHECK: aie.use_lock(%[[LOCK_0_1_1]], Release) -// CHECK: }] -// CHECK: aie.end -// CHECK: } -// CHECK: } - -module { - aie.device(npu1_4col) { - %tile_0_0 = aie.tile(0, 0) - %tile_0_1 = aie.tile(0, 1) - %tile_0_2 = aie.tile(0, 2) - %double_buffer = aie.buffer(%tile_0_2) {sym_name = "double_buffer"} : memref<32xi32> - %buffer_0_1 = aie.buffer(%tile_0_1) : memref<32xi32> - %lock_X = aie.lock(%tile_0_2) {init = 1 : i32, sym_name = "lock_X"} - %lock_Y = aie.lock(%tile_0_2) {init = 0 : i32, sym_name = "lock_Y"} - %mem_0_2 = aie.mem(%tile_0_2) { - %player_a = aie.dma(S2MM, 0) {sym_name = "player_a"} [{ - aie.use_lock(%lock_Y, Acquire, 0) - aie.dma_bd(%double_buffer : memref<32xi32>, 0) {bd_id = 5 : i32} - aie.use_lock(%lock_Y, Release, 0) - }, { - aie.use_lock(%lock_X, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>) {bd_id = 4 : i32} - aie.use_lock(%lock_X, Release, -1) - }, { - aie.use_lock(%lock_Y, Acquire) {acq_en = false} - aie.dma_bd(%double_buffer : memref<32xi32>) {bd_id = 3 : i32} - aie.use_lock(%lock_Y, Release, 1) - }] - %player_b = aie.dma(S2MM, 1) {sym_name = "player_b"} [{ - aie.use_lock(%lock_Y, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>, 0) {bd_id = 2 : i32} - aie.use_lock(%lock_Y, Release, 0) - }, { - aie.use_lock(%lock_X, Acquire, 1) - aie.dma_bd(%double_buffer : memref<32xi32>) {bd_id = 1 : i32} - aie.use_lock(%lock_X, Release, -1) - }, { - aie.use_lock(%lock_Y, Acquire) {acq_en = false} - aie.dma_bd(%double_buffer : memref<32xi32>) {bd_id = 0 : i32} - aie.use_lock(%lock_Y, Release, -1) - }] - aie.end - } - %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { - %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} - %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} - %0 = aie.dma(S2MM, 0) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1_0, Release) - }] - %1 = aie.dma(MM2S, 0) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_0, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1, Release) - }] - %lock_0_1_1 = aie.lock(%tile_0_1) {init = 1 : i32} - %lock_0_1_2 = aie.lock(%tile_0_1) {init = 0 : i32} - %2 = aie.dma(S2MM, 1) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_1, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) {bd_id = 24 : i32} - aie.use_lock(%lock_0_1_2, Release) - }] - %3 = aie.dma(MM2S, 1) {loop = false, repeat_count = 10 : i32} [{ - aie.use_lock(%lock_0_1_2, AcquireGreaterEqual) - aie.dma_bd(%buffer_0_1 : memref<32xi32>) - aie.use_lock(%lock_0_1_1, Release) - }] - aie.end - } - } -}