From 622c3ffe8d85d699f0cf4e0a8b36d984efbcfeb8 Mon Sep 17 00:00:00 2001 From: max Date: Sat, 13 Jul 2024 21:37:30 -0500 Subject: [PATCH] more DCE --- .../target/AMD-AIE/aievec/AIEVecToLLVM.cpp | 69 +----- .../target/AMD-AIE/aievec/AIEVecUtils.h | 97 -------- .../plugins/target/AMD-AIE/aievec/Passes.h | 14 -- .../aievec/VectorToAIEVecConversions.cpp | 1 - .../aievec/VectorToVectorConversions.cpp | 24 +- .../target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td | 218 +----------------- 6 files changed, 12 insertions(+), 411 deletions(-) diff --git a/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp b/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp index f40925f28c..d47d6dc8f7 100644 --- a/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp +++ b/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp @@ -107,27 +107,6 @@ static SmallVector forceCastOperandsToSignature(OpBuilder &builder, })); } -struct BufferParams { - uint32_t start; - uint32_t offsets; - uint32_t offsets_hi; - uint32_t step; - uint32_t square; -}; - -std::string getVectorTypeString(VectorType type, bool abbrev = false, - bool acc = false) { - std::stringstream ss; - auto size = getVectorLaneSize(type); - ss << "v" << size; - if (auto intType = dyn_cast(type.getElementType())) { - ss << (acc ? "acc" : abbrev ? "i" : "int") << intType.getWidth(); - } else if (dyn_cast(type.getElementType())) { - ss << (abbrev ? "f" : "float"); - } - return ss.str(); -} - // Squashes the easy-to-read 16-bit square encoding into // the 8-bit encoding the configuration register uses uint32_t encodeSquare(uint32_t square) { @@ -139,15 +118,6 @@ uint32_t encodeSquare(uint32_t square) { return out & 0xFF; } -// Encode the configuration register with buffer parameters and options -// TODO: struct to handle this? -void encodeConf(uint32_t conf[2], const BufferParams &x, const BufferParams &z, - bool sub) { - conf[0] |= ((x.step & 0x3F) << 0) | ((z.step & 0x3F) << 8); - conf[1] |= (encodeSquare(x.square) << 0) | (encodeSquare(z.square) << 8); - conf[1] |= sub << 17; -} - class UPSOpConversion : public mlir::ConvertOpToLLVMPattern { public: using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; @@ -677,9 +647,8 @@ class ShuffleOpConversion } }; -void populateAIEVecToLLVMConversionPatterns( - mlir::LLVMTypeConverter &converter, mlir::RewritePatternSet &patterns, - Aie2Fp32Emulation aie2Fp32EmulationOption) { +void populateAIEVecToLLVMConversionPatterns(mlir::LLVMTypeConverter &converter, + mlir::RewritePatternSet &patterns) { patterns.add< UPSOpConversion, SRSOpConversion, @@ -689,14 +658,6 @@ void populateAIEVecToLLVMConversionPatterns( struct ConvertAIEVecToLLVMPass : public PassWrapper> { - ConvertAIEVecToLLVMPass(const ConvertAIEVecToLLVMOptions &options) { - aie2Fp32Emulation = options.aie2Fp32Emulation; - } - // both of these are deleted by default because Pass::Option has deleted - // defaults - ConvertAIEVecToLLVMPass() = default; - ConvertAIEVecToLLVMPass(const ConvertAIEVecToLLVMPass &pass) {} - StringRef getArgument() const override { return "convert-aievec-to-llvm"; } StringRef getDescription() const override { return "This pass converts AIEVec dialect ops to LLVM dialect calls to " @@ -708,29 +669,6 @@ struct ConvertAIEVecToLLVMPass mlir::vector::VectorDialect, xllvm::XLLVMDialect>(); } - mlir::Pass::Option aie2Fp32Emulation{ - *this, "aie2-fp32-emulation-strategy", - llvm::cl::desc( - "Set the AIE2 FP32 emulation strategy. Elementwise multiplication " - "and matrix multiplication intrinsics for FP32 input type are " - "emulated using bfloat16 data-path."), - llvm::cl::init(Aie2Fp32Emulation::AccuracySafe), - llvm::cl::values( - clEnumValN(Aie2Fp32Emulation::AccuracySafe, "accuracy-safe", - "Most accurate option since input fp32 number is split " - "into 3 bfloat16 numbers. float a*b would require 9 mac " - "operations due to 3 bfloat16 splits each."), - clEnumValN( - Aie2Fp32Emulation::AccuracyFast, "accuracy-fast", - "Fast and Accurate option. Input fp32 number is split in to 3 " - "bfloat16 numbers. In the 9 mac operations to emulate fp32 mul, " - "mac operations with LSBs are ignored. (3 last terms)."), - clEnumValN( - Aie2Fp32Emulation::AccuracyLow, "accuracy-low", - "Fast and least accurate option. Input fp32 number is split in " - "to 2 bfloat16 numbers. In the 4 mac operations to emulate fp32 " - "mul, mac operations with LSBs are ignored. (1 last term)."))}; - void runOnOperation() override { RewritePatternSet patterns(&getContext()); LLVMTypeConverter converter(&getContext()); @@ -740,8 +678,7 @@ struct ConvertAIEVecToLLVMPass converter.addConversion( [&](VectorType type) -> std::optional { return type; }); - populateAIEVecToLLVMConversionPatterns(converter, patterns, - aie2Fp32Emulation); + populateAIEVecToLLVMConversionPatterns(converter, patterns); LLVMConversionTarget target(getContext()); target.addIllegalDialect(); diff --git a/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h b/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h index cc6a084e67..9b76b1b5ed 100644 --- a/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h +++ b/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h @@ -46,19 +46,6 @@ inline unsigned getVectorLaneSize(mlir::VectorType type) { std::multiplies()); } -// For a 1D vector, return its size in bits. For an nD vector, return the size -// of the innerost dimension in bits. -inline int32_t getVectorSizeInBits(mlir::VectorType type) { - int32_t veclen = getVectorLaneSize(type) * getElementSizeInBits(type); - assert(veclen >= 128 && "AIE vector size should be greater than 128 bits"); - return veclen; -} - -// Return true if this is an operation defined in AIE dialect -inline bool isAIEOp(mlir::Operation *op) { - return llvm::isa(op->getDialect()); -} - // Determine the output type for a vector operation based on whether // it operates on integer or floating point data. inline mlir::VectorType getVectorOpDestType(mlir::VectorType type, bool AIE2) { @@ -90,90 +77,6 @@ inline mlir::VectorType getVectorOpDestType(mlir::VectorType type, bool AIE2) { llvm::report_fatal_error("Unsupported destination type"); } -// Linearize the exprVec as a strided access, but do not simplify -inline mlir::AffineExpr flattenedStridedExpr( - llvm::ArrayRef sizes, llvm::ArrayRef exprs, - mlir::MLIRContext *context) { - // Expect non-empty sizes and exprs - if (sizes.empty() || exprs.empty()) return nullptr; - - if (is_contained(sizes, 0)) return getAffineConstantExpr(0, context); - - auto maps = mlir::AffineMap::inferFromExprList(exprs, context); - if (maps.empty()) return nullptr; - - unsigned nSymbols = maps[0].getNumSymbols(); - - mlir::AffineExpr expr; - bool dynamicPoisonBit = false; - int64_t runningSize = 1; - for (auto en : zip(reverse(exprs), reverse(sizes))) { - int64_t size = std::get<1>(en); - if (size == 0) continue; - - mlir::AffineExpr dimExpr = std::get<0>(en); - mlir::AffineExpr stride = dynamicPoisonBit - ? getAffineSymbolExpr(nSymbols++, context) - : getAffineConstantExpr(runningSize, context); - expr = expr ? expr + dimExpr * stride : dimExpr * stride; - if (size > 0) { - runningSize *= size; - if (runningSize <= 0) return nullptr; - } else - dynamicPoisonBit = true; - } - return expr; -} - -// From a linearized affine expression, compute the base and the constant -// offset. If the access is A[i][j+2] for an N*N array A, the linearized -// expression will be A[i*N+j+2]. The base in this case will be (i*N+j), and the -// offset will be 2. -inline std::pair extractBaseAndOffset( - mlir::AffineExpr expr) { - mlir::AffineExpr base = expr; - int32_t offset = 0; - - if (auto constExpr = llvm::dyn_cast(expr)) { - base = nullptr; - offset += constExpr.getValue(); - } else if (auto binopExpr = llvm::dyn_cast(expr)) { - if (binopExpr.getKind() == mlir::AffineExprKind::Add) { - mlir::AffineExpr lhs = binopExpr.getLHS(), rhs = binopExpr.getRHS(); - if (auto constExpr = llvm::dyn_cast(lhs)) { - base = rhs; - offset += constExpr.getValue(); - } - if (auto constExpr = llvm::dyn_cast(rhs)) { - base = base == rhs ? nullptr : lhs; - offset += constExpr.getValue(); - } - } - } - return std::make_pair(base, offset); -} - -// MLIR-AIE auto-vectorization to CPP flow currently doesn't support to -// implicitly broadcast a dynamic dimension of size `1`. Hence, we assume that -// dynamic dimensions are not with size '1' that can be interpreted to various -// broadcasting scenarios. We let lowerings assume this on a per-scope basis if -// the tosa.no_implicit_broadcast_of_dynamic_sizes attribute presents on any -// parent of the block. -inline bool isAssumingNoImplicitBroadcastOfDynamicSizes(mlir::Block *block) { - for (mlir::Operation *parentOp = block->getParentOp(); parentOp; - parentOp = parentOp->getParentOp()) - if (parentOp->hasAttr("tosa.no_implicit_broadcast_of_dynamic_sizes")) - return true; - return false; -} - -// Helper that uses the block from an OpBuilder for determining whether we -// are assuming no implict broadcast of dynamic sizes -inline bool isAssumingNoImplicitBroadcastOfDynamicSizes( - mlir::OpBuilder &builder) { - return isAssumingNoImplicitBroadcastOfDynamicSizes(builder.getBlock()); -} - } // namespace mlir::iree_compiler::aievec #endif // AIE_DIALECT_AIEVEC_AIEVECUTILS_H diff --git a/compiler/plugins/target/AMD-AIE/aievec/Passes.h b/compiler/plugins/target/AMD-AIE/aievec/Passes.h index 725b491db0..31dac0817b 100644 --- a/compiler/plugins/target/AMD-AIE/aievec/Passes.h +++ b/compiler/plugins/target/AMD-AIE/aievec/Passes.h @@ -19,20 +19,6 @@ namespace mlir::iree_compiler::aievec { -enum class Aie2Fp32Emulation : uint32_t { - AccuracySafe = 0, - AccuracyFast = 1, - AccuracyLow = 2, -}; - -struct ConvertAIEVecToLLVMOptions { - Aie2Fp32Emulation aie2Fp32Emulation = Aie2Fp32Emulation::AccuracySafe; -}; - -//===----------------------------------------------------------------------===// -// Building and Registering. -//===----------------------------------------------------------------------===// - /// Adds the "convert-vector-to-aievec" pipeline to the `OpPassManager`. This /// pipeline takes `Vector` code, transforms it to make it compatible with the /// selected `AIE` target, lowers it to `AIEVec` dialect, and performs some diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp index 124bae7d54..54037d366f 100644 --- a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp +++ b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp @@ -12,7 +12,6 @@ // to ops that can be translated to a sequence of valid AIEVec ops. //===----------------------------------------------------------------------===// -#include #include #include diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp index cdcea545e1..3c5b3503d7 100644 --- a/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp +++ b/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp @@ -494,25 +494,6 @@ static void populateCommonAIECanonicalizeConversionPatterns( patterns.getContext()); } -//============================================================================// -//============== AIEv1-specific canonicalization configuration ===============// -//============================================================================// - -static void configureAIEv1CanonicalizeLegalizations(ConversionTarget &target) { - target.addDynamicallyLegalOp( - [](vector::TransferReadOp op) { - return !op.getPermutationMap().isConstant() && - getTransferReadAlignmentOffset(op, op.getVectorType(), 128) - .value_or(0) == 0; - }); -} - -static void populateAIEv1CanonicalizeConversionPatterns( - RewritePatternSet &patterns) { - patterns.add(patterns.getContext(), 512, - 128); -} - //============================================================================// //============== AIE2-specific canonicalization configuration ===============// //============================================================================// @@ -558,11 +539,8 @@ static void populateAIE2CanonicalizeConversionPatterns( struct CanonicalizeVectorForAIEVecPass : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CanonicalizeVectorForAIEVecPass) - - // In case we want to register this pass as a standalone pass for test - // purposes. StringRef getArgument() const final { - return "test-canonicalize-vector-for-aievec"; + return "canonicalize-vector-for-aievec"; } StringRef getDescription() const final { diff --git a/compiler/plugins/target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td b/compiler/plugins/target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td index d02a6eb161..dd4c11ec5f 100644 --- a/compiler/plugins/target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td +++ b/compiler/plugins/target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td @@ -55,7 +55,7 @@ class AIE2BF16MinMaxElem : Arguments<(ins VectorOfLengthAndType<[32], [BF16]>:$lhs, VectorOfLengthAndType<[32], [BF16]>:$rhs)> ; -// ----- MAC ----- +// ----- MAC ----- def MacConfAcc32IntrOp : AIEVec2_IntrOp<"I512.I512.ACC1024.acc32.mac.conf", @@ -78,37 +78,7 @@ def MacConfBF16IntrOp : [TypeIs<"res", VectorOfLengthAndType<[8], [I64]>>]>, AIE2BF16MACConf; -// ----- MSC ----- - -def MscConfBF16IntrOp : - AIEVec2_IntrOp<"bf.msc16.conf", - [TypeIs<"res", VectorOfLengthAndType<[8], [I64]>>]>, - AIE2BF16MACConf; - -// ----- MUL ----- - -def MulConfAcc32IntrOp : - AIEVec2_IntrOp<"I512.I512.acc32.mul.conf", - [TypeIs<"res", VectorOfLengthAndType<[16], [I64]>>]>, - Arguments<(ins VectorOfLengthAndType<[64], [I8]>:$lhs, - VectorOfLengthAndType<[16], [I32]>:$rhs, - I32:$conf)>; - -def MulConfAcc64IntrOp : - AIEVec2_IntrOp<"I512.I512.acc64.mul.conf", - [TypeIs<"res", VectorOfLengthAndType<[16], [I64]>>]>, - Arguments<(ins VectorOfLengthAndType<[64], [I8]>:$lhs, - VectorOfLengthAndType<[16], [I32]>:$rhs, - I32:$conf)>; - -def MulConfBF16IntrOp : - AIEVec2_IntrOp<"bf.mul16.conf", - [TypeIs<"res", VectorOfLengthAndType<[8], [I64]>>]>, - Arguments<(ins VectorOfLengthAndType<[32], [BF16]>:$lhs, - VectorOfLengthAndType<[32], [BF16]>:$rhs, - I32:$conf)>; - -// ----- SET ----- +// ----- SET ----- def VectorSetI512I128IntrOp : AIEVec2_IntrOp<"set.I512.I128", @@ -121,7 +91,7 @@ def VectorSetI512I256IntrOp : Arguments<(ins VectorOfLengthAndType<[8], [I32]>:$src, I32:$pos)>; -// ----- SRS ----- +// ----- SRS ----- def I256V16Acc32SrsIntrOp : AIEVec2_IntrOp<"I256.v16.acc32.srs", @@ -170,34 +140,7 @@ def Vector16AccFloatToV16BF16IntrOp : [TypeIs<"res", VectorOfLengthAndType<[16], [BF16]>>]>, Arguments<(ins VectorOfLengthAndType<[8], [I64]>:$src)>; -// ----- BROADCAST ----- - -def VectorBroadcast8I512IntrOp : - AIEVec2_IntrOp<"vbroadcast8.I512", - [TypeIs<"res", VectorOfLengthAndType<[64], [I8]>>]>, - Arguments<(ins I32:$src)>; - -def VectorBroadcast16I512IntrOp : - AIEVec2_IntrOp<"vbroadcast16.I512", - [TypeIs<"res", VectorOfLengthAndType<[32], [I16]>>]>, - Arguments<(ins I32:$src)>; - -def VectorBroadcast32I512IntrOp : - AIEVec2_IntrOp<"vbroadcast32.I512", - [TypeIs<"res", VectorOfLengthAndType<[16], [I32]>>]>, - Arguments<(ins I32:$src)>; - -def VectorBroadcast16BF512IntrOp : - AIEVec2_IntrOp<"vbroadcast16.bf512", - [TypeIs<"res", VectorOfLengthAndType<[32], [BF16]>>]>, - Arguments<(ins BF16:$src)>; - -def VectorBroadcastfloatI512IntrOp : - AIEVec2_IntrOp<"vbroadcastfloat.I512", - [TypeIs<"res", VectorOfLengthAndType<[16], [F32]>>]>, - Arguments<(ins F32:$src)>; - -// ----- EXT ----- +// ----- EXT ----- def ExtI256I512IntrOp : AIEVec2_IntrOp<"ext.I256.I512", @@ -211,18 +154,7 @@ def ExtI512I1024IntrOp : Arguments<(ins VectorOfLengthAndType<[32], [I32]>:$src, I32:$idx)>; -def ExtI256I1024IntrOp : - AIEVec2_IntrOp<"ext.I256.I1024", - [TypeIs<"res", VectorOfLengthAndType<[8], [I32]>>]>, - Arguments<(ins VectorOfLengthAndType<[32], [I32]>:$src, - I32:$idx)>; - -def ExtI128I512IntrOp : - AIEVec2_IntrOp<"extract.I128.I512", - [TypeIs<"res", VectorOfLengthAndType<[4], [I32]>>]>, - Arguments<(ins VectorOfLengthAndType<[16], [I32]>:$src)>; - -// ----- CONCAT ----- +// ----- CONCAT ----- def ConcatI512I256IntrOp : AIEVec2_IntrOp<"concat.I512.I256", @@ -230,21 +162,13 @@ def ConcatI512I256IntrOp : Arguments<(ins VectorOfLengthAndType<[8], [I32]>:$lhs, VectorOfLengthAndType<[8], [I32]>:$rhs)>; -def ConcatI1024I256IntrOp : - AIEVec2_IntrOp<"concat.I1024.I256", - [TypeIs<"res", VectorOfLengthAndType<[32], [I32]>>]>, - Arguments<(ins VectorOfLengthAndType<[8], [I32]>:$src0, - VectorOfLengthAndType<[8], [I32]>:$src1, - VectorOfLengthAndType<[8], [I32]>:$src2, - VectorOfLengthAndType<[8], [I32]>:$src3)>; - def ConcatI1024I512IntrOp : AIEVec2_IntrOp<"concat.I1024.I512", [TypeIs<"res", VectorOfLengthAndType<[32], [I32]>>]>, Arguments<(ins VectorOfLengthAndType<[16], [I32]>:$lhs, VectorOfLengthAndType<[16], [I32]>:$rhs)>; -// ----- SHUFFLE ----- +// ----- SHUFFLE ----- def VectorShuffleIntrOp : AIEVec2_IntrOp<"vshuffle", @@ -253,22 +177,13 @@ def VectorShuffleIntrOp : VectorOfLengthAndType<[16], [I32]>:$rhs, I32:$mode)>; -// ----- UNDEF ----- +// ----- UNDEF ----- def UndefV16I32IntrOp : AIEVec2_IntrOp<"v16int32", [TypeIs<"res", VectorOfLengthAndType<[16], [I32]>>]>; -// ----- UPD ----- - -def UpdBF512BF256IntrOp : - AIEVec2_IntrOp<"upd.bf512.bf256", - [TypeIs<"res", VectorOfLengthAndType<[32], [BF16]>>]>, - Arguments<(ins VectorOfLengthAndType<[32], [BF16]>:$dst, - VectorOfLengthAndType<[16], [BF16]>:$src, - I32:$idx)>; - -// ----- UPS ----- +// ----- UPS ----- def Acc32V16I256UpsIntrOp : AIEVec2_IntrOp<"acc32.v16.I256.ups", @@ -317,121 +232,4 @@ def Vector16BF16ToV16AccFloatIntrOp : [TypeIs<"res", VectorOfLengthAndType<[8], [I64]>>]>, Arguments<(ins VectorOfLengthAndType<[16], [BF16]>:$src)>; -// ----- SHIFT ----- - -def VectorShiftI512I512IntrOp : - AIEVec2_IntrOp<"vshift.I512.I512", - [TypeIs<"res", VectorOfLengthAndType<[16], [I32]>>]>, - Arguments<(ins VectorOfLengthAndType<[16], [I32]>:$lhs, - VectorOfLengthAndType<[16], [I32]>:$rhs, - I32:$step, - I32:$shift)>; - -def VectorShiftBF512BF512IntrOp : - AIEVec2_IntrOp<"vshift.bf512.bf512", - [TypeIs<"res", VectorOfLengthAndType<[32], [BF16]>>]>, - Arguments<(ins VectorOfLengthAndType<[32], [BF16]>:$lhs, - VectorOfLengthAndType<[32], [BF16]>:$rhs, - I32:$step, - I32:$shift)>; - -// ----- EXTRACT ELEMENT ----- - -def VectorExtractElem8I512IntrOp : - AIEVec2_IntrOp<"vextract.elem8.I512", - [TypeIs<"res", I32>]>, - Arguments<(ins VectorOfLengthAndType<[64], [I8]>:$src, - I32:$idx, - I32:$sign)>; - -def VectorExtractElem16I512IntrOp : - AIEVec2_IntrOp<"vextract.elem16.I512", - [TypeIs<"res", I32>]>, - Arguments<(ins VectorOfLengthAndType<[32], [I16]>:$src, - I32:$idx, - I32:$sign)>; - -def VectorExtractElem32I512IntrOp : - AIEVec2_IntrOp<"vextract.elem32.I512", - [TypeIs<"res", I32>]>, - Arguments<(ins VectorOfLengthAndType<[16], [I32]>:$src, - I32:$idx, - I32:$sign)>; - -// ----- MAX ELEMENT ----- - -def VectorMaxLt8IntrOp : - AIEVec2_IntrOp<"vmax.lt8", - [TypeIs<"res", - LLVM_StructOf<[ - VectorOfLengthAndType<[64], [I8]>, - VectorOfLengthAndType<[2], [I32]>]> - >], /*numResults=*/2>, - AIE2I8MinMaxElem; - -def VectorMaxLt16IntrOp : - AIEVec2_IntrOp<"vmax.lt16", - [TypeIs<"res", - LLVM_StructOf<[ - VectorOfLengthAndType<[32], [I16]>, - I32]> - >], /*numResults=*/2>, - AIE2I16MinMaxElem; - -def VectorMaxLt32IntrOp : - AIEVec2_IntrOp<"vmax.lt32", - [TypeIs<"res", - LLVM_StructOf<[ - VectorOfLengthAndType<[16], [I32]>, - I32]> - >], /*numResults=*/2>, - AIE2I32MinMaxElem; - -def VectorMaxLtBf16IntrOp : - AIEVec2_IntrOp<"vmax.ltbf16", - [TypeIs<"res", - LLVM_StructOf<[ - VectorOfLengthAndType<[32], [BF16]>, - I32]> - >], /*numResults=*/2>, - AIE2BF16MinMaxElem; - -// ----- MIN ELEMENT ----- - -def VectorMinGe8IntrOp : - AIEVec2_IntrOp<"vmin.ge8", - [TypeIs<"res", - LLVM_StructOf<[ - VectorOfLengthAndType<[64], [I8]>, - VectorOfLengthAndType<[2], [I32]>]> - >], /*numResults=*/2>, - AIE2I8MinMaxElem; - -def VectorMinGe16IntrOp : - AIEVec2_IntrOp<"vmin.ge16", - [TypeIs<"res", - LLVM_StructOf<[ - VectorOfLengthAndType<[32], [I16]>, - I32]> - >], /*numResults=*/2>, - AIE2I16MinMaxElem; - -def VectorMinGe32IntrOp : - AIEVec2_IntrOp<"vmin.ge32", - [TypeIs<"res", - LLVM_StructOf<[ - VectorOfLengthAndType<[16], [I32]>, - I32]> - >], /*numResults=*/2>, - AIE2I32MinMaxElem; - -def VectorMinGeBf16IntrOp : - AIEVec2_IntrOp<"vmin.gebf16", - [TypeIs<"res", - LLVM_StructOf<[ - VectorOfLengthAndType<[32], [BF16]>, - I32]> - >], /*numResults=*/2>, - AIE2BF16MinMaxElem; - #endif // AIE_DIALECT_XLLVM_IR_XLLVMAIE2INTROPS_TD