From 622c3ffe8d85d699f0cf4e0a8b36d984efbcfeb8 Mon Sep 17 00:00:00 2001
From: max <maksim.levental@gmail.com>
Date: Sat, 13 Jul 2024 21:37:30 -0500
Subject: [PATCH] more DCE

---
 .../target/AMD-AIE/aievec/AIEVecToLLVM.cpp    |  69 +-----
 .../target/AMD-AIE/aievec/AIEVecUtils.h       |  97 --------
 .../plugins/target/AMD-AIE/aievec/Passes.h    |  14 --
 .../aievec/VectorToAIEVecConversions.cpp      |   1 -
 .../aievec/VectorToVectorConversions.cpp      |  24 +-
 .../target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td | 218 +-----------------
 6 files changed, 12 insertions(+), 411 deletions(-)
diff --git a/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp b/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp
index f40925f28c..d47d6dc8f7 100644
--- a/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp
+++ b/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp
@@ -107,27 +107,6 @@ static SmallVector<Value> forceCastOperandsToSignature(OpBuilder &builder,
       }));
 }
 
-struct BufferParams {
-  uint32_t start;
-  uint32_t offsets;
-  uint32_t offsets_hi;
-  uint32_t step;
-  uint32_t square;
-};
-
-std::string getVectorTypeString(VectorType type, bool abbrev = false,
-                                bool acc = false) {
-  std::stringstream ss;
-  auto size = getVectorLaneSize(type);
-  ss << "v" << size;
-  if (auto intType = dyn_cast<IntegerType>(type.getElementType())) {
-    ss << (acc ? "acc" : abbrev ? "i" : "int") << intType.getWidth();
-  } else if (dyn_cast<FloatType>(type.getElementType())) {
-    ss << (abbrev ? "f" : "float");
-  }
-  return ss.str();
-}
-
 // Squashes the easy-to-read 16-bit square encoding into
 // the 8-bit encoding the configuration register uses
 uint32_t encodeSquare(uint32_t square) {
@@ -139,15 +118,6 @@ uint32_t encodeSquare(uint32_t square) {
   return out & 0xFF;
 }
 
-// Encode the configuration register with buffer parameters and options
-// TODO: struct to handle this?
-void encodeConf(uint32_t conf[2], const BufferParams &x, const BufferParams &z,
-                bool sub) {
-  conf[0] |= ((x.step & 0x3F) << 0) | ((z.step & 0x3F) << 8);
-  conf[1] |= (encodeSquare(x.square) << 0) | (encodeSquare(z.square) << 8);
-  conf[1] |= sub << 17;
-}
-
 class UPSOpConversion : public mlir::ConvertOpToLLVMPattern<aievec::UPSOp> {
  public:
   using ConvertOpToLLVMPattern<aievec::UPSOp>::ConvertOpToLLVMPattern;
@@ -677,9 +647,8 @@ class ShuffleOpConversion
   }
 };
 
-void populateAIEVecToLLVMConversionPatterns(
-    mlir::LLVMTypeConverter &converter, mlir::RewritePatternSet &patterns,
-    Aie2Fp32Emulation aie2Fp32EmulationOption) {
+void populateAIEVecToLLVMConversionPatterns(mlir::LLVMTypeConverter &converter,
+                                            mlir::RewritePatternSet &patterns) {
   patterns.add<
 
       UPSOpConversion, SRSOpConversion,
@@ -689,14 +658,6 @@ void populateAIEVecToLLVMConversionPatterns(
 
 struct ConvertAIEVecToLLVMPass
     : public PassWrapper<ConvertAIEVecToLLVMPass, OperationPass<ModuleOp>> {
-  ConvertAIEVecToLLVMPass(const ConvertAIEVecToLLVMOptions &options) {
-    aie2Fp32Emulation = options.aie2Fp32Emulation;
-  }
-  // both of these are deleted by default because Pass::Option has deleted
-  // defaults
-  ConvertAIEVecToLLVMPass() = default;
-  ConvertAIEVecToLLVMPass(const ConvertAIEVecToLLVMPass &pass) {}
-
   StringRef getArgument() const override { return "convert-aievec-to-llvm"; }
   StringRef getDescription() const override {
     return "This pass converts AIEVec dialect ops to LLVM dialect calls to "
@@ -708,29 +669,6 @@ struct ConvertAIEVecToLLVMPass
                     mlir::vector::VectorDialect, xllvm::XLLVMDialect>();
   }
 
-  mlir::Pass::Option<Aie2Fp32Emulation> aie2Fp32Emulation{
-      *this, "aie2-fp32-emulation-strategy",
-      llvm::cl::desc(
-          "Set the AIE2 FP32 emulation strategy. Elementwise multiplication "
-          "and matrix multiplication intrinsics for FP32 input type are "
-          "emulated using bfloat16 data-path."),
-      llvm::cl::init(Aie2Fp32Emulation::AccuracySafe),
-      llvm::cl::values(
-          clEnumValN(Aie2Fp32Emulation::AccuracySafe, "accuracy-safe",
-                     "Most accurate option since input fp32 number is split "
-                     "into 3 bfloat16 numbers. float a*b would require 9 mac "
-                     "operations due to 3 bfloat16 splits each."),
-          clEnumValN(
-              Aie2Fp32Emulation::AccuracyFast, "accuracy-fast",
-              "Fast and Accurate option. Input fp32 number is split in to 3 "
-              "bfloat16 numbers. In the 9 mac operations to emulate fp32 mul, "
-              "mac operations with LSBs are ignored. (3 last terms)."),
-          clEnumValN(
-              Aie2Fp32Emulation::AccuracyLow, "accuracy-low",
-              "Fast and least accurate option. Input fp32 number is split in "
-              "to 2 bfloat16 numbers. In the 4 mac operations to emulate fp32 "
-              "mul, mac operations with LSBs are ignored. (1 last term)."))};
-
   void runOnOperation() override {
     RewritePatternSet patterns(&getContext());
     LLVMTypeConverter converter(&getContext());
@@ -740,8 +678,7 @@ struct ConvertAIEVecToLLVMPass
     converter.addConversion(
         [&](VectorType type) -> std::optional<Type> { return type; });
 
-    populateAIEVecToLLVMConversionPatterns(converter, patterns,
-                                           aie2Fp32Emulation);
+    populateAIEVecToLLVMConversionPatterns(converter, patterns);
 
     LLVMConversionTarget target(getContext());
     target.addIllegalDialect<AIEVecDialect>();
diff --git a/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h b/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h
index cc6a084e67..9b76b1b5ed 100644
--- a/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h
+++ b/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h
@@ -46,19 +46,6 @@ inline unsigned getVectorLaneSize(mlir::VectorType type) {
                          std::multiplies<int64_t>());
 }
 
-// For a 1D vector, return its size in bits. For an nD vector, return the size
-// of the innerost dimension in bits.
-inline int32_t getVectorSizeInBits(mlir::VectorType type) {
-  int32_t veclen = getVectorLaneSize(type) * getElementSizeInBits(type);
-  assert(veclen >= 128 && "AIE vector size should be greater than 128 bits");
-  return veclen;
-}
-
-// Return true if this is an operation defined in AIE dialect
-inline bool isAIEOp(mlir::Operation *op) {
-  return llvm::isa<AIEVecDialect>(op->getDialect());
-}
-
 // Determine the output type for a vector operation based on whether
 // it operates on integer or floating point data.
 inline mlir::VectorType getVectorOpDestType(mlir::VectorType type, bool AIE2) {
@@ -90,90 +77,6 @@ inline mlir::VectorType getVectorOpDestType(mlir::VectorType type, bool AIE2) {
   llvm::report_fatal_error("Unsupported destination type");
 }
 
-// Linearize the exprVec as a strided access, but do not simplify
-inline mlir::AffineExpr flattenedStridedExpr(
-    llvm::ArrayRef<int64_t> sizes, llvm::ArrayRef<mlir::AffineExpr> exprs,
-    mlir::MLIRContext *context) {
-  // Expect non-empty sizes and exprs
-  if (sizes.empty() || exprs.empty()) return nullptr;
-
-  if (is_contained(sizes, 0)) return getAffineConstantExpr(0, context);
-
-  auto maps = mlir::AffineMap::inferFromExprList(exprs, context);
-  if (maps.empty()) return nullptr;
-
-  unsigned nSymbols = maps[0].getNumSymbols();
-
-  mlir::AffineExpr expr;
-  bool dynamicPoisonBit = false;
-  int64_t runningSize = 1;
-  for (auto en : zip(reverse(exprs), reverse(sizes))) {
-    int64_t size = std::get<1>(en);
-    if (size == 0) continue;
-
-    mlir::AffineExpr dimExpr = std::get<0>(en);
-    mlir::AffineExpr stride = dynamicPoisonBit
-                                  ? getAffineSymbolExpr(nSymbols++, context)
-                                  : getAffineConstantExpr(runningSize, context);
-    expr = expr ? expr + dimExpr * stride : dimExpr * stride;
-    if (size > 0) {
-      runningSize *= size;
-      if (runningSize <= 0) return nullptr;
-    } else
-      dynamicPoisonBit = true;
-  }
-  return expr;
-}
-
-// From a linearized affine expression, compute the base and the constant
-// offset. If the access is A[i][j+2] for an N*N array A, the linearized
-// expression will be A[i*N+j+2]. The base in this case will be (i*N+j), and the
-// offset will be 2.
-inline std::pair<mlir::AffineExpr, int32_t> extractBaseAndOffset(
-    mlir::AffineExpr expr) {
-  mlir::AffineExpr base = expr;
-  int32_t offset = 0;
-
-  if (auto constExpr = llvm::dyn_cast<mlir::AffineConstantExpr>(expr)) {
-    base = nullptr;
-    offset += constExpr.getValue();
-  } else if (auto binopExpr = llvm::dyn_cast<mlir::AffineBinaryOpExpr>(expr)) {
-    if (binopExpr.getKind() == mlir::AffineExprKind::Add) {
-      mlir::AffineExpr lhs = binopExpr.getLHS(), rhs = binopExpr.getRHS();
-      if (auto constExpr = llvm::dyn_cast<mlir::AffineConstantExpr>(lhs)) {
-        base = rhs;
-        offset += constExpr.getValue();
-      }
-      if (auto constExpr = llvm::dyn_cast<mlir::AffineConstantExpr>(rhs)) {
-        base = base == rhs ? nullptr : lhs;
-        offset += constExpr.getValue();
-      }
-    }
-  }
-  return std::make_pair(base, offset);
-}
-
-// MLIR-AIE auto-vectorization to CPP flow currently doesn't support to
-// implicitly broadcast a dynamic dimension of size `1`. Hence, we assume that
-// dynamic dimensions are not with size '1' that can be interpreted to various
-// broadcasting scenarios. We let lowerings assume this on a per-scope basis if
-// the tosa.no_implicit_broadcast_of_dynamic_sizes attribute presents on any
-// parent of the block.
-inline bool isAssumingNoImplicitBroadcastOfDynamicSizes(mlir::Block *block) {
-  for (mlir::Operation *parentOp = block->getParentOp(); parentOp;
-       parentOp = parentOp->getParentOp())
-    if (parentOp->hasAttr("tosa.no_implicit_broadcast_of_dynamic_sizes"))
-      return true;
-  return false;
-}
-
-// Helper that uses the block from an OpBuilder for determining whether we
-// are assuming no implict broadcast of dynamic sizes
-inline bool isAssumingNoImplicitBroadcastOfDynamicSizes(
-    mlir::OpBuilder &builder) {
-  return isAssumingNoImplicitBroadcastOfDynamicSizes(builder.getBlock());
-}
-
 }  // namespace mlir::iree_compiler::aievec
 
 #endif  // AIE_DIALECT_AIEVEC_AIEVECUTILS_H
diff --git a/compiler/plugins/target/AMD-AIE/aievec/Passes.h b/compiler/plugins/target/AMD-AIE/aievec/Passes.h
index 725b491db0..31dac0817b 100644
--- a/compiler/plugins/target/AMD-AIE/aievec/Passes.h
+++ b/compiler/plugins/target/AMD-AIE/aievec/Passes.h
@@ -19,20 +19,6 @@
 
 namespace mlir::iree_compiler::aievec {
 
-enum class Aie2Fp32Emulation : uint32_t {
-  AccuracySafe = 0,
-  AccuracyFast = 1,
-  AccuracyLow = 2,
-};
-
-struct ConvertAIEVecToLLVMOptions {
-  Aie2Fp32Emulation aie2Fp32Emulation = Aie2Fp32Emulation::AccuracySafe;
-};
-
-//===----------------------------------------------------------------------===//
-// Building and Registering.
-//===----------------------------------------------------------------------===//
-
 /// Adds the "convert-vector-to-aievec" pipeline to the `OpPassManager`. This
 /// pipeline takes `Vector` code, transforms it to make it compatible with the
 /// selected `AIE` target, lowers it to `AIEVec` dialect, and performs some
diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp
index 124bae7d54..54037d366f 100644
--- a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp
+++ b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp
@@ -12,7 +12,6 @@
 // to ops that can be translated to a sequence of valid AIEVec ops.
 //===----------------------------------------------------------------------===//
 
-#include <bitset>
 #include <optional>
 #include <tuple>
 
diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp
index cdcea545e1..3c5b3503d7 100644
--- a/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp
+++ b/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp
@@ -494,25 +494,6 @@ static void populateCommonAIECanonicalizeConversionPatterns(
       patterns.getContext());
 }
 
-//============================================================================//
-//============== AIEv1-specific canonicalization configuration ===============//
-//============================================================================//
-
-static void configureAIEv1CanonicalizeLegalizations(ConversionTarget &target) {
-  target.addDynamicallyLegalOp<vector::TransferReadOp>(
-      [](vector::TransferReadOp op) {
-        return !op.getPermutationMap().isConstant() &&
-               getTransferReadAlignmentOffset(op, op.getVectorType(), 128)
-                       .value_or(0) == 0;
-      });
-}
-
-static void populateAIEv1CanonicalizeConversionPatterns(
-    RewritePatternSet &patterns) {
-  patterns.add<SplitUnalignedTransferReadPattern>(patterns.getContext(), 512,
-                                                  128);
-}
-
 //============================================================================//
 //============== AIE2-specific canonicalization configuration ===============//
 //============================================================================//
@@ -558,11 +539,8 @@ static void populateAIE2CanonicalizeConversionPatterns(
 struct CanonicalizeVectorForAIEVecPass
     : public PassWrapper<CanonicalizeVectorForAIEVecPass, OperationPass<>> {
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CanonicalizeVectorForAIEVecPass)
-
-  // In case we want to register this pass as a standalone pass for test
-  // purposes.
   StringRef getArgument() const final {
-    return "test-canonicalize-vector-for-aievec";
+    return "canonicalize-vector-for-aievec";
   }
 
   StringRef getDescription() const final {
diff --git a/compiler/plugins/target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td b/compiler/plugins/target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td
index d02a6eb161..dd4c11ec5f 100644
--- a/compiler/plugins/target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td
+++ b/compiler/plugins/target/AMD-AIE/aievec/XLLVMAIE2IntrOps.td
@@ -55,7 +55,7 @@ class AIE2BF16MinMaxElem :
     Arguments<(ins VectorOfLengthAndType<[32], [BF16]>:$lhs,
                    VectorOfLengthAndType<[32], [BF16]>:$rhs)> ;
 
-// ----- MAC ----- 
+// ----- MAC -----
 
 def MacConfAcc32IntrOp :
     AIEVec2_IntrOp<"I512.I512.ACC1024.acc32.mac.conf",
@@ -78,37 +78,7 @@ def MacConfBF16IntrOp :
         [TypeIs<"res", VectorOfLengthAndType<[8], [I64]>>]>,
         AIE2BF16MACConf;
 
-// ----- MSC ----- 
-
-def MscConfBF16IntrOp :
-    AIEVec2_IntrOp<"bf.msc16.conf",
-        [TypeIs<"res", VectorOfLengthAndType<[8], [I64]>>]>,
-        AIE2BF16MACConf;
-
-// ----- MUL ----- 
-
-def MulConfAcc32IntrOp :
-    AIEVec2_IntrOp<"I512.I512.acc32.mul.conf",
-        [TypeIs<"res", VectorOfLengthAndType<[16], [I64]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[64], [I8]>:$lhs,
-                   VectorOfLengthAndType<[16], [I32]>:$rhs,
-                   I32:$conf)>;
-
-def MulConfAcc64IntrOp :
-    AIEVec2_IntrOp<"I512.I512.acc64.mul.conf",
-        [TypeIs<"res", VectorOfLengthAndType<[16], [I64]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[64], [I8]>:$lhs,
-                   VectorOfLengthAndType<[16], [I32]>:$rhs,
-                   I32:$conf)>;
-
-def MulConfBF16IntrOp :
-    AIEVec2_IntrOp<"bf.mul16.conf",
-        [TypeIs<"res", VectorOfLengthAndType<[8], [I64]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[32], [BF16]>:$lhs,
-                   VectorOfLengthAndType<[32], [BF16]>:$rhs,
-                   I32:$conf)>;
-
-// ----- SET ----- 
+// ----- SET -----
 
 def VectorSetI512I128IntrOp :
     AIEVec2_IntrOp<"set.I512.I128",
@@ -121,7 +91,7 @@ def VectorSetI512I256IntrOp :
     Arguments<(ins VectorOfLengthAndType<[8], [I32]>:$src,
                    I32:$pos)>;
 
-// ----- SRS ----- 
+// ----- SRS -----
 
 def I256V16Acc32SrsIntrOp :
     AIEVec2_IntrOp<"I256.v16.acc32.srs",
@@ -170,34 +140,7 @@ def Vector16AccFloatToV16BF16IntrOp :
         [TypeIs<"res", VectorOfLengthAndType<[16], [BF16]>>]>,
     Arguments<(ins VectorOfLengthAndType<[8], [I64]>:$src)>;
 
-// ----- BROADCAST ----- 
-
-def VectorBroadcast8I512IntrOp :
-    AIEVec2_IntrOp<"vbroadcast8.I512",
-        [TypeIs<"res", VectorOfLengthAndType<[64], [I8]>>]>,
-    Arguments<(ins I32:$src)>;
-
-def VectorBroadcast16I512IntrOp :
-    AIEVec2_IntrOp<"vbroadcast16.I512",
-        [TypeIs<"res", VectorOfLengthAndType<[32], [I16]>>]>,
-    Arguments<(ins I32:$src)>;
-
-def VectorBroadcast32I512IntrOp :
-    AIEVec2_IntrOp<"vbroadcast32.I512",
-        [TypeIs<"res", VectorOfLengthAndType<[16], [I32]>>]>,
-    Arguments<(ins I32:$src)>;
-
-def VectorBroadcast16BF512IntrOp :
-    AIEVec2_IntrOp<"vbroadcast16.bf512",
-        [TypeIs<"res", VectorOfLengthAndType<[32], [BF16]>>]>,
-    Arguments<(ins BF16:$src)>;
-
-def VectorBroadcastfloatI512IntrOp :
-    AIEVec2_IntrOp<"vbroadcastfloat.I512",
-        [TypeIs<"res", VectorOfLengthAndType<[16], [F32]>>]>,
-    Arguments<(ins F32:$src)>;
-
-// ----- EXT ----- 
+// ----- EXT -----
 
 def ExtI256I512IntrOp :
     AIEVec2_IntrOp<"ext.I256.I512",
@@ -211,18 +154,7 @@ def ExtI512I1024IntrOp :
     Arguments<(ins VectorOfLengthAndType<[32], [I32]>:$src,
                    I32:$idx)>;
 
-def ExtI256I1024IntrOp :
-    AIEVec2_IntrOp<"ext.I256.I1024",
-        [TypeIs<"res", VectorOfLengthAndType<[8], [I32]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[32], [I32]>:$src,
-                   I32:$idx)>;
-
-def ExtI128I512IntrOp :
-    AIEVec2_IntrOp<"extract.I128.I512",
-        [TypeIs<"res", VectorOfLengthAndType<[4], [I32]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[16], [I32]>:$src)>;
-  
-// ----- CONCAT ----- 
+// ----- CONCAT -----
 
 def ConcatI512I256IntrOp :
     AIEVec2_IntrOp<"concat.I512.I256",
@@ -230,21 +162,13 @@ def ConcatI512I256IntrOp :
     Arguments<(ins VectorOfLengthAndType<[8], [I32]>:$lhs,
                    VectorOfLengthAndType<[8], [I32]>:$rhs)>;
 
-def ConcatI1024I256IntrOp :
-    AIEVec2_IntrOp<"concat.I1024.I256",
-        [TypeIs<"res", VectorOfLengthAndType<[32], [I32]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[8], [I32]>:$src0,
-                   VectorOfLengthAndType<[8], [I32]>:$src1,
-                   VectorOfLengthAndType<[8], [I32]>:$src2,
-                   VectorOfLengthAndType<[8], [I32]>:$src3)>;
-
 def ConcatI1024I512IntrOp :
     AIEVec2_IntrOp<"concat.I1024.I512",
         [TypeIs<"res", VectorOfLengthAndType<[32], [I32]>>]>,
     Arguments<(ins VectorOfLengthAndType<[16], [I32]>:$lhs,
                    VectorOfLengthAndType<[16], [I32]>:$rhs)>;
 
-// ----- SHUFFLE ----- 
+// ----- SHUFFLE -----
 
 def VectorShuffleIntrOp :
     AIEVec2_IntrOp<"vshuffle",
@@ -253,22 +177,13 @@ def VectorShuffleIntrOp :
                    VectorOfLengthAndType<[16], [I32]>:$rhs,
                    I32:$mode)>;
 
-// ----- UNDEF ----- 
+// ----- UNDEF -----
 
 def UndefV16I32IntrOp :
     AIEVec2_IntrOp<"v16int32",
         [TypeIs<"res", VectorOfLengthAndType<[16], [I32]>>]>;
 
-// ----- UPD ----- 
-
-def UpdBF512BF256IntrOp :
-    AIEVec2_IntrOp<"upd.bf512.bf256",
-        [TypeIs<"res", VectorOfLengthAndType<[32], [BF16]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[32], [BF16]>:$dst,
-                   VectorOfLengthAndType<[16], [BF16]>:$src,
-                   I32:$idx)>;
-
-// ----- UPS ----- 
+// ----- UPS -----
 
 def Acc32V16I256UpsIntrOp :
     AIEVec2_IntrOp<"acc32.v16.I256.ups",
@@ -317,121 +232,4 @@ def Vector16BF16ToV16AccFloatIntrOp :
         [TypeIs<"res", VectorOfLengthAndType<[8], [I64]>>]>,
     Arguments<(ins VectorOfLengthAndType<[16], [BF16]>:$src)>;
 
-// ----- SHIFT ----- 
-
-def VectorShiftI512I512IntrOp :
-    AIEVec2_IntrOp<"vshift.I512.I512",
-        [TypeIs<"res", VectorOfLengthAndType<[16], [I32]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[16], [I32]>:$lhs,
-                   VectorOfLengthAndType<[16], [I32]>:$rhs,
-                   I32:$step,
-                   I32:$shift)>;
-
-def VectorShiftBF512BF512IntrOp :
-    AIEVec2_IntrOp<"vshift.bf512.bf512",
-        [TypeIs<"res", VectorOfLengthAndType<[32], [BF16]>>]>,
-    Arguments<(ins VectorOfLengthAndType<[32], [BF16]>:$lhs,
-                   VectorOfLengthAndType<[32], [BF16]>:$rhs,
-                   I32:$step,
-                   I32:$shift)>;
-
-// ----- EXTRACT ELEMENT ----- 
-
-def VectorExtractElem8I512IntrOp :
-    AIEVec2_IntrOp<"vextract.elem8.I512",
-        [TypeIs<"res", I32>]>,
-    Arguments<(ins VectorOfLengthAndType<[64], [I8]>:$src,
-                   I32:$idx,
-                   I32:$sign)>;
-
-def VectorExtractElem16I512IntrOp :
-    AIEVec2_IntrOp<"vextract.elem16.I512",
-        [TypeIs<"res", I32>]>,
-    Arguments<(ins VectorOfLengthAndType<[32], [I16]>:$src,
-                   I32:$idx,
-                   I32:$sign)>;
-
-def VectorExtractElem32I512IntrOp :
-    AIEVec2_IntrOp<"vextract.elem32.I512",
-        [TypeIs<"res", I32>]>,
-    Arguments<(ins VectorOfLengthAndType<[16], [I32]>:$src,
-                   I32:$idx,
-                   I32:$sign)>;
-
-// ----- MAX ELEMENT -----
-
-def VectorMaxLt8IntrOp :
-    AIEVec2_IntrOp<"vmax.lt8",
-        [TypeIs<"res",
-            LLVM_StructOf<[
-                VectorOfLengthAndType<[64], [I8]>,
-                VectorOfLengthAndType<[2], [I32]>]>
-        >], /*numResults=*/2>,
-    AIE2I8MinMaxElem;
-
-def VectorMaxLt16IntrOp :
-    AIEVec2_IntrOp<"vmax.lt16",
-        [TypeIs<"res",
-            LLVM_StructOf<[
-                VectorOfLengthAndType<[32], [I16]>,
-                I32]>
-        >], /*numResults=*/2>,
-    AIE2I16MinMaxElem;
-
-def VectorMaxLt32IntrOp :
-    AIEVec2_IntrOp<"vmax.lt32",
-        [TypeIs<"res",
-            LLVM_StructOf<[
-                VectorOfLengthAndType<[16], [I32]>,
-                I32]>
-        >], /*numResults=*/2>,
-    AIE2I32MinMaxElem;
-
-def VectorMaxLtBf16IntrOp :
-    AIEVec2_IntrOp<"vmax.ltbf16",
-        [TypeIs<"res",
-            LLVM_StructOf<[
-                VectorOfLengthAndType<[32], [BF16]>,
-                I32]>
-        >], /*numResults=*/2>,
-    AIE2BF16MinMaxElem;
-
-// ----- MIN ELEMENT -----
-
-def VectorMinGe8IntrOp :
-    AIEVec2_IntrOp<"vmin.ge8",
-        [TypeIs<"res",
-            LLVM_StructOf<[
-                VectorOfLengthAndType<[64], [I8]>,
-                VectorOfLengthAndType<[2], [I32]>]>
-        >], /*numResults=*/2>,
-    AIE2I8MinMaxElem;
-
-def VectorMinGe16IntrOp :
-    AIEVec2_IntrOp<"vmin.ge16",
-        [TypeIs<"res",
-            LLVM_StructOf<[
-                VectorOfLengthAndType<[32], [I16]>,
-                I32]>
-        >], /*numResults=*/2>,
-    AIE2I16MinMaxElem;
-
-def VectorMinGe32IntrOp :
-    AIEVec2_IntrOp<"vmin.ge32",
-        [TypeIs<"res",
-            LLVM_StructOf<[
-                VectorOfLengthAndType<[16], [I32]>,
-                I32]>
-        >], /*numResults=*/2>,
-    AIE2I32MinMaxElem;
-
-def VectorMinGeBf16IntrOp :
-    AIEVec2_IntrOp<"vmin.gebf16",
-        [TypeIs<"res",
-            LLVM_StructOf<[
-                VectorOfLengthAndType<[32], [BF16]>,
-                I32]>
-        >], /*numResults=*/2>,
-    AIE2BF16MinMaxElem;
-
 #endif // AIE_DIALECT_XLLVM_IR_XLLVMAIE2INTROPS_TD