more DCE

nod-ai · Jul 14, 2024 · d421d6a · d421d6a
1 parent 9d27498
commit d421d6a
Show file tree

Hide file tree

Showing 7 changed files with 12 additions and 423 deletions.
diff --git a/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp b/compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp
@@ -107,27 +107,6 @@ static SmallVector<Value> forceCastOperandsToSignature(OpBuilder &builder,
       }));
 }
 
-struct BufferParams {
-  uint32_t start;
-  uint32_t offsets;
-  uint32_t offsets_hi;
-  uint32_t step;
-  uint32_t square;
-};
-
-std::string getVectorTypeString(VectorType type, bool abbrev = false,
-                                bool acc = false) {
-  std::stringstream ss;
-  auto size = getVectorLaneSize(type);
-  ss << "v" << size;
-  if (auto intType = dyn_cast<IntegerType>(type.getElementType())) {
-    ss << (acc ? "acc" : abbrev ? "i" : "int") << intType.getWidth();
-  } else if (dyn_cast<FloatType>(type.getElementType())) {
-    ss << (abbrev ? "f" : "float");
-  }
-  return ss.str();
-}
-
 // Squashes the easy-to-read 16-bit square encoding into
 // the 8-bit encoding the configuration register uses
 uint32_t encodeSquare(uint32_t square) {
@@ -139,15 +118,6 @@ uint32_t encodeSquare(uint32_t square) {
   return out & 0xFF;
 }
 
-// Encode the configuration register with buffer parameters and options
-// TODO: struct to handle this?
-void encodeConf(uint32_t conf[2], const BufferParams &x, const BufferParams &z,
-                bool sub) {
-  conf[0] |= ((x.step & 0x3F) << 0) | ((z.step & 0x3F) << 8);
-  conf[1] |= (encodeSquare(x.square) << 0) | (encodeSquare(z.square) << 8);
-  conf[1] |= sub << 17;
-}
-
 class UPSOpConversion : public mlir::ConvertOpToLLVMPattern<aievec::UPSOp> {
  public:
   using ConvertOpToLLVMPattern<aievec::UPSOp>::ConvertOpToLLVMPattern;
@@ -677,9 +647,8 @@ class ShuffleOpConversion
   }
 };
 
-void populateAIEVecToLLVMConversionPatterns(
-    mlir::LLVMTypeConverter &converter, mlir::RewritePatternSet &patterns,
-    Aie2Fp32Emulation aie2Fp32EmulationOption) {
+void populateAIEVecToLLVMConversionPatterns(mlir::LLVMTypeConverter &converter,
+                                            mlir::RewritePatternSet &patterns) {
   patterns.add<
 
       UPSOpConversion, SRSOpConversion,
@@ -689,14 +658,6 @@ void populateAIEVecToLLVMConversionPatterns(
 
 struct ConvertAIEVecToLLVMPass
     : public PassWrapper<ConvertAIEVecToLLVMPass, OperationPass<ModuleOp>> {
-  ConvertAIEVecToLLVMPass(const ConvertAIEVecToLLVMOptions &options) {
-    aie2Fp32Emulation = options.aie2Fp32Emulation;
-  }
-  // both of these are deleted by default because Pass::Option has deleted
-  // defaults
-  ConvertAIEVecToLLVMPass() = default;
-  ConvertAIEVecToLLVMPass(const ConvertAIEVecToLLVMPass &pass) {}
-
   StringRef getArgument() const override { return "convert-aievec-to-llvm"; }
   StringRef getDescription() const override {
     return "This pass converts AIEVec dialect ops to LLVM dialect calls to "
@@ -708,29 +669,6 @@ struct ConvertAIEVecToLLVMPass
                     mlir::vector::VectorDialect, xllvm::XLLVMDialect>();
   }
 
-  mlir::Pass::Option<Aie2Fp32Emulation> aie2Fp32Emulation{
-      *this, "aie2-fp32-emulation-strategy",
-      llvm::cl::desc(
-          "Set the AIE2 FP32 emulation strategy. Elementwise multiplication "
-          "and matrix multiplication intrinsics for FP32 input type are "
-          "emulated using bfloat16 data-path."),
-      llvm::cl::init(Aie2Fp32Emulation::AccuracySafe),
-      llvm::cl::values(
-          clEnumValN(Aie2Fp32Emulation::AccuracySafe, "accuracy-safe",
-                     "Most accurate option since input fp32 number is split "
-                     "into 3 bfloat16 numbers. float a*b would require 9 mac "
-                     "operations due to 3 bfloat16 splits each."),
-          clEnumValN(
-              Aie2Fp32Emulation::AccuracyFast, "accuracy-fast",
-              "Fast and Accurate option. Input fp32 number is split in to 3 "
-              "bfloat16 numbers. In the 9 mac operations to emulate fp32 mul, "
-              "mac operations with LSBs are ignored. (3 last terms)."),
-          clEnumValN(
-              Aie2Fp32Emulation::AccuracyLow, "accuracy-low",
-              "Fast and least accurate option. Input fp32 number is split in "
-              "to 2 bfloat16 numbers. In the 4 mac operations to emulate fp32 "
-              "mul, mac operations with LSBs are ignored. (1 last term)."))};
-
   void runOnOperation() override {
     RewritePatternSet patterns(&getContext());
     LLVMTypeConverter converter(&getContext());
@@ -740,8 +678,7 @@ struct ConvertAIEVecToLLVMPass
     converter.addConversion(
         [&](VectorType type) -> std::optional<Type> { return type; });
 
-    populateAIEVecToLLVMConversionPatterns(converter, patterns,
-                                           aie2Fp32Emulation);
+    populateAIEVecToLLVMConversionPatterns(converter, patterns);
 
     LLVMConversionTarget target(getContext());
     target.addIllegalDialect<AIEVecDialect>();

diff --git a/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h b/compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h
@@ -46,19 +46,6 @@ inline unsigned getVectorLaneSize(mlir::VectorType type) {
                          std::multiplies<int64_t>());
 }
 
-// For a 1D vector, return its size in bits. For an nD vector, return the size
-// of the innermost dimension in bits.
-inline int32_t getVectorSizeInBits(mlir::VectorType type) {
-  int32_t veclen = getVectorLaneSize(type) * getElementSizeInBits(type);
-  assert(veclen >= 128 && "AIE vector size should be greater than 128 bits");
-  return veclen;
-}
-
-// Return true if this is an operation defined in AIE dialect
-inline bool isAIEOp(mlir::Operation *op) {
-  return llvm::isa<AIEVecDialect>(op->getDialect());
-}
-
 // Determine the output type for a vector operation based on whether
 // it operates on integer or floating point data.
 inline mlir::VectorType getVectorOpDestType(mlir::VectorType type, bool AIE2) {
@@ -90,90 +77,6 @@ inline mlir::VectorType getVectorOpDestType(mlir::VectorType type, bool AIE2) {
   llvm::report_fatal_error("Unsupported destination type");
 }
 
-// Linearize the exprVec as a strided access, but do not simplify
-inline mlir::AffineExpr flattenedStridedExpr(
-    llvm::ArrayRef<int64_t> sizes, llvm::ArrayRef<mlir::AffineExpr> exprs,
-    mlir::MLIRContext *context) {
-  // Expect non-empty sizes and exprs
-  if (sizes.empty() || exprs.empty()) return nullptr;
-
-  if (is_contained(sizes, 0)) return getAffineConstantExpr(0, context);
-
-  auto maps = mlir::AffineMap::inferFromExprList(exprs, context);
-  if (maps.empty()) return nullptr;
-
-  unsigned nSymbols = maps[0].getNumSymbols();
-
-  mlir::AffineExpr expr;
-  bool dynamicPoisonBit = false;
-  int64_t runningSize = 1;
-  for (auto en : zip(reverse(exprs), reverse(sizes))) {
-    int64_t size = std::get<1>(en);
-    if (size == 0) continue;
-
-    mlir::AffineExpr dimExpr = std::get<0>(en);
-    mlir::AffineExpr stride = dynamicPoisonBit
-                                  ? getAffineSymbolExpr(nSymbols++, context)
-                                  : getAffineConstantExpr(runningSize, context);
-    expr = expr ? expr + dimExpr * stride : dimExpr * stride;
-    if (size > 0) {
-      runningSize *= size;
-      if (runningSize <= 0) return nullptr;
-    } else
-      dynamicPoisonBit = true;
-  }
-  return expr;
-}
-
-// From a linearized affine expression, compute the base and the constant
-// offset. If the access is A[i][j+2] for an N*N array A, the linearized
-// expression will be A[i*N+j+2]. The base in this case will be (i*N+j), and the
-// offset will be 2.
-inline std::pair<mlir::AffineExpr, int32_t> extractBaseAndOffset(
-    mlir::AffineExpr expr) {
-  mlir::AffineExpr base = expr;
-  int32_t offset = 0;
-
-  if (auto constExpr = llvm::dyn_cast<mlir::AffineConstantExpr>(expr)) {
-    base = nullptr;
-    offset += constExpr.getValue();
-  } else if (auto binopExpr = llvm::dyn_cast<mlir::AffineBinaryOpExpr>(expr)) {
-    if (binopExpr.getKind() == mlir::AffineExprKind::Add) {
-      mlir::AffineExpr lhs = binopExpr.getLHS(), rhs = binopExpr.getRHS();
-      if (auto constExpr = llvm::dyn_cast<mlir::AffineConstantExpr>(lhs)) {
-        base = rhs;
-        offset += constExpr.getValue();
-      }
-      if (auto constExpr = llvm::dyn_cast<mlir::AffineConstantExpr>(rhs)) {
-        base = base == rhs ? nullptr : lhs;
-        offset += constExpr.getValue();
-      }
-    }
-  }
-  return std::make_pair(base, offset);
-}
-
-// The MLIR-AIE auto-vectorization-to-CPP flow currently does not support
-// implicitly broadcasting a dynamic dimension of size `1`. Hence, we assume
-// that dynamic dimensions do not have size `1`, which could otherwise be
-// interpreted as various broadcasting scenarios. Lowerings may assume this on
-// a per-scope basis if the tosa.no_implicit_broadcast_of_dynamic_sizes
-// attribute is present on any parent of the block.
-inline bool isAssumingNoImplicitBroadcastOfDynamicSizes(mlir::Block *block) {
-  for (mlir::Operation *parentOp = block->getParentOp(); parentOp;
-       parentOp = parentOp->getParentOp())
-    if (parentOp->hasAttr("tosa.no_implicit_broadcast_of_dynamic_sizes"))
-      return true;
-  return false;
-}
-
-// Helper that uses the block from an OpBuilder for determining whether we
-// are assuming no implicit broadcast of dynamic sizes
-inline bool isAssumingNoImplicitBroadcastOfDynamicSizes(
-    mlir::OpBuilder &builder) {
-  return isAssumingNoImplicitBroadcastOfDynamicSizes(builder.getBlock());
-}
-
 }  // namespace mlir::iree_compiler::aievec
 
 #endif  // AIE_DIALECT_AIEVEC_AIEVECUTILS_H
diff --git a/compiler/plugins/target/AMD-AIE/aievec/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/aievec/CMakeLists.txt
@@ -7,16 +7,6 @@
 set(IREE_PACKAGE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}")
 set(IREE_PACKAGE_ROOT_PREFIX "iree::target::amd-aie::aievec")
 
-list(APPEND IREE_COMPILER_TABLEGEN_INCLUDE_DIRS
-    "${IREE_MLIR_AIE_SOURCE_DIR}/include")
-
-iree_cc_library(
-  NAME
-    defs
-  INCLUDES
-    "${IREE_MLIR_AIE_SOURCE_DIR}/include"
-)
-
 ###############################################################################
 # AIEVec Dialect
 ###############################################################################
@@ -73,7 +63,6 @@ iree_cc_library(
     AIEVecOps.cpp
     XLLVMOps.cpp
   DEPS
-    ::defs
     ::AIEVecOpsGen
     ::AIEVecDialectGen
     ::AIEVecAttrsGen
@@ -93,7 +82,6 @@ iree_cc_library(
     XLLVMToLLVMIRTranslation.cpp
   DEPS
     MLIREmitCDialect
-    ::defs
     ::AIEVecDialectIR
     ::AIEVecXLLVMOpsGen
 )

diff --git a/compiler/plugins/target/AMD-AIE/aievec/Passes.h b/compiler/plugins/target/AMD-AIE/aievec/Passes.h
@@ -19,20 +19,6 @@
 
 namespace mlir::iree_compiler::aievec {
 
-enum class Aie2Fp32Emulation : uint32_t {
-  AccuracySafe = 0,
-  AccuracyFast = 1,
-  AccuracyLow = 2,
-};
-
-struct ConvertAIEVecToLLVMOptions {
-  Aie2Fp32Emulation aie2Fp32Emulation = Aie2Fp32Emulation::AccuracySafe;
-};
-
-//===----------------------------------------------------------------------===//
-// Building and Registering.
-//===----------------------------------------------------------------------===//
-
 /// Adds the "convert-vector-to-aievec" pipeline to the `OpPassManager`. This
 /// pipeline takes `Vector` code, transforms it to make it compatible with the
 /// selected `AIE` target, lowers it to `AIEVec` dialect, and performs some

diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToAIEVecConversions.cpp
@@ -12,7 +12,6 @@
 // to ops that can be translated to a sequence of valid AIEVec ops.
 //===----------------------------------------------------------------------===//
 
-#include <bitset>
 #include <optional>
 #include <tuple>
 

diff --git a/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp b/compiler/plugins/target/AMD-AIE/aievec/VectorToVectorConversions.cpp
@@ -494,25 +494,6 @@ static void populateCommonAIECanonicalizeConversionPatterns(
       patterns.getContext());
 }
 
-//============================================================================//
-//============== AIEv1-specific canonicalization configuration ===============//
-//============================================================================//
-
-static void configureAIEv1CanonicalizeLegalizations(ConversionTarget &target) {
-  target.addDynamicallyLegalOp<vector::TransferReadOp>(
-      [](vector::TransferReadOp op) {
-        return !op.getPermutationMap().isConstant() &&
-               getTransferReadAlignmentOffset(op, op.getVectorType(), 128)
-                       .value_or(0) == 0;
-      });
-}
-
-static void populateAIEv1CanonicalizeConversionPatterns(
-    RewritePatternSet &patterns) {
-  patterns.add<SplitUnalignedTransferReadPattern>(patterns.getContext(), 512,
-                                                  128);
-}
-
 //============================================================================//
 //============== AIE2-specific canonicalization configuration ===============//
 //============================================================================//
@@ -558,11 +539,8 @@ static void populateAIE2CanonicalizeConversionPatterns(
 struct CanonicalizeVectorForAIEVecPass
     : public PassWrapper<CanonicalizeVectorForAIEVecPass, OperationPass<>> {
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CanonicalizeVectorForAIEVecPass)
-
-  // In case we want to register this pass as a standalone pass for test
-  // purposes.
   StringRef getArgument() const final {
-    return "test-canonicalize-vector-for-aievec";
+    return "canonicalize-vector-for-aievec";
   }
 
   StringRef getDescription() const final {