Skip to content

Commit

Permalink
more DCE
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Jul 14, 2024
1 parent 9d27498 commit d421d6a
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 423 deletions.
69 changes: 3 additions & 66 deletions compiler/plugins/target/AMD-AIE/aievec/AIEVecToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,27 +107,6 @@ static SmallVector<Value> forceCastOperandsToSignature(OpBuilder &builder,
}));
}

// Plain aggregate of per-buffer parameters consumed by encodeConf().
// In the code visible here only `step` (masked to its low 6 bits) and `square`
// (run through encodeSquare) are read; the exact hardware meaning of the other
// fields is not shown in this file section — TODO confirm against the AIE
// configuration-register documentation.
//
// Every member is zero-initialized so that a default-constructed instance
// never carries indeterminate bits into the configuration words.
struct BufferParams {
  uint32_t start = 0;
  uint32_t offsets = 0;
  uint32_t offsets_hi = 0;
  uint32_t step = 0;
  uint32_t square = 0;
};

// Builds a short textual name for a vector type: "v", then the lane count
// reported by getVectorLaneSize, then an element-type suffix.
//   - integer elements: "acc" if `acc` is set, otherwise "i" if `abbrev` is
//     set, otherwise "int"; followed by the integer bit width.
//   - float elements: "f" if `abbrev` is set, otherwise "float" (no width).
//   - any other element type: no suffix is appended.
std::string getVectorTypeString(VectorType type, bool abbrev = false,
                                bool acc = false) {
  std::stringstream name;
  name << "v" << getVectorLaneSize(type);

  auto elementType = type.getElementType();
  if (auto asInteger = dyn_cast<IntegerType>(elementType)) {
    // `acc` takes precedence over `abbrev`.
    const char *prefix = "int";
    if (acc)
      prefix = "acc";
    else if (abbrev)
      prefix = "i";
    name << prefix << asInteger.getWidth();
  } else if (dyn_cast<FloatType>(elementType)) {
    if (abbrev)
      name << "f";
    else
      name << "float";
  }
  return name.str();
}

// Squashes the easy-to-read 16-bit square encoding into
// the 8-bit encoding the configuration register uses
uint32_t encodeSquare(uint32_t square) {
Expand All @@ -139,15 +118,6 @@ uint32_t encodeSquare(uint32_t square) {
return out & 0xFF;
}

// Encode the configuration register with buffer parameters and options.
// Bits are OR-ed into `conf`, so the caller is responsible for the initial
// contents of both words:
//   conf[0]: low 6 bits of x.step at bit 0, low 6 bits of z.step at bit 8
//   conf[1]: encodeSquare(x.square) at bit 0, encodeSquare(z.square) at bit 8,
//            and `sub` at bit 17
// TODO: struct to handle this?
void encodeConf(uint32_t conf[2], const BufferParams &x, const BufferParams &z,
                bool sub) {
  constexpr uint32_t kStepMask = 0x3F;
  constexpr unsigned kZShift = 8;
  constexpr unsigned kSubShift = 17;

  const uint32_t stepBits =
      (x.step & kStepMask) | ((z.step & kStepMask) << kZShift);
  conf[0] |= stepBits;

  const uint32_t squareBits =
      encodeSquare(x.square) | (encodeSquare(z.square) << kZShift);
  conf[1] |= squareBits;
  conf[1] |= static_cast<uint32_t>(sub) << kSubShift;
}

class UPSOpConversion : public mlir::ConvertOpToLLVMPattern<aievec::UPSOp> {
public:
using ConvertOpToLLVMPattern<aievec::UPSOp>::ConvertOpToLLVMPattern;
Expand Down Expand Up @@ -677,9 +647,8 @@ class ShuffleOpConversion
}
};

void populateAIEVecToLLVMConversionPatterns(
mlir::LLVMTypeConverter &converter, mlir::RewritePatternSet &patterns,
Aie2Fp32Emulation aie2Fp32EmulationOption) {
void populateAIEVecToLLVMConversionPatterns(mlir::LLVMTypeConverter &converter,
mlir::RewritePatternSet &patterns) {
patterns.add<

UPSOpConversion, SRSOpConversion,
Expand All @@ -689,14 +658,6 @@ void populateAIEVecToLLVMConversionPatterns(

struct ConvertAIEVecToLLVMPass
: public PassWrapper<ConvertAIEVecToLLVMPass, OperationPass<ModuleOp>> {
ConvertAIEVecToLLVMPass(const ConvertAIEVecToLLVMOptions &options) {
aie2Fp32Emulation = options.aie2Fp32Emulation;
}
// both of these are deleted by default because Pass::Option has deleted
// defaults
ConvertAIEVecToLLVMPass() = default;
ConvertAIEVecToLLVMPass(const ConvertAIEVecToLLVMPass &pass) {}

StringRef getArgument() const override { return "convert-aievec-to-llvm"; }
StringRef getDescription() const override {
return "This pass converts AIEVec dialect ops to LLVM dialect calls to "
Expand All @@ -708,29 +669,6 @@ struct ConvertAIEVecToLLVMPass
mlir::vector::VectorDialect, xllvm::XLLVMDialect>();
}

mlir::Pass::Option<Aie2Fp32Emulation> aie2Fp32Emulation{
*this, "aie2-fp32-emulation-strategy",
llvm::cl::desc(
"Set the AIE2 FP32 emulation strategy. Elementwise multiplication "
"and matrix multiplication intrinsics for FP32 input type are "
"emulated using bfloat16 data-path."),
llvm::cl::init(Aie2Fp32Emulation::AccuracySafe),
llvm::cl::values(
clEnumValN(Aie2Fp32Emulation::AccuracySafe, "accuracy-safe",
"Most accurate option since input fp32 number is split "
"into 3 bfloat16 numbers. float a*b would require 9 mac "
"operations due to 3 bfloat16 splits each."),
clEnumValN(
Aie2Fp32Emulation::AccuracyFast, "accuracy-fast",
"Fast and Accurate option. Input fp32 number is split in to 3 "
"bfloat16 numbers. In the 9 mac operations to emulate fp32 mul, "
"mac operations with LSBs are ignored. (3 last terms)."),
clEnumValN(
Aie2Fp32Emulation::AccuracyLow, "accuracy-low",
"Fast and least accurate option. Input fp32 number is split in "
"to 2 bfloat16 numbers. In the 4 mac operations to emulate fp32 "
"mul, mac operations with LSBs are ignored. (1 last term)."))};

void runOnOperation() override {
RewritePatternSet patterns(&getContext());
LLVMTypeConverter converter(&getContext());
Expand All @@ -740,8 +678,7 @@ struct ConvertAIEVecToLLVMPass
converter.addConversion(
[&](VectorType type) -> std::optional<Type> { return type; });

populateAIEVecToLLVMConversionPatterns(converter, patterns,
aie2Fp32Emulation);
populateAIEVecToLLVMConversionPatterns(converter, patterns);

LLVMConversionTarget target(getContext());
target.addIllegalDialect<AIEVecDialect>();
Expand Down
97 changes: 0 additions & 97 deletions compiler/plugins/target/AMD-AIE/aievec/AIEVecUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,6 @@ inline unsigned getVectorLaneSize(mlir::VectorType type) {
std::multiplies<int64_t>());
}

// For a 1D vector, return its size in bits. For an nD vector, return the size
// of the innermost dimension in bits (as reported by getVectorLaneSize).
// The result is the lane count multiplied by the element size in bits.
// Asserts that the result is at least 128 bits.
inline int32_t getVectorSizeInBits(mlir::VectorType type) {
  int32_t veclen = getVectorLaneSize(type) * getElementSizeInBits(type);
  // The message now matches the condition: exactly 128 bits is accepted.
  assert(veclen >= 128 && "AIE vector size should be at least 128 bits");
  return veclen;
}

// Return true if `op` belongs to the AIEVec dialect.
// Operation::getDialect() may return null (e.g. when the op's dialect is not
// loaded), and llvm::isa asserts on a null pointer, so guard against that and
// report such ops as "not an AIEVec op".
inline bool isAIEOp(mlir::Operation *op) {
  auto *dialect = op->getDialect();
  return dialect && llvm::isa<AIEVecDialect>(dialect);
}

// Determine the output type for a vector operation based on whether
// it operates on integer or floating point data.
inline mlir::VectorType getVectorOpDestType(mlir::VectorType type, bool AIE2) {
Expand Down Expand Up @@ -90,90 +77,6 @@ inline mlir::VectorType getVectorOpDestType(mlir::VectorType type, bool AIE2) {
llvm::report_fatal_error("Unsupported destination type");
}

// Linearize the exprVec as a strided access, but do not simplify.
//
// Builds sum(exprs[i] * stride[i]), walking the dimensions from last to first.
// The stride of a dimension is the running product of the sizes already
// visited; once a negative size has been seen (presumably the marker for a
// dynamic dimension — TODO confirm), each later stride becomes a fresh symbol.
//
// Returns:
//   - a null AffineExpr if `sizes` or `exprs` is empty, if no map can be
//     inferred from `exprs`, or if the running size product becomes <= 0;
//   - the constant 0 if any entry of `sizes` is 0;
//   - the linearized expression otherwise.
inline mlir::AffineExpr flattenedStridedExpr(
llvm::ArrayRef<int64_t> sizes, llvm::ArrayRef<mlir::AffineExpr> exprs,
mlir::MLIRContext *context) {
// Expect non-empty sizes and exprs
if (sizes.empty() || exprs.empty()) return nullptr;

// A zero-sized dimension collapses the whole access to the constant 0.
if (is_contained(sizes, 0)) return getAffineConstantExpr(0, context);

auto maps = mlir::AffineMap::inferFromExprList(exprs, context);
if (maps.empty()) return nullptr;

// Symbols created below are numbered after those of the first inferred map.
unsigned nSymbols = maps[0].getNumSymbols();

mlir::AffineExpr expr;
bool dynamicPoisonBit = false;
int64_t runningSize = 1;
// Visit (expr, size) pairs from the last dimension towards the first.
for (auto en : zip(reverse(exprs), reverse(sizes))) {
int64_t size = std::get<1>(en);
// Zero sizes were already handled by the is_contained check above.
if (size == 0) continue;

mlir::AffineExpr dimExpr = std::get<0>(en);
// The stride must be taken before runningSize is updated for this dimension.
mlir::AffineExpr stride = dynamicPoisonBit
? getAffineSymbolExpr(nSymbols++, context)
: getAffineConstantExpr(runningSize, context);
expr = expr ? expr + dimExpr * stride : dimExpr * stride;
if (size > 0) {
runningSize *= size;
// NOTE(review): looks like an overflow guard on the product — confirm.
if (runningSize <= 0) return nullptr;
} else
// Negative size: every stride from here on is symbolic.
dynamicPoisonBit = true;
}
return expr;
}

// Split a linearized affine expression into a (base, constant offset) pair.
// Example: the access A[i][j+2] on an N*N array linearizes to A[i*N+j+2]; the
// base is then (i*N+j) and the offset is 2.
//
// Only two shapes are decomposed:
//   - a bare constant: the base is null and the offset is the constant;
//   - a top-level Add with a constant operand: the constant operand(s) go into
//     the offset, the other operand is the base (null if both are constant).
// Anything else is returned unchanged as the base with offset 0.
inline std::pair<mlir::AffineExpr, int32_t> extractBaseAndOffset(
    mlir::AffineExpr expr) {
  int32_t constantPart = 0;

  // Case 1: the whole expression is a constant.
  if (auto wholeConst = llvm::dyn_cast<mlir::AffineConstantExpr>(expr)) {
    constantPart += wholeConst.getValue();
    return std::make_pair(mlir::AffineExpr(), constantPart);
  }

  // Case 2: anything that is not a top-level addition is left untouched.
  auto sum = llvm::dyn_cast<mlir::AffineBinaryOpExpr>(expr);
  if (!sum || sum.getKind() != mlir::AffineExprKind::Add)
    return std::make_pair(expr, constantPart);

  // Case 3: peel constant operands off the addition.
  mlir::AffineExpr remaining = expr;
  mlir::AffineExpr left = sum.getLHS();
  mlir::AffineExpr right = sum.getRHS();
  auto leftConst = llvm::dyn_cast<mlir::AffineConstantExpr>(left);
  auto rightConst = llvm::dyn_cast<mlir::AffineConstantExpr>(right);

  if (leftConst) {
    remaining = right;
    constantPart += leftConst.getValue();
  }
  if (rightConst) {
    // If the left side was constant too, nothing non-constant remains.
    remaining = leftConst ? mlir::AffineExpr() : left;
    constantPart += rightConst.getValue();
  }
  return std::make_pair(remaining, constantPart);
}

// The MLIR-AIE auto-vectorization to CPP flow currently does not support
// implicitly broadcasting a dynamic dimension of size `1`. Hence, we assume
// that dynamic dimensions do not have size `1`, which could otherwise be
// interpreted as various broadcasting scenarios. Lowerings may rely on this on
// a per-scope basis when the tosa.no_implicit_broadcast_of_dynamic_sizes
// attribute is present on any parent of the block.
//
// Returns true if any operation enclosing `block` carries that attribute.
inline bool isAssumingNoImplicitBroadcastOfDynamicSizes(mlir::Block *block) {
  mlir::Operation *ancestor = block->getParentOp();
  while (ancestor) {
    if (ancestor->hasAttr("tosa.no_implicit_broadcast_of_dynamic_sizes"))
      return true;
    ancestor = ancestor->getParentOp();
  }
  return false;
}

// Convenience overload: uses the block returned by the OpBuilder's getBlock()
// to determine whether we are assuming no implicit broadcast of dynamic sizes.
inline bool isAssumingNoImplicitBroadcastOfDynamicSizes(
    mlir::OpBuilder &builder) {
  mlir::Block *builderBlock = builder.getBlock();
  return isAssumingNoImplicitBroadcastOfDynamicSizes(builderBlock);
}

} // namespace mlir::iree_compiler::aievec

#endif // AIE_DIALECT_AIEVEC_AIEVECUTILS_H
12 changes: 0 additions & 12 deletions compiler/plugins/target/AMD-AIE/aievec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,6 @@
set(IREE_PACKAGE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}")
set(IREE_PACKAGE_ROOT_PREFIX "iree::target::amd-aie::aievec")

list(APPEND IREE_COMPILER_TABLEGEN_INCLUDE_DIRS
"${IREE_MLIR_AIE_SOURCE_DIR}/include")

iree_cc_library(
NAME
defs
INCLUDES
"${IREE_MLIR_AIE_SOURCE_DIR}/include"
)

###############################################################################
# AIEVec Dialect
###############################################################################
Expand Down Expand Up @@ -73,7 +63,6 @@ iree_cc_library(
AIEVecOps.cpp
XLLVMOps.cpp
DEPS
::defs
::AIEVecOpsGen
::AIEVecDialectGen
::AIEVecAttrsGen
Expand All @@ -93,7 +82,6 @@ iree_cc_library(
XLLVMToLLVMIRTranslation.cpp
DEPS
MLIREmitCDialect
::defs
::AIEVecDialectIR
::AIEVecXLLVMOpsGen
)
Expand Down
14 changes: 0 additions & 14 deletions compiler/plugins/target/AMD-AIE/aievec/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,6 @@

namespace mlir::iree_compiler::aievec {

// Strategy used to emulate AIE2 FP32 multiplication on the bfloat16 data-path.
// The enumerator values are spelled out explicitly so they stay stable.
enum class Aie2Fp32Emulation : uint32_t {
// Most accurate: each fp32 input is split into 3 bfloat16 numbers, so a*b
// takes 9 mac operations.
AccuracySafe = 0,
// Fast and accurate: 3-way bfloat16 split, but the 3 last mac terms (those
// involving LSBs) are ignored.
AccuracyFast = 1,
// Fast and least accurate: each fp32 input is split into 2 bfloat16 numbers;
// of the 4 mac operations, the last term (LSBs) is ignored.
AccuracyLow = 2,
};

// Options bundle for the AIEVec-to-LLVM conversion pass.
struct ConvertAIEVecToLLVMOptions {
// FP32 emulation strategy; defaults to AccuracySafe.
Aie2Fp32Emulation aie2Fp32Emulation = Aie2Fp32Emulation::AccuracySafe;
};

//===----------------------------------------------------------------------===//
// Building and Registering.
//===----------------------------------------------------------------------===//

/// Adds the "convert-vector-to-aievec" pipeline to the `OpPassManager`. This
/// pipeline takes `Vector` code, transforms it to make it compatible with the
/// selected `AIE` target, lowers it to `AIEVec` dialect, and performs some
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
// to ops that can be translated to a sequence of valid AIEVec ops.
//===----------------------------------------------------------------------===//

#include <bitset>
#include <optional>
#include <tuple>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -494,25 +494,6 @@ static void populateCommonAIECanonicalizeConversionPatterns(
patterns.getContext());
}

//============================================================================//
//============== AIEv1-specific canonicalization configuration ===============//
//============================================================================//

// Marks vector.transfer_read as dynamically legal for AIEv1: a read is legal
// only when its permutation map is not constant and
// getTransferReadAlignmentOffset (queried with 128) yields 0. A missing
// offset is treated as 0.
static void configureAIEv1CanonicalizeLegalizations(ConversionTarget &target) {
  target.addDynamicallyLegalOp<vector::TransferReadOp>(
      [](vector::TransferReadOp readOp) {
        // Reads with a constant permutation map are never legal here.
        if (readOp.getPermutationMap().isConstant()) return false;

        auto alignmentOffset =
            getTransferReadAlignmentOffset(readOp, readOp.getVectorType(), 128);
        return alignmentOffset.value_or(0) == 0;
      });
}

// Registers the AIEv1 canonicalization patterns: a single
// SplitUnalignedTransferReadPattern constructed with (512, 128).
// NOTE(review): 512 / 128 are presumably the maximum vector size and the
// alignment, both in bits — confirm against the pattern's constructor.
static void populateAIEv1CanonicalizeConversionPatterns(
    RewritePatternSet &patterns) {
  auto *context = patterns.getContext();
  patterns.add<SplitUnalignedTransferReadPattern>(context, 512, 128);
}

//============================================================================//
//============== AIE2-specific canonicalization configuration ===============//
//============================================================================//
Expand Down Expand Up @@ -558,11 +539,8 @@ static void populateAIE2CanonicalizeConversionPatterns(
struct CanonicalizeVectorForAIEVecPass
: public PassWrapper<CanonicalizeVectorForAIEVecPass, OperationPass<>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CanonicalizeVectorForAIEVecPass)

// In case we want to register this pass as a standalone pass for test
// purposes.
StringRef getArgument() const final {
return "test-canonicalize-vector-for-aievec";
return "canonicalize-vector-for-aievec";
}

StringRef getDescription() const final {
Expand Down
Loading

0 comments on commit d421d6a

Please sign in to comment.