Skip to content

Commit

Permalink
[WIP] aievec tests through aiesim
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Sep 25, 2024
1 parent 57a3636 commit 673c585
Show file tree
Hide file tree
Showing 16 changed files with 512 additions and 8 deletions.
2 changes: 1 addition & 1 deletion compiler/plugins/target/AMD-AIE/aievec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,4 @@ iree_cc_library(
::AIEVecXLLVMOpsGen
)

add_subdirectory(test)
iree_add_all_subdirs()
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,12 @@ void mlir::iree_compiler::aievec::buildConvertVectorToAIEVec(
pm.addPass(createLoopInvariantCodeMotionPass());
pm.addPass(createCanonicalizerPass());
}

void mlir::iree_compiler::aievec::registerAIEVecPipelines() {
PassPipelineRegistration<>(
"convert-vector-to-aievec",
"This pass pipeline takes standard \"Vector\" code and converts it to "
"\"AIEVec\" code targeting the selected Xilinx AIE vector "
"architecture.",
buildConvertVectorToAIEVec);
}
12 changes: 9 additions & 3 deletions compiler/plugins/target/AMD-AIE/aievec/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ void buildLowerVectorToAIEVec(mlir::OpPassManager &pm);

/**
* A pass containing patterns for lowering operations in the vector dialect to
* the AIEVec dialect. The pass is currently named `test-lower-vector-to-aievec`.
* the AIEVec dialect. The pass is currently named
* `test-lower-vector-to-aievec`.
*/
static std::unique_ptr<mlir::Pass> createLowerVectorToAIEVec();
std::unique_ptr<mlir::Pass> createLowerVectorToAIEVec();

/**
* Expose the pass `test-lower-vector-to-aievec` to the command line.
Expand All @@ -67,9 +68,14 @@ void buildConvertVectorToAIEVec(mlir::OpPassManager &);
/**
* Lower from the AIEVec dialect to the LLVM dialect. The pass is called
* `convert-aievec-to-llvm`.
* */
*/
std::unique_ptr<mlir::Pass> createConvertAIEVecToLLVMPass();

/**
* Register all pipelines for the AIE Vector dialect.
*/
void registerAIEVecPipelines();

/**
* Expose the pass `convert-aievec-to-llvm` to the command line.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1077,12 +1077,12 @@ struct LowerVectorToAIEVec : PassWrapper<LowerVectorToAIEVec, OperationPass<>> {
//============================================================================//

namespace mlir::iree_compiler::aievec {
static std::unique_ptr<Pass> createLowerVectorToAIEVec() {
std::unique_ptr<Pass> createLowerVectorToAIEVec() {
return std::make_unique<LowerVectorToAIEVec>();
}

void registerLowerVectorToAIEVecPass() {
::mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
return createLowerVectorToAIEVec();
});
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/plugins/target/AMD-AIE/aievec/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ iree_lit_test_suite(
LABELS
"hostonly"
)

iree_add_all_subdirs()
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Test recipe: lower the kernel to LLVM IR, compile the kernel and the C++
// testbench for AIE2, link both objects with the linker script that lives next
// to this test, then execute the resulting binary in the simulator.
// RUN: iree-opt %S/gemm-64x32x64-bf16.mlir --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf | iree-aie-translate --mlir-to-llvmir -o kernel.ll
// RUN: clang -O2 --target=aie2-none-unknown-elf -c kernel.ll -o kernel.o
// RUN: clang -O2 --target=aie2-none-unknown-elf -c %S/testbench.cc -o testbench.o
// RUN: clang --target=aie2-none-unknown-elf -Wl,--gc-sections -Wl,--orphan-handling=error -Wl,-T,%S/ldfile kernel.o testbench.o -o test.exe
// RUN: xca_udm_dbg -qf -T -P $AIETOOLS/data/aie_ml/lib -t "%S/../profiling.tcl ./test.exe" | FileCheck %s
// RUN: cat checkers.output

#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
module {
// Tiled GEMM kernel on packed operands: C += A * B.
//   %A: 16x4 grid of 4x8 bf16 tiles.
//   %B: 4x16 grid of 8x4 bf16 tiles.
//   %C: 16x16 grid of 4x4 f32 tiles; read, accumulated into and written back.
// For every (i, j) output tile the k loop walks the 4 tiles along the shared
// dimension and accumulates one 4x8 by 8x4 product per iteration.
func.func @gemm_64x32x64_bf16_packed_4x8x4(%A: memref<16x4x4x8xbf16>,
%B: memref<4x16x8x4xbf16>,
%C: memref<16x16x4x4xf32>) {
// Loop bounds and step, plus the padding values handed to the transfer reads.
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
%c16 = arith.constant 16 : index
%c0_bf16 = arith.constant 0.000000e+00 : bf16
%c0_f32 = arith.constant 0.000000e+00 : f32
scf.for %i = %c0 to %c16 step %c1 {
scf.for %j = %c0 to %c16 step %c1 {
scf.for %k = %c0 to %c4 step %c1 {
// Load the A tile (i, k), the B tile (k, j) and the current C tile (i, j).
%va = vector.transfer_read %A[%i, %k, %c0, %c0], %c0_bf16 :
memref<16x4x4x8xbf16>, vector<4x8xbf16>
%vb = vector.transfer_read %B[%k, %j, %c0, %c0], %c0_bf16 :
memref<4x16x8x4xbf16>, vector<8x4xbf16>
%vc = vector.transfer_read %C[%i, %j, %c0, %c0], %c0_f32 :
memref<16x16x4x4xf32>, vector<4x4xf32>
// Widen both bf16 operands to f32 so the contraction accumulates in f32.
%vaf32 = arith.extf %va : vector<4x8xbf16> to vector<4x8xf32>
%vbf32 = arith.extf %vb : vector<8x4xbf16> to vector<8x4xf32>
// Matmul-shaped contraction: d0 and d1 are parallel, d2 is the reduction;
// the result is added onto the accumulator tile %vc.
%vr = vector.contract {
indexing_maps = [#map, #map1, #map2],
iterator_types = ["parallel", "parallel", "reduction"],
kind = #vector.kind<add>}
%vaf32, %vbf32, %vc :
vector<4x8xf32>, vector<8x4xf32> into vector<4x4xf32>
// Store the updated accumulator back to the same C tile.
vector.transfer_write %vr, %C[%i, %j, %c0, %c0] :
vector<4x4xf32>, memref<16x16x4x4xf32>
}
}
}
return
}

// Splat-initialized operands: A is all 1.0, B is all 2.0, C is all 0.0.
// NOTE(review): @C is declared `constant` but is passed as the output buffer
// that the kernel writes to; confirm whether `constant` is intended here.
memref.global "private" constant @A : memref<16x4x4x8xbf16> = dense<1.000000e+00>
memref.global "private" constant @B : memref<4x16x8x4xbf16> = dense<2.000000e+00>
memref.global "private" constant @C : memref<16x16x4x4xf32> = dense<0.000000e+00>
// Driver that calls the kernel once on the three globals above.
// NOTE(review): the C++ testbench built alongside this file also defines
// `main`; confirm that the two definitions do not clash at link time.
func.func @main() {
%0 = memref.get_global @A : memref<16x4x4x8xbf16>
%1 = memref.get_global @B : memref<4x16x8x4xbf16>
%2 = memref.get_global @C : memref<16x16x4x4xf32>
func.call @gemm_64x32x64_bf16_packed_4x8x4(%0, %1, %2) : (memref<16x4x4x8xbf16>, memref<4x16x8x4xbf16>, memref<16x16x4x4xf32>) -> ()
return
}
}

// CHECK-LABEL: N: 64, M: 64, K: 32
// CHECK-LABEL: Running MATMUL...
// CHECK: Cycle count: [[CC:[0-9]+]]
// CHECK-LABEL: Finish MATMUL!
// CHECK-LABEL: Compare the results
// CHECK: PASSED, Max delta: [[MD:-?[0-9]+.[0-9]+]], pixel intensity

// RUN: xchesscc -j1 -pme -P $AIETOOLS/data/aie_ml/lib -f -CRelease_LLVM -w work -D__AIENGINE__ -D__AIE_ARCH__=20 -D__AIEARCH__=20 -I $AIETOOLS/include kernel.ll -o kernel.o
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

using namespace std;

// Flat element counts of the operand buffers. With the dimensions defined just
// below they equal N_SIZE * K_SIZE (A), K_SIZE * M_SIZE (B) and
// N_SIZE * M_SIZE (C).
#define MAT_A_SIZE 2048
#define MAT_B_SIZE 2048
#define MAT_C_SIZE 4096
// Problem dimensions: A is N x K, B is K x M, C is N x M.
#define N_SIZE 64
#define M_SIZE 64
#define K_SIZE 32

// Statically allocated operand and result buffers handed to the kernel.
bfloat16 mat_a_data[MAT_A_SIZE];
bfloat16 mat_b_data[MAT_B_SIZE];
float mat_c_data[MAT_C_SIZE];
// NOTE(review): not referenced by the code visible in this testbench;
// presumably reserved for a reference result to compare against.
float ref_c_data[MAT_C_SIZE];

// NOTE(review): none of these file names is referenced by the code visible in
// this testbench; they look like leftovers from a file-driven version.
#define INPUT_A_FILE "matrix_a_test.txt"
#define INPUT_B_FILE "matrix_b_test.txt"
#define OUTPUT_C_FILE "matrix_c_test.txt"

// Fallback for builds where `__chess__` is not defined: a stub cycle counter
// that always returns 0, so the testbench still compiles and links.
#ifndef __chess__
int chess_cycle_count() { return 0; }
#endif

// Kernel under test; it is defined in the object produced from the MLIR input,
// not in this file. The declaration uses C linkage because this translation
// unit is C++ and would otherwise refer to a mangled symbol, whereas the
// generated kernel exports the plain function name.
// NOTE(review): `restrict` is not a standard C++ keyword; confirm that every
// compiler used for this file accepts it (`__restrict` is the usual spelling).
extern "C" void gemm_64x32x64_bf16_packed_4x8x4(bfloat16 *restrict mat_a_data,
                                                bfloat16 *restrict mat_b_data,
                                                float *restrict mat_c_data);

// Returns the bfloat16 bit pattern of `value`: the upper 16 bits of its 32-bit
// float encoding (plain truncation, no rounding).
static uint16_t float_to_bf16_bits(float value) {
  uint32_t bits = 0;
  static_assert(sizeof(bits) == sizeof(value), "float must be 32 bits wide");
  // memcpy is the well-defined way to reinterpret the bytes of an object.
  memcpy(&bits, &value, sizeof(bits));
  return static_cast<uint16_t>(bits >> 16);
}

// Testbench entry point: fills the A and B operands, zeroes C, runs the kernel
// once between two cycle-counter reads and reports the elapsed cycles.
int main() {
  int i = 0, j = 0, k = 0;

  // Fill matrix_a: every element of row i holds the value i.
  // The index has to be converted to float before its top 16 bits are taken;
  // taking the top 16 bits of the raw integer gives 0 for every index used
  // here, which would make all inputs zero.
  int index = 0;
  for (i = 0; i < N_SIZE; i++) {
    for (k = 0; k < K_SIZE; k++) {
      uint16_t bfval = float_to_bf16_bits(static_cast<float>(i));
      mat_a_data[index++] = *reinterpret_cast<bfloat16 *>(&bfval);
    }
  }

  // Fill matrix_b: every element of row k holds the value k. This uses the
  // loop's own row index rather than the stale `i` left over from the loop
  // above.
  index = 0;
  for (k = 0; k < K_SIZE; k++) {
    for (j = 0; j < M_SIZE; j++) {
      uint16_t bfval = float_to_bf16_bits(static_cast<float>(k));
      mat_b_data[index++] = *reinterpret_cast<bfloat16 *>(&bfval);
    }
  }

  // Clear the accumulator matrix.
  index = 0;
  for (i = 0; i < N_SIZE; i++) {
    for (j = 0; j < M_SIZE; j++) {
      mat_c_data[index++] = 0.f;
    }
  }

  // Compute matrix multiplication
  // reference(mat_a_data, mat_b_data, mat_c_data);
  auto cyclesBegin = chess_cycle_count();
  gemm_64x32x64_bf16_packed_4x8x4(mat_a_data, mat_b_data, mat_c_data);
  auto cyclesEnd = chess_cycle_count();

  // Report the measurement instead of discarding it.
  printf("Cycle count: %lld\n",
         static_cast<long long>(cyclesEnd - cyclesBegin));

  return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2024 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Read the global MLIR dialect and translation library lists so that the tool
# below can link against all of them.
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS)

# Translation tool binary built from iree-aie-translate.cpp. It depends on the
# AIE and AIEVec dialect libraries, the AIEVec-to-LLVM conversion, and the MLIR
# LLVM IR import/export libraries.
iree_cc_binary(
NAME
iree-aie-translate
SRCS
iree-aie-translate.cpp
DEPS
iree::target::amd-aie::aie::AIEDialectIR
iree::target::amd-aie::aievec::AIEVecDialectIR
iree::target::amd-aie::aievec::AIEVecConvertToLLVM
${dialect_libs}
${translation_libs}
LLVMSupport
MLIRFuncAllExtensions
MLIRIR
MLIRParser
MLIRPass
MLIRTargetLLVMIRExport
MLIRTargetLLVMIRImport
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Copyright (C) 2023-2024, Advanced Micro Devices, Inc.

// REQUIRES: valid_xchess_license
// REQUIRES: peano, peano_and_chess
// RUN: mkdir -p %t/data; cd %t
// RUN: aie-opt %s %vector-to-generic-llvmir% -o llvmir.mlir
// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll
// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o
// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s
// CHECK: TEST PASSED
// vector_to_generic_llvmir = '-canonicalize-vector-for-aievec=aie-target=aie2 -convert-vector-to-llvm -lower-affine -convert-scf-to-cf -canonicalize -cse -convert-math-to-llvm -expand-strided-metadata -finalize-memref-to-llvm -convert-func-to-llvm=\'use-bare-ptr-memref-call-conv\' -convert-index-to-llvm -canonicalize -cse'

// --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf --iree-convert-to-llvm | iree-aie-translate --mlir-to-llvmir -o kernel.ll

// --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf --iree-convert-to-llvm | ./tools/iree-aie-translate --mlir-to-llvmir
// ../llvm-aie/bin/clang --target=aie2-none-unknown-elf -Wl,--gc-sections -Wl,--orphan-handling=warn -Wl,-T,$PWD/ldfile kernel.o -o test.exe -v

module {
// Element-wise i16 multiply over 1024 elements: %arg2[n] = %arg0[n] * %arg1[n].
func.func private @dut(%arg0: memref<1024xi16>, %arg1: memref<1024xi16>, %arg2: memref<1024xi16>) {
// Declare all three buffers as 32-byte aligned.
memref.assume_alignment %arg0, 32 : memref<1024xi16>
memref.assume_alignment %arg1, 32 : memref<1024xi16>
memref.assume_alignment %arg2, 32 : memref<1024xi16>
affine.for %arg3 = 0 to 1024 {
%0 = affine.load %arg0[%arg3] : memref<1024xi16>
%1 = affine.load %arg1[%arg3] : memref<1024xi16>
%2 = arith.muli %0, %1 : i16
affine.store %2, %arg2[%arg3] : memref<1024xi16>
}
return
}
// Splat-initialized operands: A is all 1, B is all 2, C is all 0.
// NOTE(review): @C is declared `constant` but is passed as the buffer that
// @dut stores into; confirm whether `constant` is intended here.
memref.global "private" constant @A : memref<1024xi16> = dense<1>
memref.global "private" constant @B : memref<1024xi16> = dense<2>
memref.global "private" constant @C : memref<1024xi16> = dense<0>
// Driver that calls @dut once on the three globals above.
func.func @main() {
%0 = memref.get_global @A : memref<1024xi16>
%1 = memref.get_global @B : memref<1024xi16>
%2 = memref.get_global @C : memref<1024xi16>
func.call @dut(%0, %1, %2) : (memref<1024xi16>, memref<1024xi16>, memref<1024xi16>) -> ()
return
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "aie/AIEDialect.h"
#include "aievec/AIEVecDialect.h"
#include "aievec/Passes.h"
#include "aievec/XLLVMDialect.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/Func/Extensions/AllExtensions.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Func/Transforms/Passes.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/SCF/Transforms/Passes.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/InitAllTranslations.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Target/LLVMIR/Dialect/All.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
#include "mlir/Tools/mlir-translate/Translation.h"
#include "mlir/Transforms/Passes.h"

using namespace mlir;
using namespace mlir::iree_compiler;

namespace aie {
/**
 * Register the `mlir-to-llvmir` translation.
 *
 * The translation first runs a fixed lowering pipeline on the input operation
 * (vector, memref, math, index, arith, func and control flow to the LLVM
 * dialect, followed by canonicalization and CSE) and then exports the result
 * as textual LLVM IR to the output stream. It fails if either the lowering
 * pipeline or the export fails.
 */
void registerToLLVMIRTranslation() {
  TranslateFromMLIRRegistration registration(
      "mlir-to-llvmir", "Translate MLIR to LLVMIR",
      [](Operation *op, raw_ostream &output) {
        PassManager pm(op->getContext());
        pm.addPass(createConvertVectorToLLVMPass());
        pm.addPass(memref::createExpandStridedMetadataPass());
        pm.addPass(createConvertMathToLLVMPass());
        pm.addPass(createConvertIndexToLLVMPass());
        pm.addPass(arith::createArithExpandOpsPass());
        pm.addPass(createArithToLLVMConversionPass());
        pm.addPass(createFinalizeMemRefToLLVMConversionPass());
        // Functions are lowered with the bare-pointer calling convention.
        ConvertFuncToLLVMPassOptions options;
        options.useBarePtrCallConv = true;
        pm.addPass(createConvertFuncToLLVMPass(options));
        pm.addPass(createConvertControlFlowToLLVMPass());
        pm.addPass(createCanonicalizerPass());
        pm.addPass(createCSEPass());
        // Stop here when lowering fails instead of exporting partially
        // lowered IR.
        if (failed(pm.run(op))) return failure();

        llvm::LLVMContext llvmContext;
        auto llvmModule = translateModuleToLLVMIR(op, llvmContext);
        if (!llvmModule) return failure();
        llvmModule->print(output, nullptr);
        return success();
      },
      [](DialectRegistry &registry) {
        // Dialects that may appear in the input, plus the translation and
        // conversion interfaces that the pipeline above relies on.
        registry
            .insert<DLTIDialect, LLVM::LLVMDialect, aievec::AIEVecDialect,
                    aievec::xllvm::XLLVMDialect, arith::ArithDialect,
                    cf::ControlFlowDialect, func::FuncDialect,
                    math::MathDialect, memref::MemRefDialect, scf::SCFDialect,
                    vector::VectorDialect, xilinx::AIE::AIEDialect>();
        registerBuiltinDialectTranslation(registry);
        registerLLVMDialectTranslation(registry);
        aievec::registerXLLVMDialectTranslation(registry);
        arith::registerConvertArithToLLVMInterface(registry);
        cf::registerConvertControlFlowToLLVMInterface(registry);
        func::registerAllExtensions(registry);
        registerConvertFuncToLLVMInterface(registry);
        index::registerConvertIndexToLLVMInterface(registry);
        registerConvertMathToLLVMInterface(registry);
        registerConvertMemRefToLLVMInterface(registry);
      });
}
}  // namespace aie

/**
 * Tool entry point: registers the LLVM IR import translation and the
 * `mlir-to-llvmir` export translation, then dispatches to the generic
 * translation driver. The exit code is 0 on success and 1 on failure.
 */
int main(int argc, char **argv) {
  registerFromLLVMIRTranslation();
  aie::registerToLLVMIRTranslation();

  const bool translationFailed =
      failed(mlirTranslateMain(argc, argv, "AMDAIE Translation Tool"));
  return translationFailed ? 1 : 0;
}
Loading

0 comments on commit 673c585

Please sign in to comment.