Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] aievec tests through aiesim #732

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/plugins/target/AMD-AIE/aievec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,4 @@ iree_cc_library(
::AIEVecXLLVMOpsGen
)

add_subdirectory(test)
iree_add_all_subdirs()
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,12 @@ void mlir::iree_compiler::aievec::buildConvertVectorToAIEVec(
pm.addPass(createLoopInvariantCodeMotionPass());
pm.addPass(createCanonicalizerPass());
}

void mlir::iree_compiler::aievec::registerAIEVecPipelines() {
PassPipelineRegistration<>(
"convert-vector-to-aievec",
"This pass pipeline takes standard \"Vector\" code and converts it to "
"\"AIEVec\" code targeting the selected Xilinx AIE vector "
"architecture.",
buildConvertVectorToAIEVec);
}
12 changes: 9 additions & 3 deletions compiler/plugins/target/AMD-AIE/aievec/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ void buildLowerVectorToAIEVec(mlir::OpPassManager &pm);

/**
* A pass containing patterns for lowering operations in the vector dialect to
* the AIEVec dialect. The pass is currently named `test-lower-vector-to-aievec`.
* the AIEVec dialect. The pass is currently named
* `test-lower-vector-to-aievec`.
*/
static std::unique_ptr<mlir::Pass> createLowerVectorToAIEVec();
std::unique_ptr<mlir::Pass> createLowerVectorToAIEVec();

/**
* Expose the pass `test-lower-vector-to-aievec` to the command line.
Expand All @@ -67,9 +68,14 @@ void buildConvertVectorToAIEVec(mlir::OpPassManager &);
/**
* Lower from the AIEVec dialect to the LLVM dialect. The pass is called
* `convert-aievec-to-llvm`.
* */
*/
std::unique_ptr<mlir::Pass> createConvertAIEVecToLLVMPass();

/**
* Register all pipelines for the AIE Vector dialect.
*/
void registerAIEVecPipelines();

/**
* Expose the pass `convert-aievec-to-llvm` to the command line.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1077,12 +1077,12 @@ struct LowerVectorToAIEVec : PassWrapper<LowerVectorToAIEVec, OperationPass<>> {
//============================================================================//

namespace mlir::iree_compiler::aievec {
static std::unique_ptr<Pass> createLowerVectorToAIEVec() {
std::unique_ptr<Pass> createLowerVectorToAIEVec() {
return std::make_unique<LowerVectorToAIEVec>();
}

void registerLowerVectorToAIEVecPass() {
::mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
return createLowerVectorToAIEVec();
});
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/plugins/target/AMD-AIE/aievec/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ iree_lit_test_suite(
LABELS
"hostonly"
)

iree_add_all_subdirs()
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Test driver: lower the kernel below to LLVM IR, compile it together with the
// C++ testbench for the aie2 target, link both objects with the linker script
// next to this file, then execute the resulting binary in the simulator.
// RUN: iree-opt %S/gemm-64x32x64-bf16.mlir --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf | iree-aie-translate --mlir-to-llvmir -o kernel.ll
// RUN: clang -O2 --target=aie2-none-unknown-elf -c kernel.ll -o kernel.o
// The testbench source lives beside this test, so resolve it through %S.
// RUN: clang -O2 --target=aie2-none-unknown-elf -c %S/testbench.cc -o testbench.o
// The linker script is passed with `-T`; both object files must be listed as
// link inputs.
// RUN: clang --target=aie2-none-unknown-elf -Wl,--gc-sections -Wl,--orphan-handling=error -Wl,-T,%S/ldfile kernel.o testbench.o -o test.exe
// Run the executable produced by the link step above.
// RUN: xca_udm_dbg -qf -T -P $AIETOOLS/data/aie_ml/lib -t "%S/../profiling.tcl ./test.exe" | FileCheck %s
// RUN: cat checkers.output

#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
module {
// Tiled matrix multiplication over pre-packed operands.
//   %A: 16x4 grid of 4x8 bf16 tiles.
//   %B: 4x16 grid of 8x4 bf16 tiles.
//   %C: 16x16 grid of 4x4 f32 tiles, read and updated in place.
func.func @gemm_64x32x64_bf16_packed_4x8x4(%A: memref<16x4x4x8xbf16>,
%B: memref<4x16x8x4xbf16>,
%C: memref<16x16x4x4xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
%c16 = arith.constant 16 : index
// Padding values supplied to the transfer reads below.
%c0_bf16 = arith.constant 0.000000e+00 : bf16
%c0_f32 = arith.constant 0.000000e+00 : f32
// %i and %j select the output tile of %C; %k walks the four tiles along the
// reduction dimension.
scf.for %i = %c0 to %c16 step %c1 {
scf.for %j = %c0 to %c16 step %c1 {
scf.for %k = %c0 to %c4 step %c1 {
// Load one tile of each operand plus the current accumulator tile.
%va = vector.transfer_read %A[%i, %k, %c0, %c0], %c0_bf16 :
memref<16x4x4x8xbf16>, vector<4x8xbf16>
%vb = vector.transfer_read %B[%k, %j, %c0, %c0], %c0_bf16 :
memref<4x16x8x4xbf16>, vector<8x4xbf16>
%vc = vector.transfer_read %C[%i, %j, %c0, %c0], %c0_f32 :
memref<16x16x4x4xf32>, vector<4x4xf32>
// Widen both bf16 inputs to f32 before the contraction.
%vaf32 = arith.extf %va : vector<4x8xbf16> to vector<4x8xf32>
%vbf32 = arith.extf %vb : vector<8x4xbf16> to vector<8x4xf32>
// (4x8) * (8x4) + (4x4) contraction accumulating into the %C tile.
%vr = vector.contract {
indexing_maps = [#map, #map1, #map2],
iterator_types = ["parallel", "parallel", "reduction"],
kind = #vector.kind<add>}
%vaf32, %vbf32, %vc :
vector<4x8xf32>, vector<8x4xf32> into vector<4x4xf32>
// Store the updated accumulator tile back to %C.
vector.transfer_write %vr, %C[%i, %j, %c0, %c0] :
vector<4x4xf32>, memref<16x16x4x4xf32>
}
}
}
return
}

// Operand buffers for @main: A is filled with 1.0, B with 2.0 and C with 0.0.
// NOTE(review): @C is declared `constant` yet it is handed to the kernel as
// the buffer that gets written -- confirm this is intended.
memref.global "private" constant @A : memref<16x4x4x8xbf16> = dense<1.000000e+00>
memref.global "private" constant @B : memref<4x16x8x4xbf16> = dense<2.000000e+00>
memref.global "private" constant @C : memref<16x16x4x4xf32> = dense<0.000000e+00>
// Entry point: fetches the three global buffers and invokes the kernel once.
func.func @main() {
%0 = memref.get_global @A : memref<16x4x4x8xbf16>
%1 = memref.get_global @B : memref<4x16x8x4xbf16>
%2 = memref.get_global @C : memref<16x16x4x4xf32>
func.call @gemm_64x32x64_bf16_packed_4x8x4(%0, %1, %2) : (memref<16x4x4x8xbf16>, memref<4x16x8x4xbf16>, memref<16x16x4x4xf32>) -> ()
return
}
}

// CHECK-LABEL: N: 64, M: 64, K: 32
// CHECK-LABEL: Running MATMUL...
// CHECK: Cycle count: [[CC:[0-9]+]]
// CHECK-LABEL: Finish MATMUL!
// CHECK-LABEL: Compare the results
// CHECK: PASSED, Max delta: [[MD:-?[0-9]+.[0-9]+]], pixel intensity

// RUN: xchesscc -j1 -pme -P $AIETOOLS/data/aie_ml/lib -f -CRelease_LLVM -w work -D__AIENGINE__ -D__AIE_ARCH__=20 -D__AIEARCH__=20 -I $AIETOOLS/include kernel.ll -o kernel.o
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

using namespace std;

// Element counts of the three matrices. They match the dimension macros
// below: A is N_SIZE x K_SIZE (64 * 32 = 2048), B is K_SIZE x M_SIZE
// (32 * 64 = 2048) and C is N_SIZE x M_SIZE (64 * 64 = 4096).
#define MAT_A_SIZE 2048
#define MAT_B_SIZE 2048
#define MAT_C_SIZE 4096
// Problem dimensions used as loop bounds when filling the buffers in main().
#define N_SIZE 64
#define M_SIZE 64
#define K_SIZE 32

// Statically allocated operand and result buffers passed to the kernel.
bfloat16 mat_a_data[MAT_A_SIZE];
bfloat16 mat_b_data[MAT_B_SIZE];
float mat_c_data[MAT_C_SIZE];
// NOTE(review): ref_c_data is not referenced by the visible part of this
// file; presumably reserved for a reference result -- confirm.
float ref_c_data[MAT_C_SIZE];

// File names for matrix data. NOTE(review): none of these are used by the
// visible part of this file.
#define INPUT_A_FILE "matrix_a_test.txt"
#define INPUT_B_FILE "matrix_b_test.txt"
#define OUTPUT_C_FILE "matrix_c_test.txt"

#ifndef __chess__
// Fallback used when __chess__ is not defined: there is no cycle counter to
// query, so the stub always reports zero.
int chess_cycle_count() {
  const int noCyclesAvailable = 0;
  return noCyclesAvailable;
}
#endif

extern void gemm_64x32x64_bf16_packed_4x8x4(bfloat16 *restrict mat_a_data,
bfloat16 *restrict mat_b_data,
float *restrict mat_c_data);

int main() {
int i = 0, j = 0, k = 0;

// Read in matrix_a to local memory
int index = 0;
for (i = 0; i < N_SIZE; i++) {
for (k = 0; k < K_SIZE; k++) {
int32_t ival = *reinterpret_cast<int32_t *>(&i);
int16_t bfval = (ival & 0xFFFF0000) >> 16;
mat_a_data[index++] = *reinterpret_cast<bfloat16 *>(&bfval);
}
}

// Read in matrix_b to local memory
index = 0;
for (k = 0; k < K_SIZE; k++) {
for (j = 0; j < M_SIZE; j++) {
int32_t ival = *reinterpret_cast<int32_t *>(&i);
int16_t bfval = (ival & 0xFFFF0000) >> 16;
mat_b_data[index++] = *reinterpret_cast<bfloat16 *>(&bfval);
}
}

// Initialize matrix_c to local memory
index = 0;
for (i = 0; i < N_SIZE; i++) {
for (j = 0; j < M_SIZE; j++) {
mat_c_data[index++] = 0.f;
}
}

// Compute matrix multiplication
// reference(mat_a_data, mat_b_data, mat_c_data);
auto cyclesBegin = chess_cycle_count();
gemm_64x32x64_bf16_packed_4x8x4(mat_a_data, mat_b_data, mat_c_data);
auto cyclesEnd = chess_cycle_count();

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2024 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Read the global MLIR_DIALECT_LIBS and MLIR_TRANSLATION_LIBS properties into
# local variables so they can be listed as dependencies below.
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS)

# Standalone `iree-aie-translate` binary built from iree-aie-translate.cpp.
iree_cc_binary(
NAME
iree-aie-translate
SRCS
iree-aie-translate.cpp
DEPS
# AMD-AIE plugin libraries: the AIE and AIEVec dialects and the AIEVec to LLVM
# conversion.
iree::target::amd-aie::aie::AIEDialectIR
iree::target::amd-aie::aievec::AIEVecDialectIR
iree::target::amd-aie::aievec::AIEVecConvertToLLVM
# Libraries collected from the global properties read above.
${dialect_libs}
${translation_libs}
# Upstream LLVM/MLIR libraries.
LLVMSupport
MLIRFuncAllExtensions
MLIRIR
MLIRParser
MLIRPass
MLIRTargetLLVMIRExport
MLIRTargetLLVMIRImport
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Copyright (C) 2023-2024, Advanced Micro Devices, Inc.

// REQUIRES: valid_xchess_license
// REQUIRES: peano, peano_and_chess
// RUN: mkdir -p %t/data; cd %t
// RUN: aie-opt %s %vector-to-generic-llvmir% -o llvmir.mlir
// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll
// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o
// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s
// CHECK: TEST PASSED
// vector_to_generic_llvmir = '-canonicalize-vector-for-aievec=aie-target=aie2 -convert-vector-to-llvm -lower-affine -convert-scf-to-cf -canonicalize -cse -convert-math-to-llvm -expand-strided-metadata -finalize-memref-to-llvm -convert-func-to-llvm=\'use-bare-ptr-memref-call-conv\' -convert-index-to-llvm -canonicalize -cse'

// --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf --iree-convert-to-llvm | iree-aie-translate --mlir-to-llvmir -o kernel.ll

// --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf --iree-convert-to-llvm | ./tools/iree-aie-translate --mlir-to-llvmir
// ../llvm-aie/bin/clang --target=aie2-none-unknown-elf -Wl,--gc-sections -Wl,--orphan-handling=warn -Wl,-T,$PWD/ldfile kernel.o -o test.exe -v

module {
// Elementwise multiply: writes %arg0[n] * %arg1[n] into %arg2[n] for each of
// the 1024 i16 elements.
func.func private @dut(%arg0: memref<1024xi16>, %arg1: memref<1024xi16>, %arg2: memref<1024xi16>) {
// Declare an assumed alignment of 32 for each of the three buffers.
memref.assume_alignment %arg0, 32 : memref<1024xi16>
memref.assume_alignment %arg1, 32 : memref<1024xi16>
memref.assume_alignment %arg2, 32 : memref<1024xi16>
affine.for %arg3 = 0 to 1024 {
%0 = affine.load %arg0[%arg3] : memref<1024xi16>
%1 = affine.load %arg1[%arg3] : memref<1024xi16>
%2 = arith.muli %0, %1 : i16
affine.store %2, %arg2[%arg3] : memref<1024xi16>
}
return
}
// Operand buffers for @main: A is filled with 1, B with 2 and C with 0.
// NOTE(review): @C is declared `constant` yet it is passed as the buffer that
// @dut stores into -- confirm this is intended.
memref.global "private" constant @A : memref<1024xi16> = dense<1>
memref.global "private" constant @B : memref<1024xi16> = dense<2>
memref.global "private" constant @C : memref<1024xi16> = dense<0>
// Entry point: fetches the three global buffers and calls @dut once.
func.func @main() {
%0 = memref.get_global @A : memref<1024xi16>
%1 = memref.get_global @B : memref<1024xi16>
%2 = memref.get_global @C : memref<1024xi16>
func.call @dut(%0, %1, %2) : (memref<1024xi16>, memref<1024xi16>, memref<1024xi16>) -> ()
return
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "aie/AIEDialect.h"
#include "aievec/AIEVecDialect.h"
#include "aievec/Passes.h"
#include "aievec/XLLVMDialect.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/Func/Extensions/AllExtensions.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Func/Transforms/Passes.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/SCF/Transforms/Passes.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/InitAllTranslations.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Target/LLVMIR/Dialect/All.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
#include "mlir/Tools/mlir-translate/Translation.h"
#include "mlir/Transforms/Passes.h"

using namespace mlir;
using namespace mlir::iree_compiler;

namespace aie {
/// Registers the `mlir-to-llvmir` translation.
///
/// The translation callback first runs a fixed pass pipeline on the input
/// operation that converts the vector, math, index, arith, memref, func and
/// control-flow dialects to the LLVM dialect (functions use the bare-pointer
/// calling convention), and then exports the result as textual LLVM IR to
/// `output`. It returns failure if either the lowering pipeline or the export
/// fails.
///
/// The second callback populates the dialect registry with every dialect and
/// LLVM translation/conversion interface the input may need.
void registerToLLVMIRTranslation() {
  TranslateFromMLIRRegistration registration(
      "mlir-to-llvmir", "Translate MLIR to LLVMIR",
      [](Operation *op, raw_ostream &output) {
        PassManager pm(op->getContext());
        pm.addPass(createConvertVectorToLLVMPass());
        pm.addPass(memref::createExpandStridedMetadataPass());
        pm.addPass(createConvertMathToLLVMPass());
        pm.addPass(createConvertIndexToLLVMPass());
        pm.addPass(arith::createArithExpandOpsPass());
        pm.addPass(createArithToLLVMConversionPass());
        pm.addPass(createFinalizeMemRefToLLVMConversionPass());
        ConvertFuncToLLVMPassOptions options;
        options.useBarePtrCallConv = true;
        pm.addPass(createConvertFuncToLLVMPass(options));
        pm.addPass(createConvertControlFlowToLLVMPass());
        pm.addPass(createCanonicalizerPass());
        pm.addPass(createCSEPass());
        // Stop here if the lowering pipeline failed: exporting a module that
        // was not fully lowered would only hide the original error.
        if (failed(pm.run(op))) return failure();

        llvm::LLVMContext llvmContext;
        auto llvmModule = translateModuleToLLVMIR(op, llvmContext);
        if (!llvmModule) return failure();
        llvmModule->print(output, nullptr);
        return success();
      },
      [](DialectRegistry &registry) {
        registry
            .insert<DLTIDialect, LLVM::LLVMDialect, aievec::AIEVecDialect,
                    aievec::xllvm::XLLVMDialect, arith::ArithDialect,
                    cf::ControlFlowDialect, func::FuncDialect,
                    math::MathDialect, memref::MemRefDialect, scf::SCFDialect,
                    vector::VectorDialect, xilinx::AIE::AIEDialect>();
        registerBuiltinDialectTranslation(registry);
        registerLLVMDialectTranslation(registry);
        aievec::registerXLLVMDialectTranslation(registry);
        arith::registerConvertArithToLLVMInterface(registry);
        cf::registerConvertControlFlowToLLVMInterface(registry);
        func::registerAllExtensions(registry);
        registerConvertFuncToLLVMInterface(registry);
        index::registerConvertIndexToLLVMInterface(registry);
        registerConvertMathToLLVMInterface(registry);
        registerConvertMemRefToLLVMInterface(registry);
      });
}
}  // namespace aie

int main(int argc, char **argv) {
registerFromLLVMIRTranslation();
aie::registerToLLVMIRTranslation();
return failed(mlirTranslateMain(argc, argv, "AMDAIE Translation Tool"));
}
Loading
Loading