nod-ai · makslevental · Aug 31, 2024
@@ -86,4 +86,4 @@ iree_cc_library(
     ::AIEVecXLLVMOpsGen
 )
 
-add_subdirectory(test)
+iree_add_all_subdirs()
@@ -114,3 +114,12 @@ void mlir::iree_compiler::aievec::buildConvertVectorToAIEVec(
   pm.addPass(createLoopInvariantCodeMotionPass());
   pm.addPass(createCanonicalizerPass());
 }
+
+/// Registers the `convert-vector-to-aievec` pass pipeline, whose passes are
+/// added by `buildConvertVectorToAIEVec`.
+void mlir::iree_compiler::aievec::registerAIEVecPipelines() {
+  static constexpr char kPipelineArg[] = "convert-vector-to-aievec";
+  static constexpr char kPipelineDescription[] =
+      "This pass pipeline takes standard \"Vector\" code and converts it to "
+      "\"AIEVec\" code targeting the selected Xilinx AIE vector "
+      "architecture.";
+  // The registration object is not used after construction; building it is
+  // what makes the pipeline known to the registry.
+  PassPipelineRegistration<> registration(kPipelineArg, kPipelineDescription,
+                                          buildConvertVectorToAIEVec);
+}
@@ -48,9 +48,10 @@ void buildLowerVectorToAIEVec(mlir::OpPassManager &pm);
 
 /**
  * A pass containing patterns for lowering operations in the vector dialect to
- * the AIEVec dialect. The pass is currently named `test-lower-vector-to-aievec`.
+ * the AIEVec dialect. The pass is currently named
+ * `test-lower-vector-to-aievec`.
  */
-static std::unique_ptr<mlir::Pass> createLowerVectorToAIEVec();
+std::unique_ptr<mlir::Pass> createLowerVectorToAIEVec();
 
 /**
  * Expose the pass `test-lower-vector-to-aievec` to the command line.
@@ -67,9 +68,14 @@ void buildConvertVectorToAIEVec(mlir::OpPassManager &);
 /**
  * Lower from the AIEVec dialect to the LLVM dialect. The pass is called
  * `convert-aievec-to-llvm`.
- * */
+ */
 std::unique_ptr<mlir::Pass> createConvertAIEVecToLLVMPass();
 
+/**
+ * Register all pipelines for the AIE Vector dialect.
+ */
+void registerAIEVecPipelines();
+
 /**
  * Expose the pass `convert-aievec-to-llvm` to the command line.
  */

@@ -1077,12 +1077,12 @@ struct LowerVectorToAIEVec : PassWrapper<LowerVectorToAIEVec, OperationPass<>> {
 //============================================================================//
 
 namespace mlir::iree_compiler::aievec {
-static std::unique_ptr<Pass> createLowerVectorToAIEVec() {
+std::unique_ptr<Pass> createLowerVectorToAIEVec() {
   return std::make_unique<LowerVectorToAIEVec>();
 }
 
 void registerLowerVectorToAIEVecPass() {
-  ::mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
+  mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
     return createLowerVectorToAIEVec();
   });
 }

@@ -13,3 +13,5 @@ iree_lit_test_suite(
   LABELS
     "hostonly"
 )
+
+iree_add_all_subdirs()
@@ -0,0 +1,65 @@
+// Lower the GEMM below to LLVM IR, compile it and the C++ testbench for AIE2,
+// link both objects with the linker script next to this test, and run the
+// resulting executable under xca_udm_dbg.
+// RUN: iree-opt %S/gemm-64x32x64-bf16.mlir --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf | iree-aie-translate --mlir-to-llvmir -o kernel.ll
+// RUN: clang -O2 --target=aie2-none-unknown-elf -c kernel.ll -o kernel.o
+// RUN: clang -O2 --target=aie2-none-unknown-elf -c %S/testbench.cc -o testbench.o
+// RUN: clang --target=aie2-none-unknown-elf -Wl,--gc-sections -Wl,--orphan-handling=error -Wl,-T,%S/ldfile kernel.o testbench.o -o testbench.exe
+// RUN: xca_udm_dbg -qf -T -P $AIETOOLS/data/aie_ml/lib -t "%S/../profiling.tcl ./testbench.exe" | FileCheck %s
+// RUN: cat checkers.output
+
+#map = affine_map<(d0, d1, d2) -> (d0, d2)>
+#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
+#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
+module {
+  // Tiled GEMM on packed operands. For every (i, j, k) it reads a 4x8 bf16
+  // tile of A at [i, k], an 8x4 bf16 tile of B at [k, j] and the 4x4 f32
+  // accumulator tile of C at [i, j], extends the inputs to f32, performs
+  // C_tile += A_tile * B_tile, and writes the tile back to C.
+  func.func @gemm_64x32x64_bf16_packed_4x8x4(%A: memref<16x4x4x8xbf16>,
+                                             %B: memref<4x16x8x4xbf16>,
+                                             %C: memref<16x16x4x4xf32>) {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c4 = arith.constant 4 : index
+    %c16 = arith.constant 16 : index
+    // Padding values required by vector.transfer_read.
+    %c0_bf16 = arith.constant 0.000000e+00 : bf16
+    %c0_f32 = arith.constant 0.000000e+00 : f32
+    scf.for %i = %c0 to %c16 step %c1 {
+      scf.for %j = %c0 to %c16 step %c1 {
+        scf.for %k = %c0 to %c4 step %c1 {
+          %va = vector.transfer_read %A[%i, %k, %c0, %c0], %c0_bf16 :
+                                        memref<16x4x4x8xbf16>, vector<4x8xbf16>
+          %vb = vector.transfer_read %B[%k, %j, %c0, %c0], %c0_bf16 :
+                                        memref<4x16x8x4xbf16>, vector<8x4xbf16>
+          %vc = vector.transfer_read %C[%i, %j, %c0, %c0], %c0_f32 :
+                                        memref<16x16x4x4xf32>, vector<4x4xf32>
+          %vaf32 = arith.extf %va : vector<4x8xbf16> to vector<4x8xf32>
+          %vbf32 = arith.extf %vb : vector<8x4xbf16> to vector<8x4xf32>
+          // #map/#map1/#map2 are (d0,d2), (d2,d1) -> (d0,d1): a plain matmul
+          // with d2 as the reduction dimension, accumulated into %vc.
+          %vr = vector.contract {
+                        indexing_maps = [#map, #map1, #map2],
+                        iterator_types = ["parallel", "parallel", "reduction"],
+                        kind = #vector.kind<add>}
+                      %vaf32, %vbf32, %vc :
+                      vector<4x8xf32>, vector<8x4xf32> into vector<4x4xf32>
+          vector.transfer_write %vr, %C[%i, %j, %c0, %c0] :
+                                        vector<4x4xf32>, memref<16x16x4x4xf32>
+        }
+      }
+    }
+    return
+  }
+
+  // @A and @B are read-only splat inputs. @C is the output buffer the kernel
+  // writes with vector.transfer_write, so it must not be declared `constant`:
+  // writing to a constant global is undefined.
+  memref.global "private" constant @A : memref<16x4x4x8xbf16> = dense<1.000000e+00>
+  memref.global "private" constant @B : memref<4x16x8x4xbf16> = dense<2.000000e+00>
+  memref.global "private" @C : memref<16x16x4x4xf32> = dense<0.000000e+00>
+  // Entry point: runs the kernel once on the three globals.
+  func.func @main() {
+    %0 = memref.get_global @A : memref<16x4x4x8xbf16>
+    %1 = memref.get_global @B : memref<4x16x8x4xbf16>
+    %2 = memref.get_global @C : memref<16x16x4x4xf32>
+    func.call @gemm_64x32x64_bf16_packed_4x8x4(%0, %1, %2) : (memref<16x4x4x8xbf16>, memref<4x16x8x4xbf16>, memref<16x16x4x4xf32>) -> ()
+    return
+  }
+}
+
+// CHECK-LABEL: N: 64, M: 64, K: 32
+// CHECK-LABEL: Running MATMUL...
+// CHECK: Cycle count: [[CC:[0-9]+]]
+// CHECK-LABEL: Finish MATMUL!
+// CHECK-LABEL: Compare the results
+// CHECK: PASSED, Max delta: [[MD:-?[0-9]+\.[0-9]+]], pixel intensity
+
+// RUN: xchesscc -j1 -pme -P $AIETOOLS/data/aie_ml/lib -f -CRelease_LLVM -w work -D__AIENGINE__ -D__AIE_ARCH__=20 -D__AIEARCH__=20 -I $AIETOOLS/include kernel.ll -o kernel.o
@@ -0,0 +1,70 @@
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Problem dimensions: A is N_SIZE x K_SIZE, B is K_SIZE x M_SIZE and C is
+// N_SIZE x M_SIZE.
+#define N_SIZE 64
+#define M_SIZE 64
+#define K_SIZE 32
+
+// Element counts of the flattened matrices, derived from the dimensions so
+// they cannot drift apart (2048, 2048 and 4096 for the values above).
+#define MAT_A_SIZE (N_SIZE * K_SIZE)
+#define MAT_B_SIZE (K_SIZE * M_SIZE)
+#define MAT_C_SIZE (N_SIZE * M_SIZE)
+
+// Kernel operands and a buffer for reference results.
+bfloat16 mat_a_data[MAT_A_SIZE];
+bfloat16 mat_b_data[MAT_B_SIZE];
+float mat_c_data[MAT_C_SIZE];
+float ref_c_data[MAT_C_SIZE];
+
+// File names for matrix data exchanged with the host.
+#define INPUT_A_FILE "matrix_a_test.txt"
+#define INPUT_B_FILE "matrix_b_test.txt"
+#define OUTPUT_C_FILE "matrix_c_test.txt"
+
+// When not building with the Chess compiler (`__chess__` undefined), provide
+// a stand-in cycle counter that always reports 0.
+#ifndef __chess__
+int chess_cycle_count() { return 0; }
+#endif
+
+// Kernel under test. It is compiled from MLIR-generated LLVM IR, so its symbol
+// carries no C++ name mangling and must be declared with C linkage to link.
+// `__restrict` is used because plain `restrict` is a C keyword, not a C++ one.
+extern "C" void gemm_64x32x64_bf16_packed_4x8x4(bfloat16 *__restrict mat_a_data,
+                                                bfloat16 *__restrict mat_b_data,
+                                                float *__restrict mat_c_data);
+
+// Converts `value` to bfloat16 by keeping the upper 16 bits of its binary32
+// encoding (truncation, no rounding). memcpy is used for the bit copies to
+// avoid type punning through reinterpret_cast.
+static bfloat16 toBfloat16(float value) {
+  static_assert(sizeof(bfloat16) == sizeof(uint16_t),
+                "bfloat16 is expected to be 16 bits wide");
+  uint32_t bits = 0;
+  memcpy(&bits, &value, sizeof(bits));
+  const uint16_t upperHalf = static_cast<uint16_t>(bits >> 16);
+  bfloat16 result;
+  memcpy(&result, &upperHalf, sizeof(result));
+  return result;
+}
+
+// Fills the operand matrices, runs the GEMM kernel once and prints the number
+// of cycles it took. Always returns 0.
+int main() {
+  // Fill matrix_a: every element of row i holds the value i.
+  // The previous code took the upper 16 bits of the *integer* i, which is 0
+  // for every index used here, so the matrix was silently all zeros.
+  int index = 0;
+  for (int i = 0; i < N_SIZE; i++) {
+    for (int k = 0; k < K_SIZE; k++) {
+      mat_a_data[index++] = toBfloat16(static_cast<float>(i));
+    }
+  }
+
+  // Fill matrix_b: every element of row k holds the value k.
+  // NOTE(review): the previous code read the stale `i` left over from the
+  // loop above; using this loop's own row index is assumed to be the intent.
+  index = 0;
+  for (int k = 0; k < K_SIZE; k++) {
+    for (int j = 0; j < M_SIZE; j++) {
+      mat_b_data[index++] = toBfloat16(static_cast<float>(k));
+    }
+  }
+
+  // Zero the accumulator matrix_c.
+  index = 0;
+  for (int i = 0; i < N_SIZE; i++) {
+    for (int j = 0; j < M_SIZE; j++) {
+      mat_c_data[index++] = 0.f;
+    }
+  }
+
+  // Compute matrix multiplication
+  // reference(mat_a_data, mat_b_data, mat_c_data);
+  auto cyclesBegin = chess_cycle_count();
+  gemm_64x32x64_bf16_packed_4x8x4(mat_a_data, mat_b_data, mat_c_data);
+  auto cyclesEnd = chess_cycle_count();
+
+  // Report the measurement; the cycle counters were previously captured but
+  // never used.
+  printf("Cycle count: %d\n", static_cast<int>(cyclesEnd - cyclesBegin));
+
+  return 0;
+}
@@ -0,0 +1,28 @@
+# Copyright 2024 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Read the global MLIR_DIALECT_LIBS and MLIR_TRANSLATION_LIBS properties so the
+# libraries they list can be linked into the tool below.
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS)
+
+# The iree-aie-translate binary, built from iree-aie-translate.cpp and linked
+# against the AIE/AIEVec libraries, the libraries collected above and the
+# listed LLVM/MLIR components.
+iree_cc_binary(
+  NAME
+    iree-aie-translate
+  SRCS
+    iree-aie-translate.cpp
+  DEPS
+    iree::target::amd-aie::aie::AIEDialectIR
+    iree::target::amd-aie::aievec::AIEVecDialectIR
+    iree::target::amd-aie::aievec::AIEVecConvertToLLVM
+    ${dialect_libs}
+    ${translation_libs}
+    LLVMSupport
+    MLIRFuncAllExtensions
+    MLIRIR
+    MLIRParser
+    MLIRPass
+    MLIRTargetLLVMIRExport
+    MLIRTargetLLVMIRImport
+)
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+
+// REQUIRES: valid_xchess_license
+// REQUIRES: peano, peano_and_chess
+// RUN: mkdir -p %t/data; cd %t
+// RUN: aie-opt %s %vector-to-generic-llvmir% -o llvmir.mlir
+// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll
+// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o
+// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o
+// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout
+// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s
+// CHECK: TEST PASSED
+// NOTE: presumably the definition of the %vector-to-generic-llvmir%
+// substitution used in the RUN lines above:
+//     vector_to_generic_llvmir = '-canonicalize-vector-for-aievec=aie-target=aie2 -convert-vector-to-llvm -lower-affine -convert-scf-to-cf -canonicalize -cse -convert-math-to-llvm -expand-strided-metadata -finalize-memref-to-llvm -convert-func-to-llvm=\'use-bare-ptr-memref-call-conv\' -convert-index-to-llvm -canonicalize -cse'
+
+// Scratch notes: candidate flags and commands for running this test with the
+// IREE tools instead (TODO: turn these into RUN lines or remove them).
+// --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf --iree-convert-to-llvm  | iree-aie-translate --mlir-to-llvmir -o kernel.ll
+
+// --convert-vector-to-aievec -lower-affine -canonicalize -cse --convert-aievec-to-llvm --convert-scf-to-cf --iree-convert-to-llvm | ./tools/iree-aie-translate --mlir-to-llvmir
+// ../llvm-aie/bin/clang --target=aie2-none-unknown-elf -Wl,--gc-sections -Wl,--orphan-handling=warn -Wl,-T,$PWD/ldfile kernel.o -o test.exe -v
+
+module {
+  // Device under test: element-wise i16 multiply,
+  // %arg2[i] = %arg0[i] * %arg1[i] for i in [0, 1024). All three buffers are
+  // asserted to have an alignment of 32.
+  func.func private @dut(%arg0: memref<1024xi16>, %arg1: memref<1024xi16>, %arg2: memref<1024xi16>) {
+    memref.assume_alignment %arg0, 32 : memref<1024xi16>
+    memref.assume_alignment %arg1, 32 : memref<1024xi16>
+    memref.assume_alignment %arg2, 32 : memref<1024xi16>
+    affine.for %arg3 = 0 to 1024 {
+      %0 = affine.load %arg0[%arg3] : memref<1024xi16>
+      %1 = affine.load %arg1[%arg3] : memref<1024xi16>
+      %2 = arith.muli %0, %1 : i16
+      affine.store %2, %arg2[%arg3] : memref<1024xi16>
+    }
+    return
+  }
+  // @A and @B are read-only splat inputs. @C receives the results through
+  // affine.store in @dut, so it must not be declared `constant`: writing to a
+  // constant global is undefined.
+  memref.global "private" constant @A : memref<1024xi16> = dense<1>
+  memref.global "private" constant @B : memref<1024xi16> = dense<2>
+  memref.global "private" @C : memref<1024xi16> = dense<0>
+  // Entry point: runs @dut once on the three globals.
+  func.func @main() {
+    %0 = memref.get_global @A : memref<1024xi16>
+    %1 = memref.get_global @B : memref<1024xi16>
+    %2 = memref.get_global @C : memref<1024xi16>
+    func.call @dut(%0, %1, %2) : (memref<1024xi16>, memref<1024xi16>, memref<1024xi16>) -> ()
+    return
+  }
+}
@@ -0,0 +1,93 @@
+// Copyright 2024 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "aie/AIEDialect.h"
+#include "aievec/AIEVecDialect.h"
+#include "aievec/Passes.h"
+#include "aievec/XLLVMDialect.h"
+#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
+#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
+#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
+#include "mlir/Dialect/Arith/Transforms/Passes.h"
+#include "mlir/Dialect/DLTI/DLTI.h"
+#include "mlir/Dialect/Func/Extensions/AllExtensions.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Func/Transforms/Passes.h"
+#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
+#include "mlir/Dialect/SCF/Transforms/Passes.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/InitAllTranslations.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Target/LLVMIR/Dialect/All.h"
+#include "mlir/Target/LLVMIR/Export.h"
+#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
+#include "mlir/Tools/mlir-translate/Translation.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+using namespace mlir::iree_compiler;
+
+namespace aie {
+/// Registers the `mlir-to-llvmir` translation.
+///
+/// The translation first runs a fixed lowering pipeline on the input
+/// operation (vector, memref, math, index, arith, func and control-flow
+/// conversions to LLVM, followed by canonicalization and CSE), then exports
+/// the result to LLVM IR and prints it to the output stream. It fails if
+/// either the lowering pipeline or the export fails.
+void registerToLLVMIRTranslation() {
+  TranslateFromMLIRRegistration registration(
+      "mlir-to-llvmir", "Translate MLIR to LLVMIR",
+      [](Operation *op, raw_ostream &output) {
+        PassManager pm(op->getContext());
+        pm.addPass(createConvertVectorToLLVMPass());
+        pm.addPass(memref::createExpandStridedMetadataPass());
+        pm.addPass(createConvertMathToLLVMPass());
+        pm.addPass(createConvertIndexToLLVMPass());
+        pm.addPass(arith::createArithExpandOpsPass());
+        pm.addPass(createArithToLLVMConversionPass());
+        pm.addPass(createFinalizeMemRefToLLVMConversionPass());
+        // Functions take bare pointers instead of memref descriptors.
+        ConvertFuncToLLVMPassOptions options;
+        options.useBarePtrCallConv = true;
+        pm.addPass(createConvertFuncToLLVMPass(options));
+        pm.addPass(createConvertControlFlowToLLVMPass());
+        pm.addPass(createCanonicalizerPass());
+        pm.addPass(createCSEPass());
+        // Propagate pipeline failures instead of discarding them, so a module
+        // that failed to lower is never handed to the exporter.
+        if (failed(pm.run(op))) return failure();
+
+        llvm::LLVMContext llvmContext;
+        auto llvmModule = translateModuleToLLVMIR(op, llvmContext);
+        if (!llvmModule) return failure();
+        llvmModule->print(output, nullptr);
+        return success();
+      },
+      [](DialectRegistry &registry) {
+        // Dialects that may appear in the input or be produced by the
+        // lowering pipeline above.
+        registry
+            .insert<DLTIDialect, LLVM::LLVMDialect, aievec::AIEVecDialect,
+                    aievec::xllvm::XLLVMDialect, arith::ArithDialect,
+                    cf::ControlFlowDialect, func::FuncDialect,
+                    math::MathDialect, memref::MemRefDialect, scf::SCFDialect,
+                    vector::VectorDialect, xilinx::AIE::AIEDialect>();
+        // Dialect-to-LLVM-IR translation interfaces used by the exporter.
+        registerBuiltinDialectTranslation(registry);
+        registerLLVMDialectTranslation(registry);
+        aievec::registerXLLVMDialectTranslation(registry);
+        // Convert-to-LLVM interfaces and extensions for the dialects above.
+        arith::registerConvertArithToLLVMInterface(registry);
+        cf::registerConvertControlFlowToLLVMInterface(registry);
+        func::registerAllExtensions(registry);
+        registerConvertFuncToLLVMInterface(registry);
+        index::registerConvertIndexToLLVMInterface(registry);
+        registerConvertMathToLLVMInterface(registry);
+        registerConvertMemRefToLLVMInterface(registry);
+      });
+}
+}  // namespace aie
+
+// Tool entry point: registers the LLVM IR import and the AIE `mlir-to-llvmir`
+// export translations, then hands control to the generic translate driver.
+// Exits with 1 if the driver reports failure and 0 otherwise.
+int main(int argc, char **argv) {
+  registerFromLLVMIRTranslation();
+  aie::registerToLLVMIRTranslation();
+
+  const LogicalResult status =
+      mlirTranslateMain(argc, argv, "AMDAIE Translation Tool");
+  return failed(status) ? 1 : 0;
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,3 +13,5 @@ iree_lit_test_suite( @@
       LABELS
         "hostonly"
     )
+    iree_add_all_subdirs()