Skip to content

Commit

Permalink
bump mlir-aie (Xilinx#564)
Browse files: browse the repository at this point in the history
  • Loading branch information
fifield authored May 7, 2024
1 parent aff2a49 commit 5a1991d
Show file tree
Hide file tree
Showing 9 changed files with 22 additions and 32 deletions.
10 changes: 0 additions & 10 deletions mlir/lib/Transform/AIRHerdAssignPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,16 +111,6 @@ class AIRHerdAssignPass
}
}

for (auto f : module.getOps<func::FuncOp>()) {
std::vector<func::CallOp> kernelOps;
f.walk([&](Operation *o) {
if (auto co = dyn_cast<func::CallOp>(o)) {
if (co.getCallee().startswith("acap_conv2d_hw_kernel")) {
kernelOps.push_back(co);
}
}
});
}
}

private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ module {
%subview_9 = memref.subview %arg17[0, %arg13, 0, 0] [1, 1, 16, 16] [1, 1, 1, 1] : memref<1x1x16x16xi32, 1> to memref<1x1x16x16xi32, strided<[256, 256, 16, 1], offset: ?>, 1>
%subview_10 = memref.subview %arg18[%arg12, %arg13, 0, 0] [1, 1, 8, 16] [1, 1, 1, 1] : memref<1x1x8x16xi32, 1> to memref<1x1x8x16xi32, strided<[128, 128, 16, 1], offset: ?>, 1>
%alloc_11 = memref.alloc() : memref<1x1x2x2x4x8xi32, 2>
%expand_shape = memref.expand_shape %subview_8 [[0], [1], [2, 3], [4, 5]] : memref<1x1x8x16xi32, strided<[128, 128, 16, 1], offset: ?>, 1> into memref<1x1x2x4x2x8xi32, strided<[128, 128, 64, 16, 8, 1], offset: ?>, 1>
%expand_shape = memref.expand_shape %subview_8 [[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 2, 4, 2, 8]: memref<1x1x8x16xi32, strided<[128, 128, 16, 1], offset: ?>, 1> into memref<1x1x2x4x2x8xi32, strided<[128, 128, 64, 16, 8, 1], offset: ?>, 1>
%transpose_12 = memref.transpose %expand_shape (d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d2, d3, d5) : memref<1x1x2x4x2x8xi32, strided<[128, 128, 64, 16, 8, 1], offset: ?>, 1> to memref<1x1x2x2x4x8xi32, strided<[128, 128, 8, 64, 16, 1], offset: ?>, 1>
air.dma_memcpy_nd (%alloc_11[] [] [], %transpose_12[] [] []) : (memref<1x1x2x2x4x8xi32, 2>, memref<1x1x2x2x4x8xi32, strided<[128, 128, 8, 64, 16, 1], offset: ?>, 1>)
%alloc_13 = memref.alloc() : memref<1x1x2x2x8x8xi32, 2>
%expand_shape_14 = memref.expand_shape %subview_9 [[0], [1], [2, 3], [4, 5]] : memref<1x1x16x16xi32, strided<[256, 256, 16, 1], offset: ?>, 1> into memref<1x1x2x8x2x8xi32, strided<[256, 256, 128, 16, 8, 1], offset: ?>, 1>
%expand_shape_14 = memref.expand_shape %subview_9 [[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 2, 8, 2, 8] : memref<1x1x16x16xi32, strided<[256, 256, 16, 1], offset: ?>, 1> into memref<1x1x2x8x2x8xi32, strided<[256, 256, 128, 16, 8, 1], offset: ?>, 1>
%transpose_15 = memref.transpose %expand_shape_14 (d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d2, d3, d5) : memref<1x1x2x8x2x8xi32, strided<[256, 256, 128, 16, 8, 1], offset: ?>, 1> to memref<1x1x2x2x8x8xi32, strided<[256, 256, 8, 128, 16, 1], offset: ?>, 1>
air.dma_memcpy_nd (%alloc_13[] [] [], %transpose_15[] [] []) : (memref<1x1x2x2x8x8xi32, 2>, memref<1x1x2x2x8x8xi32, strided<[256, 256, 8, 128, 16, 1], offset: ?>, 1>)
%alloc_16 = memref.alloc() : memref<1x1x2x2x4x8xi32, 2>
Expand Down
4 changes: 2 additions & 2 deletions test/xrt/04_gemm_w_pack/aie.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@
%subview_10 = memref.subview %alloc_8[0, 0, %arg7, %arg5] [1, 1, 32, 32] [1, 1, 1, 1] : memref<1x1x256x64xi32, 1> to memref<1x1x32x32xi32, strided<[16384, 16384, 64, 1], offset: ?>, 1>
%alloc_11 = memref.alloc() : memref<1x1x4x8x4x8xi32, 2>
%alloc_12 = memref.alloc() : memref<1x1x4x4x8x8xi32, 2>
%expand_shape = memref.expand_shape %subview_9 [[0], [1], [2, 3], [4, 5]] : memref<1x1x32x32xi32, strided<[16384, 16384, 256, 1], offset: ?>, 1> into memref<1x1x8x4x4x8xi32, strided<[16384, 16384, 1024, 256, 8, 1], offset: ?>, 1>
%expand_shape = memref.expand_shape %subview_9 [[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 8, 4, 4, 8] : memref<1x1x32x32xi32, strided<[16384, 16384, 256, 1], offset: ?>, 1> into memref<1x1x8x4x4x8xi32, strided<[16384, 16384, 1024, 256, 8, 1], offset: ?>, 1>
%transpose_13 = memref.transpose %expand_shape (d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d2, d3, d5) : memref<1x1x8x4x4x8xi32, strided<[16384, 16384, 1024, 256, 8, 1], offset: ?>, 1> to memref<1x1x4x8x4x8xi32, strided<[16384, 16384, 8, 1024, 256, 1], offset: ?>, 1>
air.dma_memcpy_nd (%alloc_11[] [] [], %transpose_13[] [] []) : (memref<1x1x4x8x4x8xi32, 2>, memref<1x1x4x8x4x8xi32, strided<[16384, 16384, 8, 1024, 256, 1], offset: ?>, 1>)
%expand_shape_14 = memref.expand_shape %subview_10 [[0], [1], [2, 3], [4, 5]] : memref<1x1x32x32xi32, strided<[16384, 16384, 64, 1], offset: ?>, 1> into memref<1x1x4x8x4x8xi32, strided<[16384, 16384, 512, 64, 8, 1], offset: ?>, 1>
%expand_shape_14 = memref.expand_shape %subview_10 [[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 4, 8, 4, 8] : memref<1x1x32x32xi32, strided<[16384, 16384, 64, 1], offset: ?>, 1> into memref<1x1x4x8x4x8xi32, strided<[16384, 16384, 512, 64, 8, 1], offset: ?>, 1>
%transpose_15 = memref.transpose %expand_shape_14 (d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d2, d3, d5) : memref<1x1x4x8x4x8xi32, strided<[16384, 16384, 512, 64, 8, 1], offset: ?>, 1> to memref<1x1x4x4x8x8xi32, strided<[16384, 16384, 8, 512, 64, 1], offset: ?>, 1>
air.dma_memcpy_nd (%alloc_12[] [] [], %transpose_15[] [] []) : (memref<1x1x4x4x8x8xi32, 2>, memref<1x1x4x4x8x8xi32, strided<[16384, 16384, 8, 512, 64, 1], offset: ?>, 1>)
linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} ins(%alloc_11, %alloc_12 : memref<1x1x4x8x4x8xi32, 2>, memref<1x1x4x4x8x8xi32, 2>) outs(%alloc_2 : memref<1x1x4x8x4x8xi32, 2>) {
Expand Down
4 changes: 2 additions & 2 deletions test/xrt/08_gemm_extern_vec/aie.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@
%8 = affine.apply #map2()[%arg4]
%subview_4 = memref.subview %alloc[%7, %8] [64, 64] [1, 1] : memref<128x1024xbf16, 1> to memref<64x64xbf16, strided<[1024, 1], offset: ?>, 1>
%alloc_5 = memref.alloc() : memref<8x16x4x8xbf16, 2>
%expand_shape = memref.expand_shape %subview_4 [[0, 1], [2, 3]] : memref<64x64xbf16, strided<[1024, 1], offset: ?>, 1> into memref<16x4x8x8xbf16, strided<[4096, 1024, 8, 1], offset: ?>, 1>
%expand_shape = memref.expand_shape %subview_4 [[0, 1], [2, 3]] output_shape [16, 4, 8, 8] : memref<64x64xbf16, strided<[1024, 1], offset: ?>, 1> into memref<16x4x8x8xbf16, strided<[4096, 1024, 8, 1], offset: ?>, 1>
%transpose_6 = memref.transpose %expand_shape (d0, d1, d2, d3) -> (d2, d0, d1, d3) : memref<16x4x8x8xbf16, strided<[4096, 1024, 8, 1], offset: ?>, 1> to memref<8x16x4x8xbf16, strided<[8, 4096, 1024, 1], offset: ?>, 1>
air.dma_memcpy_nd (%alloc_5[] [] [], %transpose_6[] [] []) : (memref<8x16x4x8xbf16, 2>, memref<8x16x4x8xbf16, strided<[8, 4096, 1024, 1], offset: ?>, 1>)
%9 = affine.apply #map2()[%arg4]
%10 = affine.apply #map1()[%arg3]
%subview_7 = memref.subview %alloc_0[%9, %10] [64, 64] [1, 1] : memref<1024x128xbf16, 1> to memref<64x64xbf16, strided<[128, 1], offset: ?>, 1>
%alloc_8 = memref.alloc() : memref<16x8x8x4xbf16, 2>
%expand_shape_9 = memref.expand_shape %subview_7 [[0, 1], [2, 3]] : memref<64x64xbf16, strided<[128, 1], offset: ?>, 1> into memref<8x8x16x4xbf16, strided<[1024, 128, 4, 1], offset: ?>, 1>
%expand_shape_9 = memref.expand_shape %subview_7 [[0, 1], [2, 3]] output_shape [8, 8, 16, 4] : memref<64x64xbf16, strided<[128, 1], offset: ?>, 1> into memref<8x8x16x4xbf16, strided<[1024, 128, 4, 1], offset: ?>, 1>
%transpose_10 = memref.transpose %expand_shape_9 (d0, d1, d2, d3) -> (d2, d0, d1, d3) : memref<8x8x16x4xbf16, strided<[1024, 128, 4, 1], offset: ?>, 1> to memref<16x8x8x4xbf16, strided<[4, 1024, 128, 1], offset: ?>, 1>
air.dma_memcpy_nd (%alloc_8[] [] [], %transpose_10[] [] []) : (memref<16x8x8x4xbf16, 2>, memref<16x8x8x4xbf16, strided<[4, 1024, 128, 1], offset: ?>, 1>)
linalg.generic {indexing_maps = [#map3, #map4, #map5], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"], library_call = "matmul_bf16_bf16"} ins(%alloc_5, %alloc_8 : memref<8x16x4x8xbf16, 2>, memref<16x8x8x4xbf16, 2>) outs(%alloc_3 : memref<16x16x4x4xbf16, 2>) {
Expand Down
4 changes: 2 additions & 2 deletions test/xrt/09_gemm_extern_vec_4x4/aie.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@
%8 = affine.apply #map2()[%arg4]
%subview_4 = memref.subview %alloc[%7, %8] [64, 64] [1, 1] : memref<256x1024xbf16, 1> to memref<64x64xbf16, strided<[1024, 1], offset: ?>, 1>
%alloc_5 = memref.alloc() : memref<8x16x4x8xbf16, 2>
%expand_shape = memref.expand_shape %subview_4 [[0, 1], [2, 3]] : memref<64x64xbf16, strided<[1024, 1], offset: ?>, 1> into memref<16x4x8x8xbf16, strided<[4096, 1024, 8, 1], offset: ?>, 1>
%expand_shape = memref.expand_shape %subview_4 [[0, 1], [2, 3]] output_shape [16, 4, 8, 8] : memref<64x64xbf16, strided<[1024, 1], offset: ?>, 1> into memref<16x4x8x8xbf16, strided<[4096, 1024, 8, 1], offset: ?>, 1>
%transpose_6 = memref.transpose %expand_shape (d0, d1, d2, d3) -> (d2, d0, d1, d3) : memref<16x4x8x8xbf16, strided<[4096, 1024, 8, 1], offset: ?>, 1> to memref<8x16x4x8xbf16, strided<[8, 4096, 1024, 1], offset: ?>, 1>
air.dma_memcpy_nd (%alloc_5[] [] [], %transpose_6[] [] []) : (memref<8x16x4x8xbf16, 2>, memref<8x16x4x8xbf16, strided<[8, 4096, 1024, 1], offset: ?>, 1>)
%9 = affine.apply #map2()[%arg4]
%10 = affine.apply #map1()[%arg3]
%subview_7 = memref.subview %alloc_0[%9, %10] [64, 64] [1, 1] : memref<1024x256xbf16, 1> to memref<64x64xbf16, strided<[256, 1], offset: ?>, 1>
%alloc_8 = memref.alloc() : memref<16x8x8x4xbf16, 2>
%expand_shape_9 = memref.expand_shape %subview_7 [[0, 1], [2, 3]] : memref<64x64xbf16, strided<[256, 1], offset: ?>, 1> into memref<8x8x16x4xbf16, strided<[2048, 256, 4, 1], offset: ?>, 1>
%expand_shape_9 = memref.expand_shape %subview_7 [[0, 1], [2, 3]] output_shape [8, 8, 16, 4] : memref<64x64xbf16, strided<[256, 1], offset: ?>, 1> into memref<8x8x16x4xbf16, strided<[2048, 256, 4, 1], offset: ?>, 1>
%transpose_10 = memref.transpose %expand_shape_9 (d0, d1, d2, d3) -> (d2, d0, d1, d3) : memref<8x8x16x4xbf16, strided<[2048, 256, 4, 1], offset: ?>, 1> to memref<16x8x8x4xbf16, strided<[4, 2048, 256, 1], offset: ?>, 1>
air.dma_memcpy_nd (%alloc_8[] [] [], %transpose_10[] [] []) : (memref<16x8x8x4xbf16, 2>, memref<16x8x8x4xbf16, strided<[4, 2048, 256, 1], offset: ?>, 1>)
linalg.generic {indexing_maps = [#map3, #map4, #map5], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"], library_call = "matmul_bf16_bf16"} ins(%alloc_5, %alloc_8 : memref<8x16x4x8xbf16, 2>, memref<16x8x8x4xbf16, 2>) outs(%alloc_3 : memref<16x16x4x4xbf16, 2>) {
Expand Down
8 changes: 4 additions & 4 deletions test/xrt/10_gemm_peeling_extern_vec/aie.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@
scf.parallel (%arg2, %arg3) = (%c0, %c0) to (%c32, %c32) step (%c16, %c16) {
%3 = affine.apply #map()[%arg2]
%subview_11 = memref.subview %alloc_2[0, 0, %3, 0] [1, 1, 64, 64] [1, 1, 1, 1] : memref<1x1x128x64xbf16, 1 : i32> to memref<1x1x64x64xbf16, strided<[8192, 8192, 64, 1], offset: ?>, 1 : i32>
%expand_shape = memref.expand_shape %subview_11 [[0], [1], [2, 3], [4, 5]] : memref<1x1x64x64xbf16, strided<[8192, 8192, 64, 1], offset: ?>, 1 : i32> into memref<1x1x16x4x8x8xbf16, strided<[8192, 8192, 256, 64, 8, 1], offset: ?>, 1 : i32>
%expand_shape = memref.expand_shape %subview_11 [[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 16, 4, 8, 8] : memref<1x1x64x64xbf16, strided<[8192, 8192, 64, 1], offset: ?>, 1 : i32> into memref<1x1x16x4x8x8xbf16, strided<[8192, 8192, 256, 64, 8, 1], offset: ?>, 1 : i32>
%transpose_12 = memref.transpose %expand_shape (d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d2, d3, d5) : memref<1x1x16x4x8x8xbf16, strided<[8192, 8192, 256, 64, 8, 1], offset: ?>, 1 : i32> to memref<1x1x8x16x4x8xbf16, strided<[8192, 8192, 8, 256, 64, 1], offset: ?>, 1 : i32>
air.dma_memcpy_nd (%alloc_0[] [] [], %transpose_12[] [] []) : (memref<1x1x8x16x4x8xbf16, 2 : i32>, memref<1x1x8x16x4x8xbf16, strided<[8192, 8192, 8, 256, 64, 1], offset: ?>, 1 : i32>)
%4 = affine.apply #map()[%arg3]
%subview_13 = memref.subview %alloc_1[0, 0, 0, %4] [1, 1, 64, 64] [1, 1, 1, 1] : memref<1x1x64x128xbf16, 1 : i32> to memref<1x1x64x64xbf16, strided<[8192, 8192, 128, 1], offset: ?>, 1 : i32>
%expand_shape_14 = memref.expand_shape %subview_13 [[0], [1], [2, 3], [4, 5]] : memref<1x1x64x64xbf16, strided<[8192, 8192, 128, 1], offset: ?>, 1 : i32> into memref<1x1x8x8x16x4xbf16, strided<[8192, 8192, 1024, 128, 4, 1], offset: ?>, 1 : i32>
%expand_shape_14 = memref.expand_shape %subview_13 [[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 8, 8, 16, 4] : memref<1x1x64x64xbf16, strided<[8192, 8192, 128, 1], offset: ?>, 1 : i32> into memref<1x1x8x8x16x4xbf16, strided<[8192, 8192, 1024, 128, 4, 1], offset: ?>, 1 : i32>
%transpose_15 = memref.transpose %expand_shape_14 (d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d2, d3, d5) : memref<1x1x8x8x16x4xbf16, strided<[8192, 8192, 1024, 128, 4, 1], offset: ?>, 1 : i32> to memref<1x1x16x8x8x4xbf16, strided<[8192, 8192, 4, 1024, 128, 1], offset: ?>, 1 : i32>
air.dma_memcpy_nd (%alloc[] [] [], %transpose_15[] [] []) : (memref<1x1x16x8x8x4xbf16, 2 : i32>, memref<1x1x16x8x8x4xbf16, strided<[8192, 8192, 4, 1024, 128, 1], offset: ?>, 1 : i32>)
%subview_16 = memref.subview %alloc_3[0, 0, %arg3, %arg2, 0, 0] [1, 1, 16, 16, 4, 4] [1, 1, 1, 1, 1, 1] : memref<1x1x32x32x4x4xbf16, 2 : i32> to memref<1x1x16x16x4x4xbf16, strided<[16384, 16384, 512, 16, 4, 1], offset: ?>, 2 : i32>
Expand All @@ -77,12 +77,12 @@
scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%c32, %c32) step (%c16, %c16) {
%4 = affine.apply #map()[%arg3]
%subview_13 = memref.subview %alloc_2[0, 0, %4, 0] [1, 1, 64, 64] [1, 1, 1, 1] : memref<1x1x128x64xbf16, 1 : i32> to memref<1x1x64x64xbf16, strided<[8192, 8192, 64, 1], offset: ?>, 1 : i32>
%expand_shape = memref.expand_shape %subview_13 [[0], [1], [2, 3], [4, 5]] : memref<1x1x64x64xbf16, strided<[8192, 8192, 64, 1], offset: ?>, 1 : i32> into memref<1x1x16x4x8x8xbf16, strided<[8192, 8192, 256, 64, 8, 1], offset: ?>, 1 : i32>
%expand_shape = memref.expand_shape %subview_13 [[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 16, 4, 8, 8] : memref<1x1x64x64xbf16, strided<[8192, 8192, 64, 1], offset: ?>, 1 : i32> into memref<1x1x16x4x8x8xbf16, strided<[8192, 8192, 256, 64, 8, 1], offset: ?>, 1 : i32>
%transpose_14 = memref.transpose %expand_shape (d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d2, d3, d5) : memref<1x1x16x4x8x8xbf16, strided<[8192, 8192, 256, 64, 8, 1], offset: ?>, 1 : i32> to memref<1x1x8x16x4x8xbf16, strided<[8192, 8192, 8, 256, 64, 1], offset: ?>, 1 : i32>
air.dma_memcpy_nd (%alloc_0[] [] [], %transpose_14[] [] []) : (memref<1x1x8x16x4x8xbf16, 2 : i32>, memref<1x1x8x16x4x8xbf16, strided<[8192, 8192, 8, 256, 64, 1], offset: ?>, 1 : i32>)
%5 = affine.apply #map()[%arg4]
%subview_15 = memref.subview %alloc_1[0, 0, 0, %5] [1, 1, 64, 64] [1, 1, 1, 1] : memref<1x1x64x128xbf16, 1 : i32> to memref<1x1x64x64xbf16, strided<[8192, 8192, 128, 1], offset: ?>, 1 : i32>
%expand_shape_16 = memref.expand_shape %subview_15 [[0], [1], [2, 3], [4, 5]] : memref<1x1x64x64xbf16, strided<[8192, 8192, 128, 1], offset: ?>, 1 : i32> into memref<1x1x8x8x16x4xbf16, strided<[8192, 8192, 1024, 128, 4, 1], offset: ?>, 1 : i32>
%expand_shape_16 = memref.expand_shape %subview_15 [[0], [1], [2, 3], [4, 5]] output_shape [1, 1, 8, 8, 16, 4] : memref<1x1x64x64xbf16, strided<[8192, 8192, 128, 1], offset: ?>, 1 : i32> into memref<1x1x8x8x16x4xbf16, strided<[8192, 8192, 1024, 128, 4, 1], offset: ?>, 1 : i32>
%transpose_17 = memref.transpose %expand_shape_16 (d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d2, d3, d5) : memref<1x1x8x8x16x4xbf16, strided<[8192, 8192, 1024, 128, 4, 1], offset: ?>, 1 : i32> to memref<1x1x16x8x8x4xbf16, strided<[8192, 8192, 4, 1024, 128, 1], offset: ?>, 1 : i32>
air.dma_memcpy_nd (%alloc[] [] [], %transpose_17[] [] []) : (memref<1x1x16x8x8x4xbf16, 2 : i32>, memref<1x1x16x8x8x4xbf16, strided<[8192, 8192, 4, 1024, 128, 1], offset: ?>, 1 : i32>)
%subview_18 = memref.subview %alloc_3[0, 0, %arg4, %arg3, 0, 0] [1, 1, 16, 16, 4, 4] [1, 1, 1, 1, 1, 1] : memref<1x1x32x32x4x4xbf16, 2 : i32> to memref<1x1x16x16x4x4xbf16, strided<[16384, 16384, 512, 16, 4, 1], offset: ?>, 2 : i32>
Expand Down
Loading

0 comments on commit 5a1991d

Please sign in to comment.