Skip to content

Commit

Permalink
Update mm_npu4 for 64x64x64 on 4x8 AIE array + add e2e CI test
Browse files Browse the repository at this point in the history
  • Loading branch information
Abhishek-Varma committed Jan 20, 2025
1 parent f96b969 commit 9629558
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
16 changes: 16 additions & 0 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1740,6 +1740,22 @@ def __init__(self):
run_on_target=["npu4"],
)
)
# Matmul test with 64, 64, 64 dimensions on 4(rows)x8(cols) cores, selected
# via the two --iree-amdaie-num-* flags below. Registered for "npu4" only,
# with use_ukernel and use_chess enabled.
# NOTE(review): "bf16"/"f32" are presumably the input and accumulator
# element types -- confirm against the Matmul constructor signature.
self.register(
Matmul(
64,
64,
64,
"bf16",
"f32",
use_ukernel=True,
aie_compilation_flags=[
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=8",
],
use_chess=True,
run_on_target=["npu4"],
)
)

# Matmul test on 2(rows)x2(cols) cores
self.register(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,13 @@ extern "C" {
zero_vectorized<ctype_out, M, N, r>(c_out, offsetC); \
}
matmul_combos(matmul_vectorized_c_func, 16, 8, 32)
matmul_combos(matmul_vectorized_c_func, 16, 16, 32)
matmul_combos(matmul_vectorized_c_func, 32, 32, 32)
matmul_combos(matmul_vectorized_c_func, 32, 32, 64)
matmul_combos(matmul_vectorized_c_func, 64, 64, 64)
zero_fill_combos(zero_vectorized_c_func, 16, 8)
zero_fill_combos(zero_vectorized_c_func, 16, 16)
zero_fill_combos(zero_vectorized_c_func, 32, 32)
zero_fill_combos(zero_vectorized_c_func, 64, 64)
Expand Down

0 comments on commit 9629558

Please sign in to comment.