Skip to content

Commit

Permalink
[CI] Add initial Strix matmul performance tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jtuyls committed Jan 30, 2025
1 parent a801c92 commit 3766a22
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 6 deletions.
36 changes: 35 additions & 1 deletion .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ jobs:
git config user.name "github-actions"
git config user.email "github-actions@github.com"
git add results_history.json results_history.html
git commit -m "Update performance results and deploy"
git commit -m "Update NPU1 performance results and deploy"
git push
test_linux_strix:
Expand Down Expand Up @@ -314,3 +314,37 @@ jobs:
--xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \
-v
# Run the 'Performance' tests. These do not check numerical correctness,
# just measure the time to run some workloads.
- name : Performance benchmarks
run: |
source .venv/bin/activate
python build_tools/ci/cpu_comparison/run.py \
test_aie_vs_cpu \
$PWD/iree-install \
--peano_dir=$PWD/llvm-aie \
--vitis_dir=/opt/Xilinx/Vitis/2024.2 \
--target_device="npu4" \
--reset_npu_between_runs -v \
--xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \
--tests=Performance > performance_npu4.log
# Print a summary of the findings.
python build_tools/ci/cpu_comparison/performance_summarizer.py \
performance_npu4.log results_npu4.json
# Only publish the performance results on main branch pushes.
- name: Publish performance results
if: github.event_name == 'push' && github.ref_name == 'main'
run: |
cp build_tools/ci/cpu_comparison/performance_publish.py .
git fetch origin gh-pages
git checkout gh-pages
python performance_publish.py results_npu4.json results_history_npu4.json results_history_npu4.html
git config user.name "github-actions"
git config user.email "github-actions@github.com"
git add results_history_npu4.json results_history_npu4.html
git commit -m "Update NPU4 performance results and deploy"
git push
61 changes: 56 additions & 5 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ def __init__(
aie_compilation_flags=None,
n_repeats=1,
n_kernel_runs=1,
use_chess=False,
):
aie_compilation_flags = (
[] if aie_compilation_flags is None else aie_compilation_flags
Expand All @@ -375,6 +376,7 @@ def __init__(
use_ukernel=use_ukernel,
n_repeats=n_repeats,
n_kernel_runs=n_kernel_runs,
use_chess=use_chess,
)

self.name = f"matmul_benchmark_{M}_{N}_{K}_{input_type}_{acc_type}"
Expand Down Expand Up @@ -1818,6 +1820,9 @@ def __init__(self):
)

performance_tests = [
##############
# NPU1 Tests #
##############
{
"M": 512,
"N": 512,
Expand Down Expand Up @@ -1993,6 +1998,39 @@ def __init__(self):
"skip_numerics": True,
"tile_pipeline": "pack-peel-4-level-tiling",
},
##############
# NPU4 Tests #
##############
{
"M": 512,
"N": 4096,
"K": 512,
"in_dtype": "i8",
"out_dtype": "i32",
"use_ukernel": True,
"peano_opt_level": 3,
"outline": "all",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
"run_on_target": "npu4",
},
{
"M": 512,
"N": 4096,
"K": 512,
"in_dtype": "i8",
"out_dtype": "i32",
"use_ukernel": False,
"peano_opt_level": 3,
"outline": "all",
"outline_to_empty_function": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
"run_on_target": "npu4",
"skip_numerics": True,
},
]

# Performance tests (bf16 inputs / f32 outputs unless a test overrides in_dtype / out_dtype):
Expand All @@ -2006,15 +2044,27 @@ def __init__(self):
transpose_a = test["transpose_a"]
transpose_b = test["transpose_b"]
tile_pipeline = test["tile_pipeline"]
run_on_target = (
test["run_on_target"] if "run_on_target" in test else "npu1_4col"
)
in_dtype = test["in_dtype"] if "in_dtype" in test else "bf16"
out_dtype = test["out_dtype"] if "out_dtype" in test else "f32"

outlining_string = "--iree-amdaie-enable-function-outlining=" + outline

peano_opt_level_string = f'"-O{peano_opt_level}"'
name_suffix = "O" + str(peano_opt_level)
name_suffix += "_" + run_on_target

aie_compilation_flags = [
outlining_string,
f"--iree-amd-aie-additional-peano-opt-flags={peano_opt_level_string}",
]

if run_on_target == "npu4":
aie_compilation_flags.append("--iree-amdaie-num-rows=4")
aie_compilation_flags.append("--iree-amdaie-num-cols=8")

outline_to_empty_function = False
empty_key = "outline_to_empty_function"
if empty_key in test and test[empty_key] == True:
Expand All @@ -2025,7 +2075,6 @@ def __init__(self):
"--iree-amdaie-replace-outlined-functions-with-empty"
)

name_suffix = "O" + str(peano_opt_level)
if outline != "none":
if outline_to_empty_function:
name_suffix += "_outline_empty"
Expand Down Expand Up @@ -2057,8 +2106,9 @@ def __init__(self):
M,
N,
K,
"bf16",
"f32",
in_dtype,
out_dtype,
run_on_target=run_on_target,
tile_pipeline=tile_pipeline,
use_ukernel=use_ukernel,
n_repeats=2,
Expand All @@ -2073,8 +2123,9 @@ def __init__(self):
M,
N,
K,
"bf16",
"f32",
in_dtype,
out_dtype,
run_on_target=run_on_target,
tile_pipeline=tile_pipeline,
additional_labels=["Performance"],
use_ukernel=use_ukernel,
Expand Down

0 comments on commit 3766a22

Please sign in to comment.