diff --git a/.github/workflows/ci-sharktank-nightly.yaml b/.github/workflows/ci-sharktank-nightly.yaml new file mode 100644 index 000000000..4bd1e7ee7 --- /dev/null +++ b/.github/workflows/ci-sharktank-nightly.yaml @@ -0,0 +1,87 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +name: Sharktank Nightly Tests + +on: + workflow_dispatch: + schedule: + # Weekdays at 10:00 AM UTC = 02:00 AM PST / 03:00 AM PDT + - cron: "0 10 * * 1-5" + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). The workflow name is prepended to avoid conflicts between + # different workflows. + group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + nightly-mi300x: + if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }} + name: "Nightly tests and benchmarks" + strategy: + matrix: + version: [3.11] + fail-fast: false + runs-on: llama-mi300x-3 + defaults: + run: + shell: bash + env: + VENV_DIR: ${{ github.workspace }}/.venv + HF_HOME: "/data/huggingface" + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Get Current Date + id: date + run: echo "::set-output name=date::$(date +'%Y-%m-%d')" + + - name: "Setting up Python" + id: setup_python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: ${{matrix.version}} + - name: Create Python venv + run: python -m venv ${VENV_DIR} + + - name: Install pip deps + run: | + source ${VENV_DIR}/bin/activate + python -m pip install --no-compile --upgrade pip + + # Note: We install in three steps in order to satisfy requirements + # from non default locations first. + pip install --no-compile -r pytorch-cpu-requirements.txt + pip install -r requirements-iree-unpinned.txt + pip install --no-compile \ + -r sharktank/requirements-tests.txt \ + -e sharktank/ + + pip freeze + + - name: Run benchmarks + run: | + source ${VENV_DIR}/bin/activate + pytest \ + --verbose \ + --capture=no \ + --iree-hip-target=gfx942 \ + --iree-device=hip://0 \ + --with-flux-data \ + -m="benchmark" \ + --html=out/sharktank_nightly/benchmark/index.html \ + sharktank/tests + + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + with: + github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} + publish_dir: ./out/sharktank_nightly + destination_dir: ./sharktank_nightly + keep_files: true diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 51a34db4f..309849853 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -85,7 +85,10 @@ jobs: - name: Run sharktank tests if: ${{ !cancelled() }} run: | - pytest -n 4 sharktank/ --durations=10 + pytest \ + -n 4 \ + --durations=10 \ + sharktank/ test_with_data: @@ -191,7 +194,9 @@ jobs: - name: Run punet tests run: | - pytest -v sharktank/ -m punet_quick \ + pytest -v sharktank/ \ + -m "punet_quick or model_punet" \ + --with-expensive \ --durations=0 \ --timeout=600 diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..3628e583a --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "third_party/benchmark"] + path = third_party/benchmark + url = https://github.com/google/benchmark diff --git a/sharktank/pyproject.toml b/sharktank/pyproject.toml index 09aca178b..cdb72a5f0 100644 --- a/sharktank/pyproject.toml +++ b/sharktank/pyproject.toml @@ -41,9 +41,10 @@ testing = {file = ["requirements-tests.txt"]} addopts = [ "-ra", "--import-mode=importlib", - "-m=unit", + "-m=unit and not expensive", ] markers = [ + "benchmark: model benchmarks", "expensive: tests that are very expensive", "export: tests that require export from torch", "golden: tests that compare to some golden values", diff --git a/sharktank/sharktank/models/flux/benchmark.py b/sharktank/sharktank/models/flux/benchmark.py new file mode 100644 index 000000000..58af63c0c --- /dev/null +++ b/sharktank/sharktank/models/flux/benchmark.py @@ -0,0 +1,92 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from pathlib import Path +import iree.compiler +import iree.runtime +import os +from iree.turbine.support.tools import iree_tool_prepare_input_args + +from .export import ( + export_flux_transformer_from_hugging_face, + flux_transformer_default_batch_sizes, + iree_compile_flags, +) +from ...types import Dataset +from .flux import FluxModelV1, FluxParams +from ...utils.export_artifacts import ExportArtifacts +from ...utils.iree import flatten_for_iree_signature +from ...utils.benchmark import iree_benchmark_module + + +def iree_benchmark_flux_dev_transformer( + artifacts_dir: Path, + iree_device: str, + json_result_output_path: Path, + caching: bool = False, +) -> str: + mlir_path = artifacts_dir / "model.mlir" + parameters_path = artifacts_dir / "parameters.irpa" + if ( + not caching + or not os.path.exists(mlir_path) + or not os.path.exists(parameters_path) + ): + export_flux_transformer_from_hugging_face( + "black-forest-labs/FLUX.1-dev/black-forest-labs-transformer", + mlir_output_path=mlir_path, + parameters_output_path=parameters_path, + ) + return iree_benchmark_flux_transformer( + mlir_path=mlir_path, + parameters_path=parameters_path, + artifacts_dir=artifacts_dir, + iree_device=iree_device, + json_result_output_path=json_result_output_path, + caching=caching, + ) + + +def iree_benchmark_flux_transformer( + artifacts_dir: Path, + mlir_path: Path, + parameters_path: Path, + iree_device: str, + json_result_output_path: Path, + caching: bool = False, +) -> str: + dataset = Dataset.load(parameters_path) + model = FluxModelV1( + theta=dataset.root_theta, + params=FluxParams.from_hugging_face_properties(dataset.properties), + ) + input_args = flatten_for_iree_signature( + model.sample_inputs(batch_size=flux_transformer_default_batch_sizes[0]) + ) + cli_input_args = iree_tool_prepare_input_args( + input_args, file_path_prefix=f"{artifacts_dir / 'arg'}" + ) + cli_input_args = [f"--input={v}" for v in cli_input_args] + + iree_module_path = artifacts_dir / "model.vmfb" + if not caching or not os.path.exists(iree_module_path): + iree.compiler.compile_file( + mlir_path, + output_file=iree_module_path, + extra_args=iree_compile_flags, + ) + + iree_benchmark_args = [ + f"--device={iree_device}", + f"--module={iree_module_path}", + f"--parameters=model={parameters_path}", + f"--function=forward_bs{flux_transformer_default_batch_sizes[0]}", + "--benchmark_repetitions=30", + "--benchmark_min_warmup_time=1.0", + "--benchmark_out_format=json", + f"--benchmark_out={json_result_output_path}", + ] + cli_input_args + return iree_benchmark_module(iree_benchmark_args) diff --git a/sharktank/sharktank/models/flux/export.py b/sharktank/sharktank/models/flux/export.py index a63e75af9..dc5d8ba93 100644 --- a/sharktank/sharktank/models/flux/export.py +++ b/sharktank/sharktank/models/flux/export.py @@ -17,6 +17,26 @@ flux_transformer_default_batch_sizes = [1] +iree_compile_flags = [ + "--iree-hal-target-device=hip", + "--iree-hip-target=gfx942", + "--iree-opt-const-eval=false", + "--iree-opt-strip-assertions=true", + "--iree-global-opt-propagate-transposes=true", + "--iree-dispatch-creation-enable-fuse-horizontal-contractions=true", + "--iree-dispatch-creation-enable-aggressive-fusion=true", + "--iree-opt-aggressively-propagate-transposes=true", + "--iree-opt-outer-dim-concat=true", + "--iree-vm-target-truncate-unsupported-floats", + "--iree-llvmgpu-enable-prefetch=true", + "--iree-opt-data-tiling=false", + "--iree-codegen-gpu-native-math-precision=true", + "--iree-codegen-llvmgpu-use-vector-distribution", + "--iree-hip-waves-per-eu=2", + "--iree-execution-model=async-external", + "--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline,iree-preprocessing-pad-to-intrinsics)", +] + def export_flux_transformer_model_mlir( model: FluxModelV1, diff --git a/sharktank/sharktank/models/flux/testing.py b/sharktank/sharktank/models/flux/testing.py index da9561b33..9097928f8 100644 --- a/sharktank/sharktank/models/flux/testing.py +++ b/sharktank/sharktank/models/flux/testing.py @@ -7,6 +7,7 @@ import torch from os import PathLike from collections import OrderedDict +import pytest from .flux import FluxParams, FluxModelV1 from .export import export_flux_transformer, flux_transformer_default_batch_sizes @@ -17,6 +18,8 @@ make_mmdit_single_block_random_theta, ) +with_flux_data = pytest.mark.skipif("not config.getoption('with_flux_data')") + def convert_flux_transformer_input_for_hugging_face_model( img: torch.Tensor, diff --git a/sharktank/sharktank/utils/benchmark.py b/sharktank/sharktank/utils/benchmark.py new file mode 100644 index 000000000..ecc5d506c --- /dev/null +++ b/sharktank/sharktank/utils/benchmark.py @@ -0,0 +1,118 @@ +# Copyright 2025 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from typing import Any +import iree.runtime +import subprocess +import json +import sys +import pandas +from pathlib import Path +import os +from os import PathLike + + +def _run_program( + args: tuple[str], +): + process_result = subprocess.run( + args=args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + out = process_result.stdout.decode() + err = process_result.stderr.decode() + + if process_result.returncode != 0: + raise RuntimeError(f"stderr:\n{err}\nstdout:\n{out}") + + if err != "": + print(err, file=sys.stderr) + + return out + + +def iree_benchmark_module( + cli_args: tuple[str], +): + args = [iree.runtime.benchmark_exe()] + cli_args + return _run_program(args=args) + + +def google_benchmark_compare_path() -> str: + return os.path.abspath( + Path(__file__).parent.parent.parent.parent + / "third_party" + / "benchmark" + / "tools" + / "compare.py" + ) + + +def iree_benchmark_compare(cli_args: tuple[str]): + args = [google_benchmark_compare_path()] + cli_args + return _run_program(args=args) + + +def _get_benchmark_comparison_aggregate_real_time( + benchmark_comparison_result_json: dict[str, Any], aggregate: str +) -> tuple[float, float, str]: + real_time = [ + ( + benchmark["measurements"][0]["real_time"], + benchmark["measurements"][0]["real_time_other"], + benchmark["time_unit"], + ) + for benchmark in benchmark_comparison_result_json + if "aggregate_name" in benchmark and benchmark["aggregate_name"] == aggregate + ] + assert len(real_time) == 1 + return real_time[0] + + +def _assert_contender_aggregate_real_time_is_not_worse( + benchmark_comparison_result_json: dict[str, Any], aggregate: str +): + real_time = _get_benchmark_comparison_aggregate_real_time( + benchmark_comparison_result_json, aggregate + ) + baseline_real_time, contender_real_time, time_unit = real_time + if baseline_real_time < contender_real_time: + raise AssertionError( + f"Benchmark contender {aggregate} " + f"real time {contender_real_time} {time_unit} " + f"is worse than baseline {baseline_real_time} {time_unit}." + ) + + +def iree_benchmark_assert_contender_is_not_worse( + benchmark_comparison_result_json: dict[str, Any], alpha: float = 0.05 +): + """If contender is not from the same distribution as baseline, assert that and + that its median and mean is not worse. + + Arguments + --------- + alpha: acceptance/significance threshold probability that the two benchmark sample + sets are from the same distribution. Meaning they are not different.""" + time_pvalue = [ + b["utest"]["time_pvalue"] + for b in benchmark_comparison_result_json + if "utest" in b and "time_pvalue" in b["utest"] + ] + assert len(time_pvalue) == 1 + time_pvalue = time_pvalue[0] + if alpha <= time_pvalue: + # The benchmarks are from the same distribution. + return + + _assert_contender_aggregate_real_time_is_not_worse( + benchmark_comparison_result_json, "mean" + ) + _assert_contender_aggregate_real_time_is_not_worse( + benchmark_comparison_result_json, "median" + ) diff --git a/sharktank/sharktank/utils/testing.py b/sharktank/sharktank/utils/testing.py index 0ad524f37..9e70eff11 100644 --- a/sharktank/sharktank/utils/testing.py +++ b/sharktank/sharktank/utils/testing.py @@ -7,6 +7,7 @@ from typing import Optional import contextlib from pathlib import Path +import pytest from os import PathLike import os import shutil @@ -21,6 +22,8 @@ from ..types import * from .math import cosine_similarity +is_mi300x = pytest.mark.skipif("config.getoption('iree_hip_target') != 'gfx942'") + # Range of torch.rand() is [0,1) # Range of torch.rand() * 2 - 1 is [-1, 1), includes negative values def make_rand_torch(shape: list[int], dtype: Optional[torch.dtype] = torch.float32): diff --git a/sharktank/tests/evaluate/perplexity_iree_test.py b/sharktank/tests/evaluate/perplexity_iree_test.py index f529ac0fa..1745a3fc6 100644 --- a/sharktank/tests/evaluate/perplexity_iree_test.py +++ b/sharktank/tests/evaluate/perplexity_iree_test.py @@ -10,8 +10,9 @@ import numpy as np from sharktank.evaluate import perplexity_iree +from sharktank.utils.testing import is_mi300x + -is_mi300x = pytest.mark.skipif("config.getoption('iree_hip_target') != 'gfx942'") skipif_run_quick_llama_test = pytest.mark.skipif( 'not config.getoption("run-nightly-llama-tests")', reason="Run large tests if --run-nightly-llama-tests is passed", diff --git a/sharktank/tests/models/flux/flux_benchmark_test.py b/sharktank/tests/models/flux/flux_benchmark_test.py new file mode 100644 index 000000000..90e612403 --- /dev/null +++ b/sharktank/tests/models/flux/flux_benchmark_test.py @@ -0,0 +1,66 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from pathlib import Path +import json +import pandas +import pytest +import sys +import torch + +from sharktank.models.flux.benchmark import iree_benchmark_flux_dev_transformer +from sharktank.models.flux.testing import with_flux_data +from sharktank.utils.benchmark import ( + iree_benchmark_compare, + iree_benchmark_assert_contender_is_not_worse, +) +from sharktank.utils.testing import is_mi300x +from sharktank.utils.testing import TempDirTestBase + + +@pytest.mark.usefixtures("get_iree_flags", "caching", "path_prefix") +@pytest.mark.benchmark +@pytest.mark.expensive +class FluxBenchmark(TempDirTestBase): + def setUp(self): + super().setUp() + torch.manual_seed(12345) + if self.path_prefix is None: + self.path_prefix = self._temp_dir + + @is_mi300x + @with_flux_data + def testBenchmarkFluxDevTransformerMi300x(self): + benchmark_result_file_path = Path(self.path_prefix) / "benchmark_result.json" + benchmark_result = iree_benchmark_flux_dev_transformer( + artifacts_dir=Path(self.path_prefix), + iree_device=self.iree_device, + json_result_output_path=benchmark_result_file_path, + caching=self.caching, + ) + print(benchmark_result) + + baseline_benchmark_result_file_path = ( + Path(__file__).parent + / "flux_transformer_baseline_benchmark_result_mi300x.json" + ) + benchmark_compare_result_file_path = ( + Path(self.path_prefix) / "benchmark_compare_result.json" + ) + + benchmark_compare_result = iree_benchmark_compare( + [ + f"--dump_to_json={benchmark_compare_result_file_path}", + "benchmarks", + str(baseline_benchmark_result_file_path), + benchmark_result_file_path, + ] + ) + print(benchmark_compare_result) + + with open(benchmark_compare_result_file_path, "r") as f: + benchmark_compare_result = json.load(f) + iree_benchmark_assert_contender_is_not_worse(benchmark_compare_result) diff --git a/sharktank/tests/models/flux/flux_test.py b/sharktank/tests/models/flux/flux_test.py index bdf5c2744..6bed7b65b 100644 --- a/sharktank/tests/models/flux/flux_test.py +++ b/sharktank/tests/models/flux/flux_test.py @@ -17,12 +17,14 @@ export_flux_transformer_from_hugging_face, export_flux_transformer, import_flux_transformer_dataset_from_hugging_face, + iree_compile_flags, ) from sharktank.models.flux.testing import ( convert_flux_transformer_input_for_hugging_face_model, export_dev_random_single_layer, make_dev_single_layer_config, make_random_theta, + with_flux_data, ) from sharktank.models.flux.flux import FluxModelV1, FluxParams from sharktank.utils.testing import TempDirTestBase @@ -41,27 +43,6 @@ logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) -with_flux_data = pytest.mark.skipif("not config.getoption('with_flux_data')") - -iree_compile_flags = [ - "--iree-hal-target-device=hip", - "--iree-hip-target=gfx942", - "--iree-opt-const-eval=false", - "--iree-opt-strip-assertions=true", - "--iree-global-opt-propagate-transposes=true", - "--iree-dispatch-creation-enable-fuse-horizontal-contractions=true", - "--iree-dispatch-creation-enable-aggressive-fusion=true", - "--iree-opt-aggressively-propagate-transposes=true", - "--iree-opt-outer-dim-concat=true", - "--iree-vm-target-truncate-unsupported-floats", - "--iree-llvmgpu-enable-prefetch=true", - "--iree-opt-data-tiling=false", - "--iree-codegen-gpu-native-math-precision=true", - "--iree-codegen-llvmgpu-use-vector-distribution", - "--iree-hip-waves-per-eu=2", - "--iree-execution-model=async-external", - "--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline,iree-preprocessing-pad-to-intrinsics)", -] def convert_dtype_if_dtype( diff --git a/sharktank/tests/models/flux/flux_transformer_baseline_benchmark_result_mi300x.json b/sharktank/tests/models/flux/flux_transformer_baseline_benchmark_result_mi300x.json new file mode 100644 index 000000000..b1d973ad4 --- /dev/null +++ b/sharktank/tests/models/flux/flux_transformer_baseline_benchmark_result_mi300x.json @@ -0,0 +1,558 @@ +{ + "context": { + "date": "2025-01-26T15:32:22+00:00", + "host_name": "sharkmi300x-3", + "executable": "/home/bpetkant/ws/iree/build/RelWithDebInfo/runtime/bindings/python/iree/runtime/../_runtime_libs/iree-benchmark-module", + "num_cpus": 128, + "mhz_per_cpu": 3763, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 32768, + "num_sharing": 1 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 1 + }, + { + "type": "Unified", + "level": 2, + "size": 1048576, + "num_sharing": 1 + }, + { + "type": "Unified", + "level": 3, + "size": 33554432, + "num_sharing": 8 + } + ], + "load_avg": [ + 0.875977, + 0.46582, + 0.229004 + ], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 0, + "threads": 1, + "iterations": 1, + "real_time": 5.4941223305650055e+02, + "cpu_time": 5.6384478100000024e+02, + "time_unit": "ms", + "items_per_second": 1.8201269280750831e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 1, + "threads": 1, + "iterations": 1, + "real_time": 5.5047306598862633e+02, + "cpu_time": 5.6478033500000004e+02, + "time_unit": "ms", + "items_per_second": 1.8166193076205142e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 2, + "threads": 1, + "iterations": 1, + "real_time": 5.5021137697622180e+02, + "cpu_time": 5.6454373199999975e+02, + "time_unit": "ms", + "items_per_second": 1.8174833197664257e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 3, + "threads": 1, + "iterations": 1, + "real_time": 5.5035454698372632e+02, + "cpu_time": 5.6479656399999988e+02, + "time_unit": "ms", + "items_per_second": 1.8170105170941186e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 4, + "threads": 1, + "iterations": 1, + "real_time": 5.5070075998082757e+02, + "cpu_time": 5.6513533200000052e+02, + "time_unit": "ms", + "items_per_second": 1.8158682040584337e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 5, + "threads": 1, + "iterations": 1, + "real_time": 5.5090650497004390e+02, + "cpu_time": 5.6527730799999972e+02, + "time_unit": "ms", + "items_per_second": 1.8151900385608553e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 6, + "threads": 1, + "iterations": 1, + "real_time": 5.5177536100381985e+02, + "cpu_time": 5.6610141200000010e+02, + "time_unit": "ms", + "items_per_second": 1.8123317398238759e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 7, + "threads": 1, + "iterations": 1, + "real_time": 5.5097636696882546e+02, + "cpu_time": 5.6543593099999794e+02, + "time_unit": "ms", + "items_per_second": 1.8149598784090508e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 8, + "threads": 1, + "iterations": 1, + "real_time": 5.5182288005016744e+02, + "cpu_time": 5.6630932099999859e+02, + "time_unit": "ms", + "items_per_second": 1.8121756747547109e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 9, + "threads": 1, + "iterations": 1, + "real_time": 5.5110136204166338e+02, + "cpu_time": 5.6523207299999888e+02, + "time_unit": "ms", + "items_per_second": 1.8145482281069010e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 10, + "threads": 1, + "iterations": 1, + "real_time": 5.5142490100115538e+02, + "cpu_time": 5.6575547299999937e+02, + "time_unit": "ms", + "items_per_second": 1.8134835735281833e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 11, + "threads": 1, + "iterations": 1, + "real_time": 5.5079155095154420e+02, + "cpu_time": 5.6511900799999989e+02, + "time_unit": "ms", + "items_per_second": 1.8155688813170898e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 12, + "threads": 1, + "iterations": 1, + "real_time": 5.5180292006116360e+02, + "cpu_time": 5.6630688300000111e+02, + "time_unit": "ms", + "items_per_second": 1.8122412253439268e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 13, + "threads": 1, + "iterations": 1, + "real_time": 5.5177955300314352e+02, + "cpu_time": 5.6627096100000074e+02, + "time_unit": "ms", + "items_per_second": 1.8123179711124651e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 14, + "threads": 1, + "iterations": 1, + "real_time": 5.5185354396235198e+02, + "cpu_time": 5.6629863300000238e+02, + "time_unit": "ms", + "items_per_second": 1.8120749806550505e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 15, + "threads": 1, + "iterations": 1, + "real_time": 5.5273327603936195e+02, + "cpu_time": 5.6710612400000036e+02, + "time_unit": "ms", + "items_per_second": 1.8091908762315709e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 16, + "threads": 1, + "iterations": 1, + "real_time": 5.5235066899331287e+02, + "cpu_time": 5.6679550600000005e+02, + "time_unit": "ms", + "items_per_second": 1.8104440822395504e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 17, + "threads": 1, + "iterations": 1, + "real_time": 5.5234270502114668e+02, + "cpu_time": 5.6673579399999949e+02, + "time_unit": "ms", + "items_per_second": 1.8104701861894139e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 18, + "threads": 1, + "iterations": 1, + "real_time": 5.5279905395582318e+02, + "cpu_time": 5.6723945699999945e+02, + "time_unit": "ms", + "items_per_second": 1.8089755994407233e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 19, + "threads": 1, + "iterations": 1, + "real_time": 5.5281000799732283e+02, + "cpu_time": 5.6721902399999988e+02, + "time_unit": "ms", + "items_per_second": 1.8089397542253665e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 20, + "threads": 1, + "iterations": 1, + "real_time": 5.5279980297200382e+02, + "cpu_time": 5.6724346300000047e+02, + "time_unit": "ms", + "items_per_second": 1.8089731483689482e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 21, + "threads": 1, + "iterations": 1, + "real_time": 5.5278226599330083e+02, + "cpu_time": 5.6732682099999818e+02, + "time_unit": "ms", + "items_per_second": 1.8090305379154819e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 22, + "threads": 1, + "iterations": 1, + "real_time": 5.5371954495785758e+02, + "cpu_time": 5.6823183500000243e+02, + "time_unit": "ms", + "items_per_second": 1.8059683988147970e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 23, + "threads": 1, + "iterations": 1, + "real_time": 5.5347738502314314e+02, + "cpu_time": 5.6792782300000158e+02, + "time_unit": "ms", + "items_per_second": 1.8067585542961000e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 24, + "threads": 1, + "iterations": 1, + "real_time": 5.5333389097359031e+02, + "cpu_time": 5.6774159800000132e+02, + "time_unit": "ms", + "items_per_second": 1.8072270943688289e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 25, + "threads": 1, + "iterations": 1, + "real_time": 5.5361895298119634e+02, + "cpu_time": 5.6813219200000026e+02, + "time_unit": "ms", + "items_per_second": 1.8062965413576890e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 26, + "threads": 1, + "iterations": 1, + "real_time": 5.5385963898152113e+02, + "cpu_time": 5.6819793300000038e+02, + "time_unit": "ms", + "items_per_second": 1.8055115946684170e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 27, + "threads": 1, + "iterations": 1, + "real_time": 5.5390325299231336e+02, + "cpu_time": 5.6860353600000121e+02, + "time_unit": "ms", + "items_per_second": 1.8053694297655212e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 28, + "threads": 1, + "iterations": 1, + "real_time": 5.5448368302313611e+02, + "cpu_time": 5.6868854500000054e+02, + "time_unit": "ms", + "items_per_second": 1.8034795804050279e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "iteration", + "repetitions": 30, + "repetition_index": 29, + "threads": 1, + "iterations": 1, + "real_time": 5.5424572899937630e+02, + "cpu_time": 5.6859769099999721e+02, + "time_unit": "ms", + "items_per_second": 1.8042538673331396e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time_mean", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "aggregate", + "repetitions": 30, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 30, + "real_time": 5.5215489286347292e+02, + "cpu_time": 5.6656650296666680e+02, + "time_unit": "ms", + "items_per_second": 1.8110963237949083e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time_median", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "aggregate", + "repetitions": 30, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 30, + "real_time": 5.5209812449174933e+02, + "cpu_time": 5.6652255749999904e+02, + "time_unit": "ms", + "items_per_second": 1.8112725834222321e+00 + }, + { + "name": "BM_forward_bs1/process_time/real_time_stddev", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "aggregate", + "repetitions": 30, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 30, + "real_time": 1.3402606227623226e+00, + "cpu_time": 1.3650754512544929e+00, + "time_unit": "ms", + "items_per_second": 4.3969683769790810e-03 + }, + { + "name": "BM_forward_bs1/process_time/real_time_cv", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_forward_bs1/process_time/real_time", + "run_type": "aggregate", + "repetitions": 30, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 30, + "real_time": 2.4273272592256435e-03, + "cpu_time": 2.4093825598701258e-03, + "time_unit": "ms", + "items_per_second": 2.4277937728711340e-03 + } + ] +} diff --git a/sharktank/tests/models/llama/benchmark_amdgpu_test.py b/sharktank/tests/models/llama/benchmark_amdgpu_test.py index 6056f9c6c..1c4371cf8 100644 --- a/sharktank/tests/models/llama/benchmark_amdgpu_test.py +++ b/sharktank/tests/models/llama/benchmark_amdgpu_test.py @@ -19,8 +19,8 @@ IreeBenchmarkException, IreeCompileException, ) +from sharktank.utils.testing import is_mi300x -is_mi300x = pytest.mark.skipif("config.getoption('iree_hip_target') != 'gfx942'") skipif_run_quick_llama_test = pytest.mark.skipif( 'config.getoption("run-quick-llama-test") and not config.getoption("run-nightly-llama-tests")', reason="Skipping largs tests when --run-quick-llama-test is set.", diff --git a/third_party/benchmark b/third_party/benchmark new file mode 160000 index 000000000..c58e6d071 --- /dev/null +++ b/third_party/benchmark @@ -0,0 +1 @@ +Subproject commit c58e6d0710581e3a08d65c349664128a8d9a2461