diff --git a/.github/workflows/pkgci_test_amd_mi250.yml b/.github/workflows/pkgci_test_amd_mi250.yml index 6253a59301a5..0ec19c966ac3 100644 --- a/.github/workflows/pkgci_test_amd_mi250.yml +++ b/.github/workflows/pkgci_test_amd_mi250.yml @@ -56,9 +56,9 @@ jobs: echo "CC=clang" >> $GITHUB_ENV echo "CXX=clang++" >> $GITHUB_ENV - - name: Build tests + - name: Build in-tree tests run: ./build_tools/pkgci/build_tests_using_package.sh ${VENV_DIR}/bin - - name: Run GPU tests + - name: Run in-tree GPU tests env: CTEST_PARALLEL_LEVEL: 2 IREE_CTEST_LABEL_REGEX: ^requires-gpu|^driver=hip$ @@ -67,3 +67,36 @@ jobs: IREE_NVIDIA_SM80_TESTS_DISABLE: 1 IREE_MULTI_DEVICE_TESTS_DISABLE: 0 run: ./build_tools/cmake/ctest_all.sh ${BUILD_DIR} + + - name: Checkout test suites repository + uses: actions/checkout@v4.1.7 + with: + repository: ScottTodd/iree-test-suites + ref: dc027ded6175dbd753ac08d6d76ba7be36494730 + path: iree-test-suites + - name: Configure out-of-tree test suite CMake project + run: | + source ${VENV_DIR}/bin/activate + cmake -G Ninja -S iree-test-suites/matmul -B build/ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DIREE_USE_LOCAL_REPO=ON \ + -DIREE_LOCAL_REPO_PATH=${GITHUB_WORKSPACE} \ + -DIREE_HOST_BIN_DIR=${VENV_DIR}/bin \ + -DIREE_HAL_DRIVER_LOCAL_SYNC=OFF \ + -DIREE_HAL_DRIVER_LOCAL_TASK=OFF \ + -DIREE_HAL_DRIVER_HIP=ON \ + -DIREE_HAL_DRIVER_VULKAN=OFF \ + -DIREE_HAL_DRIVER_METAL=OFF \ + -DIREE_HAL_DRIVER_CUDA=OFF \ + -DIREE_HIP_TEST_TARGET_CHIP=gfx90a + - name: Build out-of-tree test suite CMake project + run: cmake --build build/ --target iree-test-suites-matmul-deps + - name: CTest out-of-tree test suite CMake project + run: | + ctest \ + --test-dir build/ -R iree-test-suites \ + --timeout 900 \ + --output-on-failure \ + --no-tests=error \ + --label-regex "^requires-gpu|^driver=hip$" \ + --label-exclude "(^nodocker$|^driver=vulkan$|^driver=metal$|^driver=cuda$|^vulkan_uses_vk_khr_shader_float16_int8$|^requires-gpu-sm80$|^requires-gpu-rdna3$)" diff --git a/CMakeLists.txt b/CMakeLists.txt index c42e006168a2..547c7515ec68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -556,7 +556,6 @@ include(iree_lit_test) include(iree_llvm) include(iree_add_all_subdirs) include(iree_check_test) -include(iree_e2e_generated_runner_test) include(iree_native_test) include(iree_cc_binary_benchmark) include(iree_hal_cts_test_suite) diff --git a/build_tools/bazel/iree_e2e_generated_runner_test.bzl b/build_tools/bazel/iree_e2e_generated_runner_test.bzl deleted file mode 100644 index 9d17d50808d0..000000000000 --- a/build_tools/bazel/iree_e2e_generated_runner_test.bzl +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright 2021 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -"""Macros for defining tests that use the iree-e2e-${test_type}-test runner.""" - -load("//build_tools/bazel:iree_bytecode_module.bzl", "iree_bytecode_module") -load("//build_tools/bazel:native_binary.bzl", "native_test") - -def iree_e2e_runner_test( - name, - test_type, - tests_src, - tests_vmfb, - calls_src, - calls_vmfb, - target_backend, - driver, - test_runner, - compiler_flags = [], - runner_args = [], - tags = [], - target_cpu_features = None, - timeout = None, - **kwargs): - """Creates a test using a specified test runner program. - - Args: - name: Name of the target - test_type: Name of the test (e.g., matmuls, conv2ds). - tests_src: mlir source file with tests to be compiled. 
- tests_vmfb: specifies the path to use for the generated IREE module. - calls_src: mlir source file with calls to be compiled. - calls_vmfb: specifies the path to use for the generated IREE module. - target_backend: target backend to compile for. - driver: driver to run the module with. - compiler_flags: additional flags to pass to the compiler. Bytecode - output format and backend flags are passed automatically. - runner_args: additional args to pass to the test runner program. The - driver and input file flags are passed automatically. - tags: Additional labels to apply to the test. "driver=${DRIVER}" is - added automatically. - test_runner: test runner program to run. - timeout: timeout for the generated tests. - target_cpu_features: target CPU features. Only for llvm-cpu backend. - **kwargs: any additional attributes to pass to the underlying tests and - test suite. - """ - - if target_cpu_features: - fail("target_cpu_features must currently be empty") - - iree_bytecode_module( - name = name + "_%s_module" % test_type, - module_name = tests_vmfb, - src = tests_src, - flags = [ - "--iree-hal-target-backends=%s" % target_backend, - ] + ([ - "--iree-llvmcpu-target-cpu-features=%s" % target_cpu_features, - ] if target_cpu_features else []) + compiler_flags, - visibility = ["//visibility:private"], - testonly = True, - **kwargs - ) - - iree_bytecode_module( - name = name + "_calls_module", - module_name = calls_vmfb, - src = calls_src, - flags = [ - "--iree-hal-target-backends=%s" % target_backend, - ] + compiler_flags, - visibility = ["//visibility:private"], - testonly = True, - **kwargs - ) - - native_test( - name = name, - args = [ - "--device=%s" % driver, - "--module=$(location :%s)" % tests_vmfb, - "--module=$(location :%s)" % calls_vmfb, - ] + runner_args, - data = [ - ":%s" % tests_vmfb, - ":%s" % calls_vmfb, - ], - src = test_runner, - tags = tags + ["driver=%s" % driver], - timeout = timeout, - **kwargs - ) - -def iree_single_backend_e2e_runner_test( - name, - test_type, - generator, - test_runner, - target_backend, - driver, - generator_args = [], - compiler_flags = [], - runner_args = [], - tags = [], - target_cpu_features = None, - timeout = None, - **kwargs): - """Generates an iree_e2e_runner_test using a custom python generator script. - - The generator script produces .mlir sources which are compiled and passed to - iree_e2e_runner_test. - - Args: - name: Name of the target - test_type: Name of the test (e.g., matmul, conv2d). - generator: Target to run to generate the source MLIR files. - It will be invoked with the following standard flags, in addition - to generator_args: - --output_${test_type}_mlir=(current binary dir)/name_${test_type}.mlir - --output_calls_mlir=(current binary dir)/name_calls.mlir - generator_args: additional args to pass to the generator program. - target_backend: target backend to compile for. - driver: driver to run the module with. - compiler_flags: additional flags to pass to the compiler. Bytecode - output format and backend flags are passed automatically. - runner_args: additional args to pass to the test runner program. The - driver and input file flags are passed automatically. - tags: Additional labels to apply to the test. "driver=${DRIVER}" is - added automatically. - test_runner: test runner program to run. - timeout: timeout for the generated tests. - target_cpu_features: target CPU features. Only for llvm-cpu backend. - **kwargs: any additional attributes to pass to the underlying tests and - test suite. 
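For reference, a minimal sketch of a direct invocation of this macro, using the conv2d generator, runner, and flags that appear elsewhere in this change (the target name itself is illustrative):

    iree_single_backend_e2e_runner_test(
        name = "e2e_conv2d_cpu_f32_f32_f32_small",
        test_type = "conv2d",
        generator = ":generate_e2e_conv2d_tests",
        generator_args = [
            "--input_type=f32",
            "--kernel_type=f32",
            "--acc_type=f32",
            "--shapes=small",
        ],
        test_runner = "//tools/testing/e2e:iree-e2e-conv2d-test",
        target_backend = "llvm-cpu",
        driver = "local-task",
        tags = ["hostonly", "local"],
    )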
- """ - - tests_src = "%s.mlir" % (name) - tests_vmfb = "%s.vmfb" % (name) - calls_src = "%s_calls.mlir" % (name) - calls_vmfb = "%s_calls.vmfb" % (name) - native.genrule( - name = "%s_generate" % (name), - outs = [tests_src, calls_src], - cmd = " ".join([ - "$(location %s)" % (generator), - " ".join([('"%s"' % arg) for arg in generator_args]), - "--output_%s_mlir=$(location %s)" % (test_type, tests_src), - "--output_calls_mlir=$(location %s)" % (calls_src), - ] + [('"%s"' % arg) for arg in generator_args]), - tools = [generator], - message = "Generating code and calls for test %s..." % (name), - output_to_bindir = 1, - testonly = True, - **kwargs - ) - iree_e2e_runner_test( - name = name, - test_type = test_type, - tests_src = tests_src, - tests_vmfb = tests_vmfb, - calls_src = calls_src, - calls_vmfb = calls_vmfb, - target_backend = target_backend, - driver = driver, - test_runner = test_runner, - compiler_flags = compiler_flags, - runner_args = runner_args, - tags = tags, - timeout = timeout, - target_cpu_features = target_cpu_features, - **kwargs - ) - -def iree_generated_e2e_runner_test( - name, - test_type, - generator, - test_runner, - target_backends_and_drivers, - generator_args = [], - compiler_flags = [], - runner_args = [], - tags = [], - timeout = None, - target_cpu_features_variants = [], - **kwargs): - """Generates a suite of iree_e2e_runner_test on multiple backends/drivers. - - Args: - name: Name of the target - test_type: Name of the test (e.g., matmul, conv2d). - generator: Target to run to generate the source MLIR files. - It will be invoked with the following standard flags, in addition - to generator_args: - --output_${test_type}_mlir=(current binary dir)/name_${test_type}.mlir - --output_calls_mlir=(current binary dir)/name_calls.mlir - generator_args: additional args to pass to the generator program. - target_backends_and_drivers: backend/driver pairs to compile and run - the module. - compiler_flags: additional flags to pass to the compiler. Bytecode - output format and backend flags are passed automatically. - runner_args: additional args to pass to the test runner program. The - driver and input file flags are passed automatically. - tags: Additional labels to apply to the test. "driver=${DRIVER}" is - added automatically. - test_runner: test runner program to run. - timeout: timeout for the generated tests. - target_cpu_features_variants: list of target cpu features variants. - Currently unimplemented in Bazel due to difficulty of specializing - to target architecture in Bazel. The following describes the - semantics that this should have if implemented. Each - entry is either "default" for the architecture defaults, or a colon- - separated triple "arch:name:cpu_features" where "arch" filters - for a target CPU architecture (in IREE_ARCH format), "name" is a - short name for the CPU features set (used to generate target names) - and cpu_features is a comma-separated list of LLVM target attributes - to enable. Example: - x86_64:avx2_fma:+avx,+avx2,+fma - **kwargs: any additional attributes to pass to the underlying tests and test suite. - """ - - tests = [] - for backend, driver in target_backends_and_drivers: - # CUDA/ROCm backend/driver not supported by Bazel build. 
- if backend == "cuda" or driver == "cuda" or backend == "rocm" or driver == "hip": - continue - suite_entry_name = "_".join([name, backend, driver]) - iree_single_backend_e2e_runner_test( - name = suite_entry_name, - test_type = test_type, - generator = generator, - test_runner = test_runner, - driver = driver, - target_backend = backend, - generator_args = generator_args, - compiler_flags = compiler_flags, - runner_args = runner_args, - tags = tags, - timeout = timeout, - **kwargs - ) - tests.append(suite_entry_name) - native.test_suite( - name = name, - tests = tests, - tags = tags, - **kwargs - ) diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py index 9a1796b443fd..8a73090d5b69 100644 --- a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py +++ b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py @@ -858,71 +858,6 @@ def iree_check_test_suite( f")\n\n" ) - def iree_generated_e2e_runner_test( - self, - name, - test_type, - generator, - generator_args=None, - test_runner=None, - target_backends_and_drivers=None, - compiler_flags=None, - runner_args=None, - tags=None, - target_cpu_features_variants=None, - **kwargs, - ): - if self._should_skip_target(tags=tags, **kwargs): - return - target_backends = None - drivers = None - if target_backends_and_drivers is not None: - target_backends = [it[0] for it in target_backends_and_drivers] - drivers = [it[1] for it in target_backends_and_drivers] - - name_block = self._convert_string_arg_block("NAME", name, quote=False) - test_type_block = self._convert_string_arg_block( - "TEST_TYPE", test_type, quote=False - ) - # For now we assume that the generator target is a py_binary with a single - # source .py file named like it. - generator_py = f"{generator.split(':')[-1]}.py" - generator_block = self._convert_string_arg_block( - "GENERATOR", generator_py, quote=True - ) - generator_args_block = self._convert_string_list_block( - "GENERATOR_ARGS", generator_args - ) - test_runner_block = self._convert_target_block("TEST_RUNNER", test_runner) - target_backends_block = self._convert_string_list_block( - "TARGET_BACKENDS", target_backends - ) - drivers_block = self._convert_string_list_block("DRIVERS", drivers) - compiler_flags_block = self._convert_string_list_block( - "COMPILER_FLAGS", compiler_flags - ) - runner_args_block = self._convert_string_list_block("RUNNER_ARGS", runner_args) - labels_block = self._convert_string_list_block("LABELS", tags) - target_cpu_features_variants_block = self._convert_string_list_block( - "TARGET_CPU_FEATURES_VARIANTS", target_cpu_features_variants - ) - - self._converter.body += ( - f"iree_generated_e2e_runner_test(\n" - f"{name_block}" - f"{test_type_block}" - f"{generator_block}" - f"{generator_args_block}" - f"{test_runner_block}" - f"{target_backends_block}" - f"{drivers_block}" - f"{compiler_flags_block}" - f"{runner_args_block}" - f"{labels_block}" - f"{target_cpu_features_variants_block}" - f")\n\n" - ) - def native_test(self, name, src, args=None, data=None, tags=None, timeout=None): if self._should_skip_target(tags=tags): return diff --git a/build_tools/cmake/iree_e2e_generated_runner_test.cmake b/build_tools/cmake/iree_e2e_generated_runner_test.cmake deleted file mode 100644 index 585d9906f112..000000000000 --- a/build_tools/cmake/iree_e2e_generated_runner_test.cmake +++ /dev/null @@ -1,502 +0,0 @@ -# Copyright 2021 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. 
-# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -include(CMakeParseArguments) - -# iree_e2e_runner_test() -# -# Creates a test using a specified test runner program for the specified -# test files. -# -# Parameters: -# NAME: Name of the target -# TEST_TYPE: Type of test (Currently, matmul and conv2d are supported). -# VARIANT_NAME: Variant name to suffix NAME with. -# Will reuse the same TEST_TYPE/calls vmfb files. -# TESTS_SRC: mlir source file with TEST_TYPE to be compiled to an IREE module. -# TESTS_VMFB: specifies the path to use for the generated IREE module. -# CALLS_SRC: mlir source file with calls to be compiled to an IREE module. -# CALLS_VMFB: specifies the path to use for the generated IREE module. -# TARGET_BACKEND: target backend to compile for. -# DRIVER: driver to run the module with. -# COMPILER_FLAGS: additional flags to pass to the compiler. Bytecode output -# format and backend flags are passed automatically. -# RUNNER_ARGS: additional args to pass to the trace-runner program. The driver -# and input file flags are passed automatically. -# LABELS: Additional labels to apply to the test. The package path and -# "driver=${DRIVER}" are added automatically. -# TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES: If specified, a string passed as argument to -# --iree-llvmcpu-target-cpu-features. -# TEST_DEFINED: Whether to define a test target. -# TEST_DISABLED: The test target will be skipped and its status will be -# 'Not Run'. -function(iree_e2e_runner_test) - if(NOT IREE_BUILD_TESTS) - return() - endif() - - # See comment in iree_check_test about this condition. - if(NOT IREE_BUILD_COMPILER AND NOT IREE_HOST_BIN_DIR) - return() - endif() - - cmake_parse_arguments( - _RULE - "" - "NAME;TEST_TYPE;VARIANT_NAME;TESTS_SRC;TESTS_VMFB;CALLS_SRC;CALLS_VMFB;TRACE;TARGET_BACKEND;DRIVER;TEST_RUNNER;TEST_DEFINED;TEST_DISABLED" - "COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES" - ${ARGN} - ) - - iree_is_bytecode_module_test_excluded_by_labels(_EXCLUDED_BY_LABELS "${_RULE_LABELS}") - if(_EXCLUDED_BY_LABELS) - return() - endif() - - iree_package_name(_PACKAGE_NAME) - set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}") - - set(_BASE_COMPILER_FLAGS - "--iree-hal-target-backends=${_RULE_TARGET_BACKEND}" - ) - if (_RULE_TARGET_CPU_FEATURES) - list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}") - endif() - - if(NOT TARGET "${_NAME}_${_RULE_TEST_TYPE}_module") - iree_bytecode_module( - NAME - "${_RULE_NAME}_${_RULE_TEST_TYPE}_module" - MODULE_FILE_NAME - "${_RULE_TESTS_VMFB}" - SRC - "${_RULE_TESTS_SRC}" - FLAGS - "${_BASE_COMPILER_FLAGS}" - "${_RULE_COMPILER_FLAGS}" - ) - endif() - - if(NOT TARGET "${_NAME}_calls_module") - iree_bytecode_module( - NAME - "${_RULE_NAME}_calls_module" - MODULE_FILE_NAME - "${_RULE_CALLS_VMFB}" - SRC - "${_RULE_CALLS_SRC}" - FLAGS - "${_BASE_COMPILER_FLAGS}" - "${_RULE_COMPILER_FLAGS}" - ) - endif() - - # A target specifically for the test. We could combine this with the above, - # but we want that one to get pulled into iree_bytecode_module. 
- add_custom_target("${_NAME}${_RULE_VARIANT_NAME}" ALL) - add_dependencies( - "${_NAME}${_RULE_VARIANT_NAME}" - "${_NAME}_${_RULE_TEST_TYPE}_module" - "${_NAME}_calls_module" - "${_RULE_TEST_RUNNER}" - ) - - add_dependencies(iree-test-deps "${_NAME}${_RULE_VARIANT_NAME}") - - if(_RULE_TEST_DEFINED) - iree_native_test( - NAME - "${_RULE_NAME}${_RULE_VARIANT_NAME}" - DRIVER - "${_RULE_DRIVER}" - SRC - "${_RULE_TEST_RUNNER}" - DATA - ${_TESTS_VMFB} - ${_CALLS_VMFB} - ARGS - "--module={{${_TESTS_VMFB}}}" - "--module={{${_CALLS_VMFB}}}" - ${_RULE_RUNNER_ARGS} - LABELS - ${_RULE_LABELS} - DISABLED - ${_RULE_TEST_DISABLED} - ) - endif() -endfunction() - -# iree_single_backend_e2e_runner_test() -# -# Parameters: -# NAME: Name of the target -# TEST_TYPE: Type of test (Currently, matmul and conv are supported). -# GENERATOR: Program (at the moment, must be Python3) to run to generate the -# source file (and possibly a trace file and module path). It will be -# invoked with the following standard flags, in addition to GENERATOR_ARGS: -# --output_${TEST_TYPE}_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_${TEST_TYPE}.mlir -# --output_calls_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_calls.mlir -# and if TARGET_CPU_FEATURES is not empty: -# --requirements=${TARGET_CPU_FEATURES} -# GENERATOR_ARGS: additional args to pass to the generator program. -# TARGET_BACKEND: target backend to compile for. -# DRIVER: driver to run the module with. -# COMPILER_FLAGS: additional flags to pass to the compiler. Bytecode output -# format and backend flags are passed automatically. -# RUNNER_ARGS: additional args to pass to the trace-runner program. The driver -# and input file flags are passed automatically. -# LABELS: Additional labels to apply to the test. The package path and -# "driver=${DRIVER}" are added automatically. -# TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES: If specified, a string passed as argument to -# --iree-llvmcpu-target-cpu-features. -function(iree_single_backend_e2e_runner_test) - if(NOT IREE_BUILD_TESTS) - return() - endif() - - # Copied from iree_check_test. Refer to the comment there. - if(NOT IREE_BUILD_COMPILER AND NOT IREE_HOST_BIN_DIR) - return() - endif() - - cmake_parse_arguments( - _RULE - "" - "NAME;TEST_TYPE;GENERATOR;TARGET_BACKEND;DRIVER;TEST_RUNNER" - "GENERATOR_ARGS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES" - ${ARGN} - ) - - # --------------------------------------------------------------------------- - # Bytecode module builds require - # 1. the compiler, either in the same build or provided in IREE_HOST_BIN_DIR - # 2. compiler support for _RULE_INPUT_TYPE - # 3. compiler support for _RULE_TARGET_BACKEND - set(_BYTECODE_MODULE_BUILD_ENABLED TRUE) - - # 1. Check for the compiler. - if(NOT IREE_BUILD_COMPILER AND NOT IREE_HOST_BIN_DIR) - set(_BYTECODE_MODULE_BUILD_ENABLED FALSE) - endif() - - # 2. Check target backend availability. - # Note: we can only reliably check for this when building the compiler host - # tools from source. If the tools are already built, we assume that all target - # backends are enabled. We could query the tools in the binary directory for - # support dynamically if optionality would be useful. 
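For example, a TARGET_BACKEND of "vulkan-spirv" is normalized below to the option name IREE_TARGET_BACKEND_VULKAN_SPIRV (uppercased, with dashes replaced by underscores) before it is checked.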
- if(NOT IREE_HOST_BIN_DIR) - string(TOUPPER ${_RULE_TARGET_BACKEND} _UPPERCASE_TARGET_BACKEND) - string(REPLACE "-" "_" _NORMALIZED_TARGET_BACKEND ${_UPPERCASE_TARGET_BACKEND}) - # TODO(scotttodd): allow plugins to provide external backends here - if(NOT DEFINED IREE_TARGET_BACKEND_${_NORMALIZED_TARGET_BACKEND}) - message(SEND_ERROR "Unknown backend '${_RULE_TARGET_BACKEND}'. Check IREE_TARGET_BACKEND_* options.") - endif() - if(NOT IREE_TARGET_BACKEND_${_NORMALIZED_TARGET_BACKEND}) - set(_BYTECODE_MODULE_BUILD_ENABLED FALSE) - endif() - endif() - # --------------------------------------------------------------------------- - - # --------------------------------------------------------------------------- - # Tests are defined if _RULE_DRIVER is defined. - set(_TEST_DEFINED TRUE) - if(NOT DEFINED _RULE_DRIVER) - set(_TEST_DEFINED FALSE) - endif() - - # Test execution requires - # 1. the bytecode module build to be enabled - # 2. _RULE_DRIVER is defined and runtime support is enabled - # 3. no other label exclusions (e.g. 'optonly' test with 'debug' config) - set(_TEST_DISABLED FALSE) - - # 1. Check bytecode module build. - if(NOT _BYTECODE_MODULE_BUILD_ENABLED) - set(_TEST_DISABLED TRUE) - endif() - - # 2. Check driver availability. - if(DEFINED _RULE_DRIVER) - string(TOUPPER ${_RULE_DRIVER} _UPPERCASE_DRIVER) - string(REPLACE "-" "_" _NORMALIZED_DRIVER ${_UPPERCASE_DRIVER}) - if((NOT IREE_HAL_DRIVER_${_NORMALIZED_DRIVER}) AND - (NOT IREE_EXTERNAL_${_NORMALIZED_DRIVER}_HAL_DRIVER_FOUND)) - set(_TEST_DISABLED TRUE) - endif() - endif() - - # 3. Check label exclusions. - iree_is_bytecode_module_test_excluded_by_labels(_EXCLUDED_BY_LABELS "${_RULE_LABELS}") - if(_EXCLUDED_BY_LABELS) - set(_TEST_DISABLED TRUE) - endif() - - if((_TEST_DISABLED OR NOT _TEST_DEFINED) AND NOT IREE_BUILD_ALL_CHECK_TEST_MODULES) - set(_BYTECODE_MODULE_BUILD_ENABLED FALSE) - endif() - # --------------------------------------------------------------------------- - - iree_package_name(_PACKAGE_NAME) - set(_NAME "${_PACKAGE_NAME}_${_RULE_NAME}") - - set(_TESTS_SRC "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_${_RULE_TEST_TYPE}.mlir") - set(_CALLS_SRC "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_calls.mlir") - set(_TESTS_VMFB "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_${_RULE_TEST_TYPE}.vmfb") - set(_CALLS_VMFB "${CMAKE_CURRENT_BINARY_DIR}/${_RULE_NAME}_calls.vmfb") - - list(APPEND _GENERATOR_STANDARD_FLAGS "--output_${_RULE_TEST_TYPE}_mlir=${_TESTS_SRC}") - list(APPEND _GENERATOR_STANDARD_FLAGS "--output_calls_mlir=${_CALLS_SRC}") - if(_RULE_TARGET_CPU_FEATURES) - list(APPEND _GENERATOR_STANDARD_FLAGS "--requirements=${_RULE_TARGET_CPU_FEATURES}") - endif() - - if(NOT _BYTECODE_MODULE_BUILD_ENABLED) - return() - endif() - - add_custom_command( - COMMAND - "${Python3_EXECUTABLE}" - "${CMAKE_CURRENT_SOURCE_DIR}/${_RULE_GENERATOR}" - ${_GENERATOR_STANDARD_FLAGS} - ${_RULE_GENERATOR_ARGS} - OUTPUT - ${_TESTS_SRC} - ${_CALLS_SRC} - DEPENDS - ${_RULE_GENERATOR} - ) - - add_custom_target( - "${_NAME}_generated_files" - DEPENDS - ${_TESTS_SRC} - ${_CALLS_SRC} - ) - - # When using the llvm-cpu backend, the runtime build config may need to - # match the compiled executable config using (`--iree-llvmcpu-sanitize=`): - # - # | Runtime type | Compatible with these executable types | - # | -------------------- | -------------------------------------- | - # | Base (no sanitizers) | Base, ASan | - # | ASan | Base, ASan | - # | TSan | TSan (ABI break) | - - # Define the regular test suite, unless the config is llvm-cpu + TSan. 
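Per the table above, a TSan runtime can only run TSan-built executables, so when IREE_ENABLE_TSAN is set the unsanitized llvm-cpu suite is skipped here and only the _tsan variant defined further below is created.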
- if(NOT _RULE_TARGET_BACKEND STREQUAL "llvm-cpu" OR NOT IREE_ENABLE_TSAN) - iree_e2e_runner_test( - NAME ${_RULE_NAME} - TEST_TYPE ${_RULE_TEST_TYPE} - VARIANT_NAME "" - TESTS_SRC ${_TESTS_SRC} - TESTS_VMFB ${_TESTS_VMFB} - CALLS_SRC ${_CALLS_SRC} - CALLS_VMFB ${_CALLS_VMFB} - TEST_RUNNER ${_RULE_TEST_RUNNER} - TARGET_BACKEND ${_RULE_TARGET_BACKEND} - DRIVER ${_RULE_DRIVER} - COMPILER_FLAGS ${_RULE_COMPILER_FLAGS} - RUNNER_ARGS ${_RULE_RUNNER_ARGS} - LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} - TEST_DEFINED ${_TEST_DEFINED} - TEST_DISABLED ${_TEST_DISABLED} - ) - # Note we are relying on the fact that the target created by - # iree_e2e_runner_test is _NAME, even though we passed _RULE_NAME to it, - # i.e. we are relying on the prefixing to be identical. - add_dependencies("${_NAME}" "${_NAME}_generated_files") - endif() - - # Define tests for AddressSanitizer (ASan) and ThreadSanitizer (TSan). - # Normally test suites should do this sort of branching at the leaves rather - # than modify the base CMake function directly, but sanitizers are applied - # at the build system uniformly, so until we decouple the test suites from - # source builds further this felt like a reasonable compromise. - if(_RULE_TARGET_BACKEND STREQUAL "llvm-cpu") - if(IREE_ENABLE_ASAN) - set(_ASAN_COMPILER_FLAGS ${_RULE_COMPILER_FLAGS}) - list(APPEND _ASAN_COMPILER_FLAGS "--iree-llvmcpu-link-embedded=false") - list(APPEND _ASAN_COMPILER_FLAGS "--iree-llvmcpu-sanitize=address") - iree_e2e_runner_test( - NAME ${_RULE_NAME} - TEST_TYPE ${_RULE_TEST_TYPE} - VARIANT_NAME "_asan" - TESTS_SRC ${_TESTS_SRC} - TESTS_VMFB ${_TESTS_VMFB} - CALLS_SRC ${_CALLS_SRC} - CALLS_VMFB ${_CALLS_VMFB} - TEST_RUNNER ${_RULE_TEST_RUNNER} - TARGET_BACKEND ${_RULE_TARGET_BACKEND} - DRIVER ${_RULE_DRIVER} - COMPILER_FLAGS ${_ASAN_COMPILER_FLAGS} - RUNNER_ARGS ${_RULE_RUNNER_ARGS} - LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} - TEST_DEFINED ${_TEST_DEFINED} - TEST_DISABLED ${_TEST_DISABLED} - ) - # Note we are relying on the fact that the target created by - # iree_e2e_runner_test is _NAME, even though we passed _RULE_NAME to it, - # i.e. we are relying on the prefixing to be identical. - add_dependencies("${_NAME}_asan" "${_NAME}_generated_files") - endif() - - if(IREE_ENABLE_TSAN) - set(_TSAN_COMPILER_FLAGS ${_RULE_COMPILER_FLAGS}) - list(APPEND _TSAN_COMPILER_FLAGS "--iree-llvmcpu-link-embedded=false") - list(APPEND _TSAN_COMPILER_FLAGS "--iree-llvmcpu-sanitize=thread") - iree_e2e_runner_test( - NAME ${_RULE_NAME} - VARIANT_NAME "_tsan" - TESTS_SRC ${_TESTS_SRC} - TESTS_VMFB ${_TESTS_VMFB} - CALLS_SRC ${_CALLS_SRC} - CALLS_VMFB ${_CALLS_VMFB} - TEST_RUNNER ${_RULE_TEST_RUNNER} - TARGET_BACKEND ${_RULE_TARGET_BACKEND} - DRIVER ${_RULE_DRIVER} - COMPILER_FLAGS ${_TSAN_COMPILER_FLAGS} - RUNNER_ARGS ${_RULE_RUNNER_ARGS} - LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} - TEST_DEFINED ${_TEST_DEFINED} - TEST_DISABLED ${_TEST_DISABLED} - ) - # Note we are relying on the fact that the target created by - # iree_e2e_runner_test is _NAME, even though we passed _RULE_NAME to it, - # i.e. we are relying on the prefixing to be identical. - add_dependencies("${_NAME}_tsan" "${_NAME}_generated_files") - endif() - endif() -endfunction() - - -# iree_generated_e2e_runner_test() -# -# Creates a set of iree_single_backend_e2e_runner_test's differing -# by target backend and driver. -# -# Mirrors the bzl rule of the same name. 
-# -# One test is generated per source and backend/driver pair. -# Parameters: -# NAME: Name of the target -# TEST_TYPE: Type of test (Currently, matmul and conv are supported). -# GENERATOR: Program (at the moment, must be Python3) to run to generate the -# source file (and possibly a trace file and module path). It will be -# invoked with the following standard flags, in addition to GENERATOR_ARGS: -# --output_${TEST_TYPE}_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_${TEST_TYPE}.mlir -# --output_calls_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_calls.mlir -# GENERATOR_ARGS: additional args to pass to the generator program. -# TARGET_BACKENDS: backends to compile the module for. These form pairs with -# the DRIVERS argument (due to cmake limitations they are separate list -# arguments). The lengths must exactly match. If no backends or drivers are -# specified, a test will be generated for every supported pair. -# DRIVERS: drivers to run the module with. These form pairs with the -# TARGET_BACKENDS argument (due to cmake limitations they are separate list -# arguments). The lengths must exactly match. If no backends or drivers are -# specified, a test will be generated for every supported pair. -# COMPILER_FLAGS: additional flags to pass to the compiler. Bytecode output -# format and backend flags are passed automatically. -# RUNNER_ARGS: additional args to pass to the trace-runner program. The driver -# and input file flags are passed automatically. -# LABELS: Additional labels to apply to the test. The package path and -# "driver=${DRIVER}" are added automatically. -# TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES_VARIANTS:list of target cpu features variants. Each -# entry is either "default" for the architecture defaults, or a colon- -# separated triple "arch:name:cpu_features" where "arch" filters -# for a target CPU architecture (in IREE_ARCH format), "name" is a -# short name for the CPU features set (used to generate target names) -# and cpu_features is a comma-separated list of LLVM target attributes -# to enable. 
Example: -# x86_64:avx2_fma:+avx,+avx2,+fma -function(iree_generated_e2e_runner_test) - if(NOT IREE_BUILD_TESTS) - return() - endif() - - cmake_parse_arguments( - _RULE - "" - "NAME;TEST_TYPE;GENERATOR;TEST_RUNNER" - "TARGET_BACKENDS;DRIVERS;GENERATOR_ARGS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES_VARIANTS" - ${ARGN} - ) - - iree_is_bytecode_module_test_excluded_by_labels(_EXCLUDED_BY_LABELS "${_RULE_LABELS}") - if(_EXCLUDED_BY_LABELS) - return() - endif() - - if(_RULE_TARGET_CPU_FEATURES_VARIANTS) - set(_TARGET_CPU_FEATURES_VARIANTS "${_RULE_TARGET_CPU_FEATURES_VARIANTS}") - else() - set(_TARGET_CPU_FEATURES_VARIANTS "default") - endif() - - - if(NOT DEFINED _RULE_TARGET_BACKENDS AND NOT DEFINED _RULE_DRIVERS) - set(_RULE_TARGET_BACKENDS "vmvx" "vulkan-spirv" "llvm-cpu") - set(_RULE_DRIVERS "local-task" "vulkan" "local-task") - endif() - - list(LENGTH _RULE_TARGET_BACKENDS _TARGET_BACKEND_COUNT) - list(LENGTH _RULE_DRIVERS _DRIVER_COUNT) - - if(NOT _TARGET_BACKEND_COUNT EQUAL _DRIVER_COUNT) - message(SEND_ERROR - "TARGET_BACKENDS count ${_TARGET_BACKEND_COUNT} does not match DRIVERS count ${_DRIVER_COUNT}") - endif() - - math(EXPR _MAX_INDEX "${_TARGET_BACKEND_COUNT} - 1") - foreach(_INDEX RANGE "${_MAX_INDEX}") - list(GET _RULE_TARGET_BACKENDS ${_INDEX} _TARGET_BACKEND) - list(GET _RULE_DRIVERS ${_INDEX} _DRIVER) - foreach(_VARIANT_STRING IN LISTS _TARGET_CPU_FEATURES_VARIANTS) - parse_target_cpu_features_variant("${_VARIANT_STRING}" - _ENABLED _TARGET_CPU_FEATURES_NAME _TARGET_CPU_FEATURES) - if(NOT _ENABLED) - # The current entry is disabled on the target CPU architecture. - continue() - endif() - set(_TARGET_CPU_FEATURES_SUFFIX "") - set(_LABELS "${_RULE_LABELS}") - if (_TARGET_CPU_FEATURES_NAME) - set(_TARGET_CPU_FEATURES_SUFFIX "_${_TARGET_CPU_FEATURES_NAME}") - list(APPEND _LABELS "cpu_features=${_TARGET_CPU_FEATURES_NAME}") - endif() - iree_single_backend_e2e_runner_test( - NAME - "${_RULE_NAME}_${_TARGET_BACKEND}_${_DRIVER}${_TARGET_CPU_FEATURES_SUFFIX}" - TEST_TYPE - ${_RULE_TEST_TYPE} - GENERATOR - ${_RULE_GENERATOR} - GENERATOR_ARGS - ${_RULE_GENERATOR_ARGS} - TEST_RUNNER - ${_RULE_TEST_RUNNER} - TARGET_BACKEND - ${_TARGET_BACKEND} - DRIVER - ${_DRIVER} - COMPILER_FLAGS - ${_RULE_COMPILER_FLAGS} - RUNNER_ARGS - ${_RULE_RUNNER_ARGS} - LABELS - ${_LABELS} - TARGET_CPU_FEATURES - ${_TARGET_CPU_FEATURES} - ) - endforeach() - endforeach() -endfunction() diff --git a/tests/e2e/attention/BUILD.bazel b/tests/e2e/attention/BUILD.bazel deleted file mode 100644 index 3e9e41d5b9ee..000000000000 --- a/tests/e2e/attention/BUILD.bazel +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2024 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -# End-to-end attention tests. - -load("//build_tools/bazel:iree_e2e_generated_runner_test.bzl", "iree_generated_e2e_runner_test") - -package( - features = ["layering_check"], - licenses = ["notice"], # Apache 2.0 -) - -py_binary( - name = "generate_e2e_attention_tests", - srcs = ["generate_e2e_attention_tests.py"], -) - -########################################################################### -## -## LLVMCPU backend -## -########################################################################### - -# Default CPU backend. 
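The list comprehension below expanded to one iree_generated_e2e_runner_test suite per (dtype, size) pair, e.g. e2e_attention_cpu_f16_f16_f16_small; the autogenerated CMakeLists.txt deleted next in this diff shows the CMake-converted form of each suite.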
-[iree_generated_e2e_runner_test( - name = "e2e_attention_cpu_%s_%s_%s_%s" % (dtype, dtype, dtype, size), - generator = ":generate_e2e_attention_tests", - generator_args = [ - "--query_type=%s" % dtype, - "--key_type=%s" % dtype, - "--value_type=%s" % dtype, - "--shapes=%s" % size, - ], - tags = [ - "hostonly", - "local", - ], - target_backends_and_drivers = [ - ("llvm-cpu", "local-task"), - ], - target_cpu_features_variants = ["default"], - test_runner = "//tools/testing/e2e:iree-e2e-attention-test", - test_type = "attention", -) for dtype in [ - "f16", -] for size in [ - "small", - "medium", - "large", -]] diff --git a/tests/e2e/attention/CMakeLists.txt b/tests/e2e/attention/CMakeLists.txt deleted file mode 100644 index f7937845756d..000000000000 --- a/tests/e2e/attention/CMakeLists.txt +++ /dev/null @@ -1,88 +0,0 @@ -################################################################################ -# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # -# tests/e2e/attention/BUILD.bazel # -# # -# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # -# CMake-only content. # -# # -# To disable autogeneration for this file entirely, delete this header. # -################################################################################ - -iree_add_all_subdirs() - -iree_generated_e2e_runner_test( - NAME - e2e_attention_cpu_f16_f16_f16_small - TEST_TYPE - attention - GENERATOR - "generate_e2e_attention_tests.py" - GENERATOR_ARGS - "--query_type=f16" - "--key_type=f16" - "--value_type=f16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-attention-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_attention_cpu_f16_f16_f16_medium - TEST_TYPE - attention - GENERATOR - "generate_e2e_attention_tests.py" - GENERATOR_ARGS - "--query_type=f16" - "--key_type=f16" - "--value_type=f16" - "--shapes=medium" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-attention-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_attention_cpu_f16_f16_f16_large - TEST_TYPE - attention - GENERATOR - "generate_e2e_attention_tests.py" - GENERATOR_ARGS - "--query_type=f16" - "--key_type=f16" - "--value_type=f16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-attention-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/tests/e2e/attention/generate_e2e_attention_tests.py b/tests/e2e/attention/generate_e2e_attention_tests.py deleted file mode 100644 index f567a16c5557..000000000000 --- a/tests/e2e/attention/generate_e2e_attention_tests.py +++ /dev/null @@ -1,499 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -"""Generator for e2e attention tests. -""" - -import argparse -import enum -import dataclasses -import typing -import math - - -# Data type of kernel entries. The string values must match MLIR data types. -@enum.unique -class QueryElemTypeId(enum.Enum): - NONE = "" - F16 = "f16" - - -# Data type of input entries. 
The string values must match MLIR data types. -@enum.unique -class KeyElemTypeId(enum.Enum): - NONE = "" - F16 = "f16" - - -# Data type of input entries. The string values must match MLIR data types. -@enum.unique -class ValueElemTypeId(enum.Enum): - NONE = "" - F16 = "f16" - - -# Data type of input entries. The string values must match MLIR data types. -@enum.unique -class ResultElemTypeId(enum.Enum): - NONE = "" - F16 = "f16" - - -# Enumerates of the collections of shapes that we can generate tests for. -# The values are the accepted values for the --shapes= flag. -@enum.unique -class ShapesId(enum.Enum): - SMALL = "small" - MEDIUM = "medium" - LARGE = "large" - - -# batch: Batch dimension -# m: M dimension of first and second matmul -# n: N dimension of second matmul -# k1: K dimension of first matmul -# k2: K dimension of second matmul -@dataclasses.dataclass -class TestShapeAndScale: - batch: int - m: int - k1: int - k2: int - n: int - scale: float - - -# Returns the list of TestShape's to use for the collection of shapes -# identified by shapes_id. -def get_test_shapes(shapes_id: ShapesId): - if shapes_id == ShapesId.SMALL: - return [ - TestShapeAndScale(batch=2, m=512, k1=64, k2=128, n=32, scale=1.0), - ] - if shapes_id == ShapesId.MEDIUM: - return [ - TestShapeAndScale(batch=2, m=1024, k1=128, k2=256, n=64, scale=1.0), - ] - if shapes_id == ShapesId.LARGE: - return [ - TestShapeAndScale(batch=2, m=2048, k1=256, k2=512, n=128, scale=1.0), - ] - - raise ValueError(shapes_id) - - -# Determines the shape of input and kernel tensors. -@dataclasses.dataclass -class TestInputTensorShapes: - batch: int - m: int - k1: int - k2: int - n: int - scale: float - - -# Helper for generate_function. Generates TestInputTensorShapes, i.e. -# converts from the runtime shape dimensions in TestShape and given dynamicity to -# the set of shapes to be used in a test function's input tensors. -def generate_shapes_and_scale(shape: TestShapeAndScale): - batch = shape.batch - m = shape.m - k1 = shape.k1 - k2 = shape.k2 - n = shape.n - scale = shape.scale - - shapes_scale = TestInputTensorShapes( - batch=batch, - m=m, - k1=k1, - k2=k2, - n=n, - scale=scale, - ) - return shapes_scale - - -# Helper to return input, kernel and output shapes based on the layout and the Attention Params. -def get_tensor_shapes( - shapes_scale: TestShapeAndScale, -): - batch = shapes_scale.batch - m = shapes_scale.m - k1 = shapes_scale.k1 - k2 = shapes_scale.k2 - n = shapes_scale.n - scale = shapes_scale.scale - - query_tensor_shape = [batch, m, k1] - key_tensor_shape = [batch, k2, k1] - value_tensor_shape = [batch, k2, n] - result_tensor_shape = [batch, m, n] - - return query_tensor_shape, key_tensor_shape, value_tensor_shape, result_tensor_shape - - -# Helper for generate_function. -# Generates a name for a test function in the generated MLIR code. -def generate_function_name( - query_type: QueryElemTypeId, - key_type: KeyElemTypeId, - value_type: ValueElemTypeId, - shapes_scale: TestInputTensorShapes, -): - query_t = query_type.value - key_t = key_type.value - value_t = value_type.value - result_t = value_type.value - - batch = shapes_scale.batch - m = shapes_scale.m - k1 = shapes_scale.k1 - k2 = shapes_scale.k2 - n = shapes_scale.n - - attention = "attention" - return ( - f"{attention}_{batch}_{m}_{k1}_{k2}_{n}" - + f"_dtype_{query_t}_{key_t}_{value_t}_{result_t}" - ) - - -# Represents a generated test function. 
-@dataclasses.dataclass -class MLIRFunction: - name: str - signature: str - import_declaration: str - definition: str - - -# Generates a test function in the generated MLIR code. -# The generated function will take the same arguments as iree_linalg_ext.attention variants -# and will just call iree_linalg_ext.attention variants with them, returning its result. -def generate_function( - query_type: QueryElemTypeId, - key_type: KeyElemTypeId, - value_type: ValueElemTypeId, - shape_scale: TestShapeAndScale, -): - shapes_scale = generate_shapes_and_scale(shape_scale) - func_name = generate_function_name( - query_type, - key_type, - value_type, - shapes_scale, - ) - - query_shape, key_shape, value_shape, result_shape = get_tensor_shapes(shapes_scale) - query_tensor_type = ( - f"tensor<{query_shape[0]}x{query_shape[1]}x{query_shape[2]}x{query_type.value}>" - ) - key_tensor_type = ( - f"tensor<{key_shape[0]}x{key_shape[1]}x{key_shape[2]}x{key_type.value}>" - ) - value_tensor_type = ( - f"tensor<{value_shape[0]}x{value_shape[1]}x{value_shape[2]}x{value_type.value}>" - ) - result_tensor_type = f"tensor<{result_shape[0]}x{result_shape[1]}x{result_shape[2]}x{value_type.value}>" - F32 = "f32" - F16 = "f16" - op_name = "iree_linalg_ext.attention" - - # Compilation info is optional; prints empty string by default. - func_definition = "" - - signature = f"({query_tensor_type}, {key_tensor_type}, {value_tensor_type}, {result_tensor_type}) -> {result_tensor_type}" - import_declaration = f"func.func private @module.{func_name}(%query: !hal.buffer_view, %key: !hal.buffer_view, %value: !hal.buffer_view, %scale: {F32}) -> !hal.buffer_view" - func_definition = func_definition + ( - f"func.func @{func_name}(%query: {query_tensor_type}, %key: {key_tensor_type}, %value: {value_tensor_type}, %scale: {F32}) -> {result_tensor_type} {{\n" - f" %result0 = tensor.empty(): {result_tensor_type}\n" - f" %scale_f16 = arith.truncf %scale : {F32} to {F16} \n" - f" %result1 = {op_name} {{\n" - f" indexing_maps = [affine_map<(batch, m, n, k1, k2) -> (batch, m, k1)>,\n" - f" affine_map<(batch, m, n, k1, k2) -> (batch, k2, k1)>,\n" - f" affine_map<(batch, m, n, k1, k2) -> (batch, k2, n)>,\n" - f" affine_map<(batch, m, n, k1, k2) -> (batch, m, n)>]\n}}" - f" ins(%query, %key, %value, %scale_f16: {query_tensor_type}, {key_tensor_type}, {value_tensor_type}, {F16})\n" - f" outs(%result0: {result_tensor_type}) -> {result_tensor_type}\n" - f" return %result1: {result_tensor_type}\n" - f"}}\n" - ) - return MLIRFunction( - name=func_name, - signature=signature, - import_declaration=import_declaration, - definition=func_definition, - ) - - -# Represents a call to a generated test function. -@dataclasses.dataclass -class TestCall: - function: MLIRFunction - op: str - - -# Enumerates ways to initialize tensor buffer contents. -@enum.unique -class TensorGenerator(enum.Enum): - ZERO = "zero" # Fill with zeros - RANDOM = "random" # Fill with (deterministic) pseudorandom values. - - -# Intentionally fixed seed! We want full reproducibility here, both across runs -# and across machines. -# Intentionally not shared with local_pseudorandom_state to limit the ways -# in which shuffling testcases changes which random values are generated. 
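Each generated tensor argument consumes one increment of the counter below, so the pseudorandom inputs stay reproducible as long as test cases are emitted in the same order.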
-pseudorandom_generator_seed = 1 - - -def contents_generator_tag(generator: TensorGenerator): - if generator == TensorGenerator.ZERO: - return "" - elif generator == TensorGenerator.RANDOM: - global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed + 1 - return f"!tag:iree:fully_specified_pseudorandom {pseudorandom_generator_seed}" - else: - raise ValueError(generator) - - -# Generate a 3d tensor function argument of the given size as `%name`. -def generate_random_3d_tensor( - name: str, - tensor_shape: list, - element_type: typing.Union[QueryElemTypeId, ResultElemTypeId], -): - global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed + 1 - return ( - f" %{name}_dim0 = arith.constant {tensor_shape[0]} : i64\n" - f" %{name}_dim1 = arith.constant {tensor_shape[1]} : i64\n" - f" %{name}_dim2 = arith.constant {tensor_shape[2]} : i64\n" - f" %{name}_element_type = hal.element_type<{element_type.value}> : i32\n" - f" %{name}_seed = arith.constant {pseudorandom_generator_seed} : i32\n" - f" %{name} = call @attention_test.generate_random_tensor(%device, %{name}_dim0, %{name}_dim1, %{name}_dim2, %{name}_element_type, %{name}_seed) : (!hal.device, i64, i64, i64, i32, i32) -> !hal.buffer_view\n" - ) - - -call_id = 0 - - -def generate_call( - function: MLIRFunction, - query_type: QueryElemTypeId, - key_type: KeyElemTypeId, - value_type: ValueElemTypeId, - shapes_scale: TestShapeAndScale, -): - global call_id - func_name = f"{function.name}_{shapes_scale.batch}_{shapes_scale.m}_{shapes_scale.k1}_{shapes_scale.k2}_{shapes_scale.n}_{shapes_scale.k1}_{shapes_scale.scale}" - func_name = f"{func_name}_{call_id}" - call_id = call_id + 1 - - description = f"Attention shape (BATCHxMxK1xK2xN): {shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.k1}x{shapes_scale.k2}x{shapes_scale.k1}x{shapes_scale.n}" - op = ( - f"func.func @{func_name}() attributes {{\n" - f' iree.reflection = {{description = "{description}"}}\n' - "} {\n" - " %device_index = arith.constant 0 : index\n" - " %device = hal.devices.get %device_index : !hal.device\n" - ) - - query_shape, key_shape, value_shape, result_shape = get_tensor_shapes( - shapes_scale, - ) - - op = op + generate_random_3d_tensor("query", query_shape, query_type) - op = op + generate_random_3d_tensor("key", key_shape, key_type) - op = op + generate_random_3d_tensor("value", value_shape, value_type) - - global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed - 1 - op = op + ( - f" %scale = arith.constant {shapes_scale.scale} : f32\n" - f" %result = call @module.{function.name}(%query, %key, %value, %scale) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view, f32) -> !hal.buffer_view\n" - ) - - op = op + ( - f" %batch = arith.constant {shapes_scale.batch} : i64 \n" - f" %m = arith.constant {shapes_scale.m} : i64 \n" - f" %k1 = arith.constant {shapes_scale.k1} : i64 \n" - f" %k2 = arith.constant {shapes_scale.k2} : i64 \n" - f" %n = arith.constant {shapes_scale.n} : i64 \n" - f" %queryTensor = hal.tensor.import %query : !hal.buffer_view -> tensor<{shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.k1}xf16> \n" - f" %keyTensor = hal.tensor.import %key : !hal.buffer_view -> tensor<{shapes_scale.batch}x{shapes_scale.k2}x{shapes_scale.k1}xf16> \n" - f" %valueTensor = hal.tensor.import %value : !hal.buffer_view -> tensor<{shapes_scale.batch}x{shapes_scale.k2}x{shapes_scale.n}xf16> \n" - f" %resultTensor = hal.tensor.import %result : !hal.buffer_view -> 
tensor<{shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.n}xf16> \n" - f" %queryExt = arith.extf %queryTensor : tensor<{shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.k1}xf16> to tensor<{shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.k1}xf32> \n" - f" %keyExt = arith.extf %keyTensor : tensor<{shapes_scale.batch}x{shapes_scale.k2}x{shapes_scale.k1}xf16> to tensor<{shapes_scale.batch}x{shapes_scale.k2}x{shapes_scale.k1}xf32> \n" - f" %valueExt = arith.extf %valueTensor : tensor<{shapes_scale.batch}x{shapes_scale.k2}x{shapes_scale.n}xf16> to tensor<{shapes_scale.batch}x{shapes_scale.k2}x{shapes_scale.n}xf32> \n" - f" %resultExt = arith.extf %resultTensor : tensor<{shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.n}xf16> to tensor<{shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.n}xf32> \n" - f" %queryExtBufferView = hal.tensor.export %queryExt : tensor<{shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.k1}xf32> -> !hal.buffer_view \n" - f" %keyExtBufferView = hal.tensor.export %keyExt : tensor<{shapes_scale.batch}x{shapes_scale.k2}x{shapes_scale.k1}xf32> -> !hal.buffer_view \n" - f" %valueExtBufferView = hal.tensor.export %valueExt : tensor<{shapes_scale.batch}x{shapes_scale.k2}x{shapes_scale.n}xf32> -> !hal.buffer_view \n" - f" %resultExtBufferView = hal.tensor.export %resultExt : tensor<{shapes_scale.batch}x{shapes_scale.m}x{shapes_scale.n}xf32> -> !hal.buffer_view \n" - f" call @attention_test.check_attention_results(%device, %batch, %m, %k1, %k2, %n, %queryExtBufferView, %keyExtBufferView, %valueExtBufferView, %resultExtBufferView) : (!hal.device, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()\n" - ) - - op = op + " return\n" - op = op + "}\n" - - return TestCall(function=function, op=op) - - -# Generates all output files' contents as strings. -def generate( - query_type: QueryElemTypeId, - key_type: KeyElemTypeId, - value_type: ValueElemTypeId, - shapes_id: ShapesId, -): - functions = {} - calls = [] - - for shape in get_test_shapes(shapes_id): - function = generate_function( - query_type, - key_type, - value_type, - shape, - ) - if function.name not in functions: - functions[function.name] = function - calls.append( - generate_call( - function, - query_type, - key_type, - value_type, - shape, - ) - ) - - return (functions, calls) - - -def parse_arguments(): - parser = argparse.ArgumentParser(description="Generator of e2e Attention tests") - parser.add_argument( - "--output_attention_mlir", - type=str, - help="Path of output .mlir file containing the generated Attention functions", - required=True, - ) - parser.add_argument( - "--output_calls_mlir", - type=str, - help="Path of output .mlir file containing the calls", - required=True, - ) - parser.add_argument( - "--query_type", - type=str, - choices=["f16"], - help="Numeric type of query tensors ", - required=True, - ) - parser.add_argument( - "--key_type", - type=str, - choices=["f16"], - help="Numeric type of key tensors ", - required=True, - ) - parser.add_argument( - "--value_type", - type=str, - choices=["f16"], - help="Numeric type of value tensors ", - required=True, - ) - parser.add_argument( - "--shapes_scale", - type=str, - choices=[s.value for s in ShapesId], - help="Collection of tensor shapes to test", - required=True, - ) - parser.add_argument( - "--requirements", - type=str, - help="Target requirements for this module. Comma-separated. As in -iree-llvmcpu-target-cpu-features. 
If the target device does not meet all of the requirements, the test will be skipped.", - required=False, - ) - return parser.parse_args() - - -def write_code_file(functions, filename): - with open(filename, "w") as file: - for function in functions.values(): - file.write(function.definition + "\n") - - -def write_calls_file(functions, calls, filename, requirements): - # Module-level reflection information used to control the test tool. - reflection = "" - if requirements: - reflection = ( - "iree.reflection = {" - 'target_features = "' - + ",".join([req.lstrip("+") for req in requirements.split(",")]) - + '"' - "}" - ) - module_definition = ( - f"builtin.module @calls attributes {{\n" f" {reflection}\n" f"}} {{\n\n" - ) - - # Declare the custom module that generates arguments. - module_definition = module_definition + ( - "func.func private @attention_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view\n" - "func.func private @attention_test.check_attention_results(%device: !hal.device, %batch: i64, %m: i64, %k1: i64, %k2: i64, %n: i64, %query: !hal.buffer_view, %key: !hal.buffer_view, %value: !hal.buffer_view, %result: !hal.buffer_view)\n" - "\n" - ) - - # Declare the functions that will be called. - for function in functions.values(): - module_definition = module_definition + function.import_declaration + "\n" - module_definition = module_definition + "\n" - - # Emit the test cases for each call. - for call in calls: - module_definition = module_definition + call.op + "\n" - - module_definition = module_definition + "\n}\n" - - with open(filename, "w") as file: - file.write(module_definition) - - -def main(args): - query_type = QueryElemTypeId(args.query_type) - key_type = KeyElemTypeId(args.key_type) - value_type = ValueElemTypeId(args.value_type) - shapes_id = ShapesId(args.shapes_scale) - - (functions, calls) = generate( - query_type, - key_type, - value_type, - shapes_id, - ) - - write_code_file(functions, args.output_attention_mlir) - write_calls_file( - functions, - calls, - args.output_calls_mlir, - args.requirements, - ) - - -if __name__ == "__main__": - main(parse_arguments()) diff --git a/tests/e2e/convolution/BUILD.bazel b/tests/e2e/convolution/BUILD.bazel deleted file mode 100644 index 9847e210158f..000000000000 --- a/tests/e2e/convolution/BUILD.bazel +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2024 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -# End-to-end convolution 2d tests. - -load("//build_tools/bazel:iree_e2e_generated_runner_test.bzl", "iree_generated_e2e_runner_test") - -package( - features = ["layering_check"], - licenses = ["notice"], # Apache 2.0 -) - -py_binary( - name = "generate_e2e_conv2d_tests", - srcs = ["generate_e2e_conv2d_tests.py"], -) - -########################################################################### -## -## LLVMCPU backend -## -########################################################################### - -# Default CPU backend. 
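Two suite families follow: the default e2e_conv2d_cpu_* suites below, and a parallel e2e_winograd_conv2d_cpu_* set that adds a preprocessing flag routing convolutions through iree-linalg-ext-convert-conv2d-to-winograd (replace-all-convs=true), exercising the Winograd lowering on the same generated test cases.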
-[iree_generated_e2e_runner_test( - name = "e2e_conv2d_cpu_%s_%s_%s_%s" % (dtype, dtype, dtype, size), - generator = ":generate_e2e_conv2d_tests", - generator_args = [ - "--input_type=%s" % dtype, - "--kernel_type=%s" % dtype, - "--acc_type=%s" % dtype, - "--shapes=%s" % size, - ], - tags = [ - "hostonly", - "local", - ], - target_backends_and_drivers = [ - ("llvm-cpu", "local-task"), - ], - target_cpu_features_variants = ["default"], - test_runner = "//tools/testing/e2e:iree-e2e-conv2d-test", - test_type = "conv2d", -) for dtype in [ - "f32", - "f16", -] for size in [ - "small", - "medium", - "large", -]] - -# Default CPU backend + winograd. -[iree_generated_e2e_runner_test( - name = "e2e_winograd_conv2d_cpu_%s_%s_%s_%s" % (dtype, dtype, dtype, size), - compiler_flags = [ - "--iree-preprocessing-pass-pipeline=builtin.module\\(func.func\\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\\)\\)", - ], - generator = ":generate_e2e_conv2d_tests", - generator_args = [ - "--input_type=%s" % dtype, - "--kernel_type=%s" % dtype, - "--acc_type=%s" % dtype, - "--shapes=%s" % size, - ], - tags = [ - "hostonly", - "local", - ], - target_backends_and_drivers = [ - ("llvm-cpu", "local-task"), - ], - target_cpu_features_variants = ["default"], - test_runner = "//tools/testing/e2e:iree-e2e-conv2d-test", - test_type = "conv2d", -) for dtype in [ - "f32", - "f16", -] for size in [ - "small", - "medium", - "large", -]] diff --git a/tests/e2e/convolution/CMakeLists.txt b/tests/e2e/convolution/CMakeLists.txt deleted file mode 100644 index 8ddad849b082..000000000000 --- a/tests/e2e/convolution/CMakeLists.txt +++ /dev/null @@ -1,325 +0,0 @@ -################################################################################ -# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # -# tests/e2e/convolution/BUILD.bazel # -# # -# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # -# CMake-only content. # -# # -# To disable autogeneration for this file entirely, delete this header. 
# -################################################################################ - -iree_add_all_subdirs() - -iree_generated_e2e_runner_test( - NAME - e2e_conv2d_cpu_f32_f32_f32_small - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f32" - "--kernel_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_conv2d_cpu_f32_f32_f32_medium - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f32" - "--kernel_type=f32" - "--acc_type=f32" - "--shapes=medium" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_conv2d_cpu_f32_f32_f32_large - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f32" - "--kernel_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_conv2d_cpu_f16_f16_f16_small - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f16" - "--kernel_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_conv2d_cpu_f16_f16_f16_medium - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f16" - "--kernel_type=f16" - "--acc_type=f16" - "--shapes=medium" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_conv2d_cpu_f16_f16_f16_large - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f16" - "--kernel_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_winograd_conv2d_cpu_f32_f32_f32_small - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f32" - "--kernel_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_winograd_conv2d_cpu_f32_f32_f32_medium - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f32" - "--kernel_type=f32" - "--acc_type=f32" 
- "--shapes=medium" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_winograd_conv2d_cpu_f32_f32_f32_large - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f32" - "--kernel_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_winograd_conv2d_cpu_f16_f16_f16_small - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f16" - "--kernel_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_winograd_conv2d_cpu_f16_f16_f16_medium - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f16" - "--kernel_type=f16" - "--acc_type=f16" - "--shapes=medium" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -iree_generated_e2e_runner_test( - NAME - e2e_winograd_conv2d_cpu_f16_f16_f16_large - TEST_TYPE - conv2d - GENERATOR - "generate_e2e_conv2d_tests.py" - GENERATOR_ARGS - "--input_type=f16" - "--kernel_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-conv2d-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd{replace-all-convs=true}\)\)" - LABELS - "hostonly" - "local" - TARGET_CPU_FEATURES_VARIANTS - "default" -) - -### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/tests/e2e/convolution/generate_e2e_conv2d_tests.py b/tests/e2e/convolution/generate_e2e_conv2d_tests.py deleted file mode 100644 index 0982e1801679..000000000000 --- a/tests/e2e/convolution/generate_e2e_conv2d_tests.py +++ /dev/null @@ -1,707 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -"""Generator for e2e conv2d tests. -""" - -import argparse -import enum -import dataclasses -import typing -import math - - -# Data type of kernel entries. The string values must match MLIR data types. 
-@enum.unique -class KernelElemTypeId(enum.Enum): - NONE = "" - F32 = "f32" - F16 = "f16" - - -# Data type of input entries. The string values must match MLIR data types. -@enum.unique -class InputElemTypeId(enum.Enum): - NONE = "" - F32 = "f32" - F16 = "f16" - - -# Enumerates the collections of shapes that we can generate tests for. -# The values are the accepted values for the --shapes= flag. -@enum.unique -class ShapesId(enum.Enum): - SMALL = "small" - MEDIUM = "medium" - LARGE = "large" - - -# Enumerates ways to construct MLIR tensor types. -# TODO: Enable dynamic dimensions once the tests start passing. -@enum.unique -class Dynamicity(enum.Enum): - DYNAMIC = "dynamic" # Use '?' everywhere. Example: tensor<?x?xf32>. - STATIC = "static" # Use fixed values everywhere. Example: tensor<4x6xf32>. - MIXED = "mixed" # Randomly mix '?' and values. Example: tensor<?x4xf32>. - - -# Enumerates ways to initialize input buffer contents. -@enum.unique -class InputGenerator(enum.Enum): - ZERO = "zero" # Fill with zeros - RANDOM = "random" # Fill with (deterministic) pseudorandom values. - - -# Enumerates ways to initialize kernel buffer contents. -@enum.unique -class KernelGenerator(enum.Enum): - ZERO = "zero" # Fill with zeros - RANDOM = "random" # Fill with (deterministic) pseudorandom values. - - -# TODO: Add more input layouts as needed. The layout determines the dims of the input and kernel tensors. -@enum.unique -class InputLayout(enum.Enum): - NCHW = "nchw" - NHWC = "nhwc" - - -# TODO: Add more kernel layouts as needed. -@enum.unique -class KernelLayout(enum.Enum): - FCHW = "fchw" - HWCF = "hwcf" - - -# Describes the shape of a conv2d testcase in the usual convention: -# the input is {n}x{c}x{h}x{w}, the kernel is {f}x{c}x{kh}x{kw}, the accumulator/result is -# {n}x{f}x{oh}x{ow}. -# The extra `accumulate` boolean tells whether the conv2d is accumulating into -# an existing accumulator (C += A * B) or just overwriting the result -# (C = A * B). -@dataclasses.dataclass -class TestShape: - n: int - c: int - h: int - w: int - kh: int - kw: int - f: int - accumulate: bool - - -# Attributes for the linalg.conv2d operation. -@dataclasses.dataclass -class ConvAttrs: - STRIDE: typing.Tuple[int, int] = (1, 1) - DILATION: typing.Tuple[int, int] = (1, 1) - - -# Returns the list of TestShape's to use for the collection of shapes -# identified by shapes_id. -def get_test_shapes(shapes_id: ShapesId): - # Notes: - # 1. Be conservative in adding more shapes, as that can increase both the - # build and execution latency of tests. The build latency is nearly the - # same for all shapes, while execution latency grows linearly with - # n*f*ow*oh*kh*kw. - - if shapes_id == ShapesId.SMALL: - return [ - TestShape(n=1, c=1, h=1, w=1, kh=1, kw=1, f=1, accumulate=True), - TestShape(n=1, c=1, h=16, w=16, kh=2, kw=2, f=1, accumulate=True), - TestShape(n=2, c=2, h=32, w=32, kh=3, kw=3, f=2, accumulate=True), - ] - if shapes_id == ShapesId.MEDIUM: - return [ - TestShape(n=2, h=32, w=32, c=32, kh=3, kw=3, f=64, accumulate=True), - ] - if shapes_id == ShapesId.LARGE: - return [ - TestShape(n=2, c=4, h=128, w=128, kh=3, kw=3, f=8, accumulate=True), - TestShape(n=2, c=3, h=128, w=128, kh=3, kw=3, f=12, accumulate=True), - ] - - raise ValueError(shapes_id) - - -# Returns the list of Dynamicity's to use for the collection of shapes -# identified by shapes_id. -def get_dynamicities(shapes_id: ShapesId): - if shapes_id == ShapesId.LARGE: - return [ - Dynamicity.STATIC, - ] - # TODO: Enable dynamic dimensions once the tests start passing.
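-    # (Until then, both branches intentionally return the same static-only list.)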
- else: - return [ - Dynamicity.STATIC, - ] - raise ValueError(shapes_id) - - -# Intentionally fixed seed! We want full reproducibility here, both across runs -# and across machines. -# Intentionally not shared with pseudorandom_generator_seed to limit the ways -# in which shuffling testcases changes which random values are generated. -local_pseudorandom_state = 1 - - -# A shape dimension value, i.e. a size value that could appear in a MLIR type -# such as 'tensor<?x4xf32>'. None means a dynamic size, similar to '?' in MLIR. -@dataclasses.dataclass -class DimSize: - value: typing.Optional[int] - - -# Generates a compile-time MLIR size value, i.e. either a fixed positive integer -# or None (which maps to MLIR '?') depending on dynamicity. -def shape_dim(x: int, dynamicity: Dynamicity): - if dynamicity == Dynamicity.DYNAMIC: - return DimSize(None) - elif dynamicity == Dynamicity.STATIC: - return DimSize(x) - else: - raise ValueError(dynamicity) - - -# Stringification used for generating MLIR types, e.g. tensor<?x4xf32>. -def int_or_question_mark(s: DimSize): - return s.value or "?" - - -# Stringification used for generating alphanumeric identifiers, e.g. -# func.func @somefunction_DYNxDYNxf32, where we can't use "?" characters. -def int_or_DYN(s: DimSize): - return s.value or "DYN" - - -# Determines the shape of input and kernel tensors. -@dataclasses.dataclass -class TestInputTensorShapes: - n: DimSize - c: DimSize - h: DimSize - w: DimSize - kh: DimSize - kw: DimSize - f: DimSize - - -# Helper for generate_function. Generates TestInputTensorShapes, i.e. -# converts the runtime shape dimensions in TestShape and the given dynamicity into -# the set of shapes to be used in a test function's input tensors. -def generate_shapes(shape: TestShape, dynamicity: Dynamicity): - n = shape_dim(shape.n, dynamicity) - c = shape_dim(shape.c, dynamicity) - h = shape_dim(shape.h, dynamicity) - w = shape_dim(shape.w, dynamicity) - kh = shape_dim(shape.kh, dynamicity) - kw = shape_dim(shape.kw, dynamicity) - f = shape_dim(shape.f, dynamicity) - shapes = TestInputTensorShapes( - n=n, - c=c, - h=h, - w=w, - kh=kh, - kw=kw, - f=f, - ) - return shapes - - -# Helper to calculate the output shape based on the input shape, kernel shape, -# dilation and stride. -def calc_out_shape(i_shape: int, k_shape: int, dilation_val: int, stride_val: int): - x = (k_shape - 1) * (dilation_val - 1) - x = i_shape - k_shape - x - return math.floor(x / stride_val) + 1 - - -# Helper to return input, kernel and output shapes based on the layouts and ConvAttrs. -def get_tensor_shape( - shapes: TestShape, - kernel_layout: KernelLayout, - input_layout: InputLayout, - conv_attr: ConvAttrs, -): - n = shapes.n - c = shapes.c - h = shapes.h - w = shapes.w - kh = shapes.kh - kw = shapes.kw - f = shapes.f - - # Extract input dimensions - input_height, input_width = h, w - - # Extract kernel dimensions - kernel_height, kernel_width = kh, kw - - # Get the dilation and stride - dilation = conv_attr.DILATION - stride = conv_attr.STRIDE - - # Calculate output height. - oh = calc_out_shape(input_height, kernel_height, dilation[0], stride[0]) - # Calculate output width.
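-    # For example, the small 16x16 input with a 2x2 kernel and unit stride/dilation gives oh = ow = floor((16 - 2 - 0) / 1) + 1 = 15.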
ow = calc_out_shape(input_width, kernel_width, dilation[1], stride[1]) - - input_tensor_shape, kernel_tensor_shape, output_tensor_shape = [], [], [] - - if input_layout == InputLayout.NCHW: - input_tensor_shape = [n, c, h, w] - output_tensor_shape = [n, f, oh, ow] - elif input_layout == InputLayout.NHWC: - input_tensor_shape = [n, h, w, c] - output_tensor_shape = [n, oh, ow, f] - else: - raise ValueError(input_layout) - - if kernel_layout == KernelLayout.FCHW: - kernel_tensor_shape = [f, c, kh, kw] - elif kernel_layout == KernelLayout.HWCF: - kernel_tensor_shape = [kh, kw, c, f] - else: - raise ValueError(kernel_layout) - - return input_tensor_shape, kernel_tensor_shape, output_tensor_shape - - -# Helper for generate_function. -# Generates a name for a test function in the generated MLIR code. -def generate_function_name( - input_type: InputElemTypeId, - kernel_type: KernelElemTypeId, - output_type: InputElemTypeId, - shapes: TestInputTensorShapes, - accumulate: bool, -): - input_t = input_type.value - kernel_t = kernel_type.value - acc_t = output_type.value - n = int_or_DYN(shapes.n) - c = int_or_DYN(shapes.c) - h = int_or_DYN(shapes.h) - w = int_or_DYN(shapes.w) - kh = int_or_DYN(shapes.kh) - kw = int_or_DYN(shapes.kw) - f = int_or_DYN(shapes.f) - - conv2d_kind = "conv2d_accumulate" if accumulate else "conv2d" - return ( - f"{conv2d_kind}_{n}_{c}_{h}_{w}_times_" - + f"{kh}_{kw}_{f}_dtype_{input_t}_{kernel_t}_{acc_t}" - ) - - -# Represents a generated test function. -@dataclasses.dataclass -class MLIRFunction: - name: str - signature: str - import_declaration: str - definition: str - - -# Generates a test function in the generated MLIR code. -# The generated function takes the same arguments as the linalg.conv2d variants -# and just calls the matching variant with them, returning its result. -def generate_function( - input_type: InputElemTypeId, - input_layout: InputLayout, - kernel_type: KernelElemTypeId, - kernel_layout: KernelLayout, - acc_type: InputElemTypeId, - conv2d_attr: ConvAttrs, - shape: TestShape, - dynamicity: Dynamicity, -): - shapes = generate_shapes(shape, dynamicity) - func_name = generate_function_name( - input_type, - kernel_type, - acc_type, - shapes, - shape.accumulate, - ) - - input_shape, kernel_shape, output_shape = get_tensor_shape( - shape, kernel_layout, input_layout, conv2d_attr - ) - input_tensor_type = f"tensor<{input_shape[0]}x{input_shape[1]}x{input_shape[2]}x{input_shape[3]}x{input_type.value}>" - kernel_tensor_type = f"tensor<{kernel_shape[0]}x{kernel_shape[1]}x{kernel_shape[2]}x{kernel_shape[3]}x{kernel_type.value}>" - - acc_tensor_type = f"tensor<{output_shape[0]}x{output_shape[1]}x{output_shape[2]}x{output_shape[3]}x{acc_type.value}>" - - op_name = None - if input_layout == InputLayout.NCHW: - if kernel_layout == KernelLayout.FCHW: - op_name = "linalg.conv_2d_nchw_fchw" - if kernel_layout == KernelLayout.HWCF: - op_name = "linalg.conv_2d_nchw_hwcf" - elif input_layout == InputLayout.NHWC: - if kernel_layout == KernelLayout.HWCF: - op_name = "linalg.conv_2d_nhwc_hwcf" - - conv_attr = f"{{dilations = dense<{list(conv2d_attr.DILATION)}> : tensor<2xi64>, strides = dense<{list(conv2d_attr.STRIDE)}> : tensor<2xi64>}}" - - # Compilation info is optional; prints empty string by default.
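-    # For the small static NCHW/FCHW f32 testcase the emitted definition looks roughly like (a sketch, not verbatim output): - #   func.func @conv2d_...(%lhs: tensor<1x1x16x16xf32>, %rhs: tensor<1x1x2x2xf32>, %acc: tensor<1x1x15x15xf32>) -> tensor<1x1x15x15xf32> - # with a single linalg.conv_2d_nchw_fchw op between the signature and the return.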
- func_definition = "" - - signature = f"({input_tensor_type}, {kernel_tensor_type}, {acc_tensor_type}) -> {acc_tensor_type}" - import_declaration = f"func.func private @module.{func_name}(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view" - func_definition = func_definition + ( - f"func.func @{func_name}(%lhs: {input_tensor_type}, %rhs: {kernel_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n" - f" %result = {op_name} {conv_attr} ins(%lhs, %rhs: {input_tensor_type}, {kernel_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n" - f" return %result: {acc_tensor_type}\n" - f"}}\n" - ) - - return MLIRFunction( - name=func_name, - signature=signature, - import_declaration=import_declaration, - definition=func_definition, - ) - - -# Represents a call to a generated test function. -@dataclasses.dataclass -class TestCall: - function: MLIRFunction - op: str - - -# Enumerates ways to initialize tensor buffer contents. -@enum.unique -class TensorGenerator(enum.Enum): - ZERO = "zero" # Fill with zeros - RANDOM = "random" # Fill with (deterministic) pseudorandom values. - - -# Intentionally fixed seed! We want full reproducibility here, both across runs -# and across machines. -# Intentionally not shared with local_pseudorandom_state to limit the ways -# in which shuffling testcases changes which random values are generated. -pseudorandom_generator_seed = 1 - - -def contents_generator_tag(generator: TensorGenerator): - if generator == TensorGenerator.ZERO: - return "" - elif generator == TensorGenerator.RANDOM: - global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed + 1 - return f"!tag:iree:fully_specified_pseudorandom {pseudorandom_generator_seed}" - else: - raise ValueError(generator) - - -# Generate a 4d tensor function argument of the given size as `%name`. 
-def generate_random_4d_tensor( - name: str, - tensor_shape: list, - element_type: typing.Union[InputElemTypeId, KernelElemTypeId], -): - global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed + 1 - return ( - f" %{name}_dim0 = arith.constant {tensor_shape[0]} : i64\n" - f" %{name}_dim1 = arith.constant {tensor_shape[1]} : i64\n" - f" %{name}_dim2 = arith.constant {tensor_shape[2]} : i64\n" - f" %{name}_dim3 = arith.constant {tensor_shape[3]} : i64\n" - f" %{name}_element_type = hal.element_type<{element_type.value}> : i32\n" - f" %{name}_seed = arith.constant {pseudorandom_generator_seed} : i32\n" - f" %{name} = call @conv2d_test.generate_random_tensor(%device, %{name}_dim0, %{name}_dim1, %{name}_dim2, %{name}_dim3, %{name}_element_type, %{name}_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view\n" - ) - - -call_id = 0 - - -def generate_call( - function: MLIRFunction, - input_type: InputElemTypeId, - input_layout: InputLayout, - kernel_type: KernelElemTypeId, - kernel_layout: KernelLayout, - conv2d_attr: ConvAttrs, - acc_type: InputElemTypeId, - shape: TestShape, -): - global call_id - func_name = f"{function.name}_{shape.n}_{shape.c}_{shape.h}_{shape.w}_{shape.f}_{shape.kh}_{shape.kw}" - if shape.accumulate: - func_name = f"{func_name}_acc" - func_name = f"{func_name}_{call_id}" - call_id = call_id + 1 - - description = f"Conv2d shape (NxCxHxWxFxKHxKW): {shape.n}x{shape.c}x{shape.h}x{shape.w}x{shape.f}x{shape.kh}x{shape.kw}" - op = ( - f"func.func @{func_name}() attributes {{\n" - f' iree.reflection = {{description = "{description}"}}\n' - "} {\n" - " %device_index = arith.constant 0 : index\n" - " %device = hal.devices.get %device_index : !hal.device\n" - ) - - inp_shape, kernel_shape, out_shape = get_tensor_shape( - shape, - kernel_layout, - input_layout, - conv2d_attr, - ) - - op = op + generate_random_4d_tensor("input", inp_shape, input_type) - op = op + generate_random_4d_tensor("kernel", kernel_shape, kernel_type) - if shape.accumulate: - op = op + generate_random_4d_tensor("acc", out_shape, acc_type) - # TODO(#16168): there's a bug with in-place input->output aliasing and - # we work around it here by passing in a unique copy. 
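-        # Decrementing the seed makes the next generate_random_4d_tensor call (which pre-increments it) reuse the seed %acc was generated with, so %acc_copy holds identical contents.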
- global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed - 1 - op = op + generate_random_4d_tensor("acc_copy", out_shape, acc_type) - op = op + ( - f" %result = call @module.{function.name}(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view\n" - ) - else: - op = op + ( - f" %acc = util.null : !hal.buffer_view\n" - f" %result = call @module.{function.name}(%input, %kernel) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view\n" - ) - - op = op + ( - f" %n = arith.constant {shape.n} : i64\n" - f" %c = arith.constant {shape.c} : i64\n" - f" %h = arith.constant {shape.h} : i64\n" - f" %w = arith.constant {shape.w} : i64\n" - f" %f = arith.constant {shape.f} : i64\n" - f" %kh = arith.constant {shape.kh} : i64\n" - f" %kw = arith.constant {shape.kw} : i64\n" - f" %sh = arith.constant {conv2d_attr.STRIDE[0]} : i64\n" - f" %sw = arith.constant {conv2d_attr.STRIDE[1]} : i64\n" - f" %dh = arith.constant {conv2d_attr.DILATION[0]} : i64\n" - f" %dw = arith.constant {conv2d_attr.DILATION[1]} : i64\n" - f" call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()\n" - ) - - op = op + " return\n" - op = op + "}\n" - - return TestCall(function=function, op=op) - - -# Generates all output files' contents as strings. -def generate( - input_elem_type: InputElemTypeId, - input_layout: InputLayout, - kernel_elem_type: KernelElemTypeId, - kernel_layout: KernelLayout, - conv2d_attr: ConvAttrs, - acc_type: InputElemTypeId, - shapes_id: ShapesId, -): - functions = {} - calls = [] - - for shape in get_test_shapes(shapes_id): - for dynamicity in get_dynamicities(shapes_id): - function = generate_function( - input_elem_type, - input_layout, - kernel_elem_type, - kernel_layout, - acc_type, - conv2d_attr, - shape, - dynamicity, - ) - # Different testcases may differ only by runtime parameters but - # share the same code. For example, dynamic-shapes testcases - # share the same code involving dynamic tensor types even though the runtime - # values in the trace are different. That's why we append conditionally - # to functions, but unconditionally to calls. - if function.name not in functions: - functions[function.name] = function - calls.append( - generate_call( - function, - input_elem_type, - input_layout, - kernel_elem_type, - kernel_layout, - conv2d_attr, - acc_type, - shape, - ) - ) - - return (functions, calls) - - -def parse_arguments(): - parser = argparse.ArgumentParser(description="Generator of e2e conv2d tests") - parser.add_argument( - "--output_conv2d_mlir", - type=str, - help="Path of output .mlir file containing the generated conv2d functions", - required=True, - ) - parser.add_argument( - "--output_calls_mlir", - type=str, - help="Path of output .mlir file containing the calls", - required=True, - ) - parser.add_argument( - "--input_type", - type=str, - choices=["f32", "f16"], - help="Numeric type of input tensors", - required=True, - ) - parser.add_argument( - "--input_layout", - type=str, - default="nchw", - choices=["nchw", "nhwc"], - help="Layout of the input tensor. 
Defaults to nchw.", - required=False, - ) - parser.add_argument( - "--kernel_type", - type=str, - choices=["f32", "f16"], - help="Numeric type of kernel tensors", - required=True, - ) - parser.add_argument( - "--kernel_layout", - type=str, - default="fchw", - choices=["fchw", "hwcf"], - help="Layout of the kernel tensor. Defaults to fchw.", - required=False, - ) - parser.add_argument( - "--acc_type", - type=str, - choices=["f32", "f16"], - help="Numeric type of accumulator tensors", - default="", - required=False, - ) - parser.add_argument( - "--shapes", - type=str, - choices=[s.value for s in ShapesId], - help="Collection of tensor shapes to test", - required=True, - ) - parser.add_argument( - "--dilation", - type=str, - default="1,1", - help="The dilation factor for the convolution operation. Comma-separated. As in 1,1", - required=False, - ) - parser.add_argument( - "--stride", - type=str, - default="1,1", - help="The stride factor for the convolution operation. Comma-separated. As in 1,1", - required=False, - ) - parser.add_argument( - "--requirements", - type=str, - help="Target requirements for this module. Comma-separated. As in --iree-llvmcpu-target-cpu-features. If the target device does not meet all of the requirements, the test will be skipped.", - required=False, - ) - return parser.parse_args() - - -def write_code_file(functions, filename): - with open(filename, "w") as file: - for function in functions.values(): - file.write(function.definition + "\n") - - -def write_calls_file(functions, calls, filename, requirements): - # Module-level reflection information used to control the test tool. - reflection = "" - if requirements: - reflection = ( - "iree.reflection = {" - 'target_features = "' - + ",".join([req.lstrip("+") for req in requirements.split(",")]) - + '"' - "}" - ) - module_definition = ( - f"builtin.module @calls attributes {{\n" f" {reflection}\n" f"}} {{\n\n" - ) - - # Declare the custom module that generates arguments. - module_definition = module_definition + ( - "func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view\n" - "func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)\n" - "\n" - ) - - # Declare the functions that will be called. - for function in functions.values(): - module_definition = module_definition + function.import_declaration + "\n" - module_definition = module_definition + "\n" - - # Emit the test cases for each call. - for call in calls: - module_definition = module_definition + call.op + "\n" - - module_definition = module_definition + "\n}\n" - - with open(filename, "w") as file: - file.write(module_definition) - - -def main(args): - input_type = InputElemTypeId(args.input_type) - input_layout = InputLayout(args.input_layout) - kernel_type = KernelElemTypeId(args.kernel_type) - kernel_layout = KernelLayout(args.kernel_layout) - # TODO: The output type is the same as the input type for now.
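-    # (args.acc_type is parsed above but currently ignored here.)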
- acc_type = input_type - shapes_id = ShapesId(args.shapes) - conv2d_attr = ConvAttrs( - tuple(map(int, args.stride.split(","))), - tuple(map(int, args.dilation.split(","))), - ) - - (functions, calls) = generate( - input_type, - input_layout, - kernel_type, - kernel_layout, - conv2d_attr, - acc_type, - shapes_id, - ) - - write_code_file(functions, args.output_conv2d_mlir) - write_calls_file( - functions, - calls, - args.output_calls_mlir, - args.requirements, - ) - - -if __name__ == "__main__": - main(parse_arguments()) diff --git a/tests/e2e/matmul/BUILD.bazel b/tests/e2e/matmul/BUILD.bazel deleted file mode 100644 index b4c2b51e429b..000000000000 --- a/tests/e2e/matmul/BUILD.bazel +++ /dev/null @@ -1,704 +0,0 @@ -# Copyright 2022 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -# End-to-end matrix multiplication tests. - -load("//build_tools/bazel:iree_e2e_generated_runner_test.bzl", "iree_generated_e2e_runner_test") - -package( - features = ["layering_check"], - licenses = ["notice"], # Apache 2.0 -) - -py_binary( - name = "generate_e2e_matmul_tests", - srcs = ["generate_e2e_matmul_tests.py"], -) - -########################################################################### -## -## LLVMCPU backend -## -########################################################################### - -# LLVMCPU, non-data-tiling, no microkernels -[iree_generated_e2e_runner_test( - name = "e2e_matmul_cpu_nondt_%s_%s_%s" % (lhs_rhs_type, acc_type, size), - compiler_flags = [ - "--iree-opt-data-tiling=false", - "--iree-llvmcpu-enable-ukernels=none", - "--iree-llvmcpu-enable-scalable-vectorization", - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--acc_type=%s" % acc_type, - "--shapes=%s" % size, - ], - tags = [ - # f16/bf16 trigger internal LLVM assertion errors on riscv and wasm. - "noriscv", - "nowasm", - ] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else [], - target_backends_and_drivers = [ - ("llvm-cpu", "local-task"), - ], - target_cpu_features_variants = ["default"] + - # Widening matmuls fail to lower for SVE. - (["arm_64:sve:+sve"] if lhs_rhs_type == acc_type else []), - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for (lhs_rhs_type, acc_type) in [ - # ("i8", "i32"), # TODO(#15800): enable once compile time is reasonable - # ("f32", "f32"), # TODO(#15800): enable once compile time is reasonable - # ("f16", "f16"), # TODO(#15800): enable once compile time is reasonable - # ("f16", "f32"), # TODO(#15800): enable once compile time is reasonable - # TODO(#15258): enable bf16 tests when that bug is fixed. 
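-    # (Note: with every dtype pair above and below commented out, this comprehension currently expands to zero test targets.)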
- # ("bf16", "bf16"), - # ("bf16", "f32"), -] for size in [ - "small", - "large", -]] - -PREPROCESSING_TRANSPOSE_LHS = "--iree-preprocessing-pass-pipeline=builtin.module\\(util.func\\(iree-preprocessing-transpose-matmul-pass{input=lhs}\\)\\)" - -PREPROCESSING_PEEL = "--iree-llvmcpu-vector-pproc-strategy=peel" - -# LLVMCPU, non-data-tiling, no microkernels, ArmSME -[iree_generated_e2e_runner_test( - name = "e2e_matmul_cpu_arm_sme_nondt_%s_%s%s%s" % ( - dtype, - size, - "_transpose_lhs" if transpose_lhs else "", - "_peel" if peel else "", - ), - compiler_flags = [ - "--iree-opt-data-tiling=false", - "--iree-llvmcpu-enable-scalable-vectorization", - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown", - ] + ([PREPROCESSING_TRANSPOSE_LHS] if transpose_lhs else []) + - ([PREPROCESSING_PEEL] if peel else []), - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % dtype, - "--acc_type=%s" % dtype, - "--shapes=%s" % size, - ], - tags = [ - "requires-arm-sme", - ], - target_backends_and_drivers = [ - ("llvm-cpu", "local-task"), - ], - target_cpu_features_variants = ["arm_64:sme:+sve,+sme"], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for dtype in [ - "f32", - # "f64" (also supported for ArmSME, but not by the test generator) -] for size in [ - "small", - "large", -] for transpose_lhs in [ - True, - False, -] for peel in [ - True, - False, -]] - -X86_64_AVX2 = [ - "+avx", - "+avx2", - "+fma", - "+f16c", -] - -X86_64_AVX512 = X86_64_AVX2 + [ - "+avx512f", - "+avx512vl", - "+avx512cd", - "+avx512bw", - "+avx512dq", -] - -X86_64_AVX512_VNNI = X86_64_AVX512 + [ - "+avx512vnni", -] - -X86_64_AVX512_BF16 = X86_64_AVX512 + [ - "+avx512bf16", -] - -# LLVMCPU, data-tiling, data-tiling + ukernels. -[iree_generated_e2e_runner_test( - name = "e2e_matmul_cpu_dt%s_%s_%s_%s" % ( - ("_uk" if use_uk else ""), - lhs_rhs_type, - acc_type, - size, - ), - compiler_flags = [ - "--iree-opt-data-tiling", - ] + ["--iree-llvmcpu-enable-ukernels=%s" % ("all" if use_uk else "none")], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--acc_type=%s" % acc_type, - "--shapes=%s" % size, - ], - tags = ([ - # "--shapes=large" can cause timeouts on sanitizers. - "noasan", - "notsan", - ] if size == "large" else []) + ([ - # "--shapes=large" can cause timeouts on RISC-V emulator. - # f16/bf16 trigger internal LLVM assertion errors on riscv and wasm. 
- "noriscv", - "nowasm", - ] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else []), - target_backends_and_drivers = [ - ("llvm-cpu", "local-task"), - ], - target_cpu_features_variants = ["default"] + - ([ - "arm_64:dotprod:+dotprod", - "arm_64:i8mm:+i8mm", - "x86_64:avx512vnni:" + ",".join(X86_64_AVX512_VNNI), - ] if lhs_rhs_type == "i8" and acc_type == "i32" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - ] if lhs_rhs_type == "f32" and acc_type == "f32" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - "arm_64:fullfp16:+fullfp16", - ] if lhs_rhs_type == "f16" and acc_type == "f16" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - "arm_64:fp16fml:+fp16fml", - ] if lhs_rhs_type == "f16" and acc_type == "f32" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - "x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16), - "arm_64:bf16:+bf16", - ] if lhs_rhs_type == "bf16" and acc_type == "bf16" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - "x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16), - "arm_64:bf16:+bf16", - ] if lhs_rhs_type == "bf16" and acc_type == "f32" else []), - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for use_uk in [ - False, - True, -] for (lhs_rhs_type, acc_type) in ( - [ - ("i8", "i32"), - ("f32", "f32"), - ("f16", "f16"), - ("f16", "f32"), - ("bf16", "bf16"), - ("bf16", "f32"), - ] -) for size in [ - "small", - "large", -]] - -# LLVMCPU, data-tiling, data-tiling + ukernels + late materialization. -[iree_generated_e2e_runner_test( - name = "e2e_matmul_cpu_experimental_dt%s_%s_%s_%s" % ( - ("_uk" if use_uk else ""), - lhs_rhs_type, - acc_type, - size, - ), - compiler_flags = [ - "--iree-opt-data-tiling", - "--iree-global-opt-enable-early-materialization=false", - ] + ["--iree-llvmcpu-enable-ukernels=%s" % ("all" if use_uk else "none")], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--acc_type=%s" % acc_type, - "--shapes=%s" % size, - ], - tags = ([ - # "--shapes=large" can cause timeouts on sanitizers. - "noasan", - "notsan", - ] if size == "large" else []) + ([ - # "--shapes=large" can cause timeouts on RISC-V emulator. - # f16/bf16 trigger internal LLVM assertion errors on riscv and wasm. 
- "noriscv", - "nowasm", - ] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else []), - target_backends_and_drivers = [ - ("llvm-cpu", "local-task"), - ], - target_cpu_features_variants = ["default"] + - ([ - "arm_64:dotprod:+dotprod", - "arm_64:i8mm:+i8mm", - "x86_64:avx512vnni:" + ",".join(X86_64_AVX512_VNNI), - ] if lhs_rhs_type == "i8" and acc_type == "i32" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - ] if lhs_rhs_type == "f32" and acc_type == "f32" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - "arm_64:fullfp16:+fullfp16", - ] if lhs_rhs_type == "f16" and acc_type == "f16" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - "arm_64:fp16fml:+fp16fml", - ] if lhs_rhs_type == "f16" and acc_type == "f32" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - "x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16), - "arm_64:bf16:+bf16", - ] if lhs_rhs_type == "bf16" and acc_type == "bf16" else [ - "x86_64:avx2:" + ",".join(X86_64_AVX2), - "x86_64:avx512:" + ",".join(X86_64_AVX512), - "x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16), - "arm_64:bf16:+bf16", - ] if lhs_rhs_type == "bf16" and acc_type == "f32" else []), - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for use_uk in [ - False, - True, -] for (lhs_rhs_type, acc_type) in ( - [ - ("i8", "i32"), - ("f32", "f32"), - ("f16", "f16"), - ("f16", "f32"), - ("bf16", "bf16"), - ("bf16", "f32"), - ] -) for size in [ - "small", - "large", -]] - -[iree_generated_e2e_runner_test( - name = "e2e_matmul_vmvx_experimental_dt%s_%s_%s" % ( - ("_uk" if use_uk else ""), - lhs_rhs_type, - acc_type, - ), - compiler_flags = [ - "--iree-opt-data-tiling", - "--iree-global-opt-enable-early-materialization=false", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--acc_type=%s" % acc_type, - "--shapes=small", - ], - tags = [], - target_backends_and_drivers = [ - ("vmvx", "local-task"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for use_uk in [ - False, - True, -] for (lhs_rhs_type, acc_type) in ( - [ - ("f32", "f32"), - ] -)] - -[iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_experimental_dt_%s_%s" % ( - lhs_rhs_type, - acc_type, - ), - compiler_flags = [ - "--iree-opt-data-tiling", - "--iree-global-opt-enable-early-materialization=false", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--acc_type=%s" % acc_type, - "--shapes=small", - ], - tags = [], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for (lhs_rhs_type, acc_type) in ( - [ - ("f32", "f32"), - ] -)] - -[iree_generated_e2e_runner_test( - name = "e2e_matmul_spirv_experimental_dt_%s_%s" % ( - lhs_rhs_type, - acc_type, - ), - compiler_flags = [ - "--iree-opt-data-tiling", - "--iree-global-opt-enable-early-materialization=false", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--acc_type=%s" % acc_type, - "--shapes=small", - ], - tags = [], - target_backends_and_drivers = [ - ("vulkan-spirv", "vulkan"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for (lhs_rhs_type, acc_type) in ( - [ - 
("f32", "f32"), - ] -)] - -########################################################################### -## -## VMVX backend -## -########################################################################### - -# VMVX, data-tiling + microkernels. -[iree_generated_e2e_runner_test( - name = "e2e_matmul_vmvx_dt_uk_%s_small" % lhs_rhs_type, - compiler_flags = [ - "--iree-vmvx-enable-microkernels", - "--iree-opt-data-tiling", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--shapes=small", - ], - target_backends_and_drivers = [ - ("vmvx", "local-task"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for lhs_rhs_type in [ - "i8", - "f32", -]] - -########################################################################### -## -## CUDA backend -## -########################################################################### - -iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_f32_large_simt", - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=f32", - "--shapes=gpu_large_aligned", - "--compilation_info=LLVMGPUMatmulSimt", - ], - tags = [ - # CUDA cuInit fails with sanitizer on. - "noasan", - "nomsan", - "notsan", - "noubsan", - "requires-gpu-nvidia", - ], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) - -# Testing Ampere + TensorCore path. -# WMMA TensorCore(F32): wmma.161616.f32.tf32 -iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_f32_large_tensorcore", - compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=f32", - "--shapes=gpu_large_aligned", - "--compilation_info=LLVMGPUMatmulTensorCore", - ], - tags = [ - # CUDA cuInit fails with sanitizer on. - "noasan", - "nomsan", - "notsan", - "noubsan", - "requires-gpu-sm80", - ], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) - -iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_f32_large_unaligned", - compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=f32", - "--shapes=gpu_large", - ], - tags = [ - # CUDA cuInit fails with sanitizer on. - "noasan", - "nomsan", - "notsan", - "noubsan", - "requires-gpu-sm80", - ], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) - -iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_f16_large_unaligned", - compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=f16", - "--shapes=gpu_large", - ], - tags = [ - # CUDA cuInit fails with sanitizer on. 
- "noasan", - "nomsan", - "notsan", - "noubsan", - "requires-gpu-sm80", - ], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) - -# MMA.SYNC TensorCore(F32): mma.sync.1688.f32.t32 -iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_f32_large_mma_sync", - compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=f32", - "--shapes=gpu_large_aligned", - "--compilation_info=LLVMGPUMatmulTensorCoreMmaSync", - ], - tags = [ - # CUDA cuInit fails with sanitizer on. - "noasan", - "nomsan", - "notsan", - "noubsan", - "requires-gpu-sm80", - ], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) - -# WMMA TensorCore(F16): wmma.161616.f16.f16 -iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_f16_large_tensorcore", - compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=f16", - "--shapes=gpu_large_aligned", - "--compilation_info=LLVMGPUMatmulTensorCore", - ], - tags = [ - # CUDA cuInit fails with sanitizer on. - "noasan", - "nomsan", - "notsan", - "noubsan", - "requires-gpu-sm80", - ], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) - -# MMA.SYNC TensorCore(F16): mma.sync.161616.f16.f16 -iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_f16_large_mma_sync", - compiler_flags = [ - "--iree-hal-cuda-llvm-target-arch=sm_80", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=f16", - "--shapes=gpu_large_aligned", - "--compilation_info=LLVMGPUMatmulTensorCoreMmaSync", - ], - tags = [ - # CUDA cuInit fails with sanitizer on. - "noasan", - "nomsan", - "notsan", - "noubsan", - "requires-gpu-sm80", - ], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) - -[iree_generated_e2e_runner_test( - name = "e2e_matmul_cuda_%s_large_splitk" % lhs_rhs_type, - compiler_flags = [ - "--iree-flow-split-matmul-reduction=4", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--shapes=large", - ], - tags = [ - # CUDA cuInit fails with sanitizer on. - "noasan", - "nomsan", - "notsan", - "noubsan", - "requires-gpu-nvidia", - # "--shapes=large" can cause timeouts on riscv emulator. 
- "noriscv", - ], - target_backends_and_drivers = [ - ("cuda", "cuda"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for lhs_rhs_type in [ - "f32", -]] - -########################################################################### -## -## Vulkan backend -## -########################################################################### - -[iree_generated_e2e_runner_test( - name = "e2e_matmul_vulkan_{0}_large_valhall".format(lhs_rhs_type), - compiler_flags = [ - "--iree-vulkan-target-triple=valhall-unknown-android31", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--shapes=gpu_large_aligned", - "--compilation_info=SPIRVVectorizeMali", - ], - tags = [ - # Nvidia GPUs support a superset of Valhall features - "requires-gpu-nvidia", - "vulkan_uses_vk_khr_shader_float16_int8", - ], - target_backends_and_drivers = [ - ("vulkan-spirv", "vulkan"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for lhs_rhs_type in [ - "i8", - "f16", - "f32", -]] - -[iree_generated_e2e_runner_test( - name = "e2e_matmul_vulkan_{0}_large_ampere".format(lhs_rhs_type), - compiler_flags = [ - "--iree-vulkan-target-triple=ampere-unknown-linux", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=%s" % lhs_rhs_type, - "--shapes=gpu_large_aligned", - "--compilation_info=SPIRVVectorizeNVIDIA", - ], - tags = [ - "requires-gpu-sm80", - "vulkan_uses_vk_khr_shader_float16_int8", - ], - target_backends_and_drivers = [ - ("vulkan-spirv", "vulkan"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) for lhs_rhs_type in [ - "i8", - "f16", - "f32", -]] - -iree_generated_e2e_runner_test( - name = "e2e_matmul_vulkan_f16_large_rdna3", - compiler_flags = [ - "--iree-vulkan-target-triple=rdna3-unknown-linux", - ], - generator = ":generate_e2e_matmul_tests", - generator_args = [ - "--lhs_rhs_type=f16", - "--shapes=gpu_large_aligned", - "--compilation_info=SPIRVCooperativeMatrixVectorize", - ], - runner_args = [ - "--require_exact_results=false", - ], - tags = [ - "requires-gpu", - "requires-gpu-rdna3", - "vulkan_uses_vk_khr_shader_float16_int8", - ], - target_backends_and_drivers = [ - ("vulkan-spirv", "vulkan"), - ], - test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", - test_type = "matmul", -) diff --git a/tests/e2e/matmul/CMakeLists.txt b/tests/e2e/matmul/CMakeLists.txt deleted file mode 100644 index 0556e756bef6..000000000000 --- a/tests/e2e/matmul/CMakeLists.txt +++ /dev/null @@ -1,2540 +0,0 @@ -################################################################################ -# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # -# tests/e2e/matmul/BUILD.bazel # -# # -# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # -# CMake-only content. # -# # -# To disable autogeneration for this file entirely, delete this header. 
# -################################################################################ - -iree_add_all_subdirs() - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_arm_sme_nondt_f32_small_transpose_lhs_peel - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)" - "--iree-llvmcpu-vector-pproc-strategy=peel" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_arm_sme_nondt_f32_small_transpose_lhs - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_arm_sme_nondt_f32_small_peel - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-llvmcpu-vector-pproc-strategy=peel" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_arm_sme_nondt_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_arm_sme_nondt_f32_large_transpose_lhs_peel - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)" - "--iree-llvmcpu-vector-pproc-strategy=peel" - LABELS - "requires-arm-sme" - 
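-  # (SME coverage comes from the arm_64:sme feature variant below.)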
TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_arm_sme_nondt_f32_large_transpose_lhs - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-preprocessing-pass-pipeline=builtin.module\(util.func\(iree-preprocessing-transpose-matmul-pass{input=lhs}\)\)" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_arm_sme_nondt_f32_large_peel - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - "--iree-llvmcpu-vector-pproc-strategy=peel" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_arm_sme_nondt_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling=false" - "--iree-llvmcpu-enable-scalable-vectorization" - "--iree-llvmcpu-target-triple=aarch64-unknown-unknown" - LABELS - "requires-arm-sme" - TARGET_CPU_FEATURES_VARIANTS - "arm_64:sme:+sve,+sme" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_i8_i32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_i8_i32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_f32_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - 
iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_f32_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_f16_f16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_f16_f16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_f16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_f16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - 
"x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_bf16_bf16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_bf16_bf16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_bf16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_bf16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_i8_i32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - 
"--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_i8_i32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_f32_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_f32_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_f16_f16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_f16_f16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - 
e2e_matmul_cpu_dt_uk_f16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_f16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_bf16_bf16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_bf16_bf16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_bf16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - 
"x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_dt_uk_bf16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_i8_i32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_i8_i32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_f32_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_f32_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - 
"default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_f16_f16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_f16_f16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_f16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_f16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_bf16_bf16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - 
"--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_bf16_bf16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_bf16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_bf16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=none" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_i8_i32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - 
"x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_i8_i32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "arm_64:dotprod:+dotprod" - "arm_64:i8mm:+i8mm" - "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_f32_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_f32_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_f16_f16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_f16_f16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" 
- "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fullfp16:+fullfp16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_f16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_f16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "arm_64:fp16fml:+fp16fml" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_bf16_bf16_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_bf16_bf16_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=bf16" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_bf16_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - 
iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cpu_experimental_dt_uk_bf16_f32_large - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=bf16" - "--acc_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "llvm-cpu" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - "--iree-llvmcpu-enable-ukernels=all" - LABELS - "noasan" - "notsan" - "noriscv" - "nowasm" - TARGET_CPU_FEATURES_VARIANTS - "default" - "x86_64:avx2:+avx,+avx2,+fma,+f16c" - "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" - "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" - "arm_64:bf16:+bf16" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vmvx_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vmvx" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vmvx_experimental_dt_uk_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vmvx" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_spirv_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vmvx_dt_uk_i8_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vmvx" - DRIVERS - "local-task" - 
COMPILER_FLAGS - "--iree-vmvx-enable-microkernels" - "--iree-opt-data-tiling" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vmvx_dt_uk_f32_small - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vmvx" - DRIVERS - "local-task" - COMPILER_FLAGS - "--iree-vmvx-enable-microkernels" - "--iree-opt-data-tiling" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_f32_large_simt - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUMatmulSimt" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_f32_large_tensorcore - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUMatmulTensorCore" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-sm80" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_f32_large_unaligned - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--shapes=gpu_large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-sm80" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_f16_large_unaligned - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--shapes=gpu_large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-sm80" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_f32_large_mma_sync - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUMatmulTensorCoreMmaSync" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-sm80" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_f16_large_tensorcore - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUMatmulTensorCore" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-sm80" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_f16_large_mma_sync - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - 
"--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUMatmulTensorCoreMmaSync" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-hal-cuda-llvm-target-arch=sm_80" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-sm80" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cuda_f32_large_splitk - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--shapes=large" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "cuda" - DRIVERS - "cuda" - COMPILER_FLAGS - "--iree-flow-split-matmul-reduction=4" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" - "noriscv" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vulkan_i8_large_valhall - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--shapes=gpu_large_aligned" - "--compilation_info=SPIRVVectorizeMali" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS - "--iree-vulkan-target-triple=valhall-unknown-android31" - LABELS - "requires-gpu-nvidia" - "vulkan_uses_vk_khr_shader_float16_int8" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vulkan_f16_large_valhall - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--shapes=gpu_large_aligned" - "--compilation_info=SPIRVVectorizeMali" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS - "--iree-vulkan-target-triple=valhall-unknown-android31" - LABELS - "requires-gpu-nvidia" - "vulkan_uses_vk_khr_shader_float16_int8" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vulkan_f32_large_valhall - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=SPIRVVectorizeMali" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS - "--iree-vulkan-target-triple=valhall-unknown-android31" - LABELS - "requires-gpu-nvidia" - "vulkan_uses_vk_khr_shader_float16_int8" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vulkan_i8_large_ampere - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--shapes=gpu_large_aligned" - "--compilation_info=SPIRVVectorizeNVIDIA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS - "--iree-vulkan-target-triple=ampere-unknown-linux" - LABELS - "requires-gpu-sm80" - "vulkan_uses_vk_khr_shader_float16_int8" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vulkan_f16_large_ampere - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--shapes=gpu_large_aligned" - "--compilation_info=SPIRVVectorizeNVIDIA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS - "--iree-vulkan-target-triple=ampere-unknown-linux" - LABELS - "requires-gpu-sm80" - "vulkan_uses_vk_khr_shader_float16_int8" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vulkan_f32_large_ampere - TEST_TYPE - matmul - GENERATOR - 
"generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=SPIRVVectorizeNVIDIA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS - "--iree-vulkan-target-triple=ampere-unknown-linux" - LABELS - "requires-gpu-sm80" - "vulkan_uses_vk_khr_shader_float16_int8" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_vulkan_f16_large_rdna3 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--shapes=gpu_large_aligned" - "--compilation_info=SPIRVCooperativeMatrixVectorize" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "vulkan-spirv" - DRIVERS - "vulkan" - COMPILER_FLAGS - "--iree-vulkan-target-triple=rdna3-unknown-linux" - RUNNER_ARGS - "--require_exact_results=false" - LABELS - "requires-gpu" - "requires-gpu-rdna3" - "vulkan_uses_vk_khr_shader_float16_int8" -) - -### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### - -# To distinguish between CDNA(gfx9) and RDNA3(gfx11) -if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9") - -unset(IREE_HIP_TEST_COMPILER_FLAGS) -list(APPEND IREE_HIP_TEST_COMPILER_FLAGS - "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_rocm_f16_large_cdna3_mfma - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUVectorDistributeMFMA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-cdna3" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_rocm_f32_large_cdna3_mfma - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUVectorDistributeMFMA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-cdna3" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_rocm_f16_large_cdna3_mfma_tb - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--transpose_rhs" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUVectorDistributeMFMA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-cdna3" -) - -if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx94") - -# I8 Intrinsics has different layout on CDNA3/gfx94x, -# and only CDNA3/gfx94x has F8 intrinsics. 
- -iree_generated_e2e_runner_test( - NAME - e2e_matmul_rocm_f8_large_cdna3_mfma - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f8E4M3FNUZ" - "--acc_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUVectorDistributeMFMA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-cdna3" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_rocm_i8_large_cdna3_mfma_tb - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=i8" - "--acc_type=i32" - "--transpose_rhs" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUVectorDistributeMFMA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-cdna3" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_cdna_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-cdna3" -) -endif() - -elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11") - -unset(IREE_HIP_TEST_COMPILER_FLAGS) -list(APPEND IREE_HIP_TEST_COMPILER_FLAGS - "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_rocm_f16_large_rdna3_wmma - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUVectorDistributeWMMA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - RUNNER_ARGS - "--require_exact_results=false" - "--acceptable_fp_delta=1e-04" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-rdna3" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_rocm_f16_large_rdna3_wmma_tb - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f16" - "--acc_type=f32" - "--transpose_rhs" - "--shapes=gpu_large_aligned" - "--compilation_info=LLVMGPUVectorDistributeWMMA" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - RUNNER_ARGS - "--require_exact_results=false" - "--acceptable_fp_delta=1e-04" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-rdna3" -) - -iree_generated_e2e_runner_test( - NAME - e2e_matmul_rdna3_experimental_dt_f32_f32 - TEST_TYPE - matmul - GENERATOR - "generate_e2e_matmul_tests.py" - GENERATOR_ARGS - "--lhs_rhs_type=f32" - "--acc_type=f32" - "--shapes=small" - TEST_RUNNER - iree_tools_testing_e2e_iree-e2e-matmul-test - TARGET_BACKENDS - "rocm" - DRIVERS - "hip" - COMPILER_FLAGS - ${IREE_HIP_TEST_COMPILER_FLAGS} - "--iree-opt-data-tiling" - "--iree-global-opt-enable-early-materialization=false" - LABELS - "noasan" - 
"nomsan" - "notsan" - "noubsan" - "requires-gpu-rdna3" -) - -endif() diff --git a/tests/e2e/matmul/generate_e2e_matmul_tests.py b/tests/e2e/matmul/generate_e2e_matmul_tests.py deleted file mode 100644 index 6cb270233386..000000000000 --- a/tests/e2e/matmul/generate_e2e_matmul_tests.py +++ /dev/null @@ -1,992 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -"""iree_generated_e2e_matmul_test generator for e2e matmul tests. -""" - -from typing import Optional -import argparse -import os -import re -import enum -import dataclasses -import typing -import itertools - - -# Data type of matrix entries. The string values must match MLIR data types. -# This is a superset of the values accepted for the --lhs_rhs_types= flag, -# as this also includes accumulator-specific types like i32. -@enum.unique -class MatrixElemTypeId(enum.Enum): - NONE = "" - I8 = "i8" - I32 = "i32" - F32 = "f32" - F16 = "f16" - F8E4M3FNUZ = "f8E4M3FNUZ" - BF16 = "bf16" - - -# Enumerates of the collections of shapes that we can generate tests for. -# The values are the accepted values for the --shapes= flag. -@enum.unique -class ShapesId(enum.Enum): - SMALL = "small" - LARGE = "large" - GPU_LARGE = "gpu_large" - GPU_LARGE_ALIGNED = "gpu_large_aligned" - - -# Enumerates of the collections of compilation info that we can generate tests -# for. The values are the accepted values for the --compilation_info= flag. -@enum.unique -class CompilationInfoId(enum.Enum): - NONE = "" - LLVMGPUMatmulSimt = "LLVMGPUMatmulSimt" - LLVMGPUMatmulTensorCore = "LLVMGPUMatmulTensorCore" - LLVMGPUMatmulTensorCoreMmaSync = "LLVMGPUMatmulTensorCoreMmaSync" - LLVMGPUVectorDistributeMFMA = "LLVMGPUVectorDistributeMFMA" - LLVMGPUVectorDistributeWMMA = "LLVMGPUVectorDistributeWMMA" - SPIRVCooperativeMatrixVectorize = "SPIRVCooperativeMatrixVectorize" - SPIRVVectorizeMali = "SPIRVVectorizeMali" - SPIRVVectorizeNVIDIA = "SPIRVVectorizeNVIDIA" - - -# Enumerates ways to construct MLIR tensor types. -@enum.unique -class Dynamicity(enum.Enum): - DYNAMIC = "dynamic" # Use '?' everywhere. Example: tensor. - STATIC = "static" # Use fixed values everywhere. Example: tensor<4x6xf32>. - MIXED = "mixed" # Randomly mix '?' and values. Example: tensor. - - -# Enumerates ways to initialize matrix buffer contents. -@enum.unique -class MatrixGenerator(enum.Enum): - ZERO = "zero" # Fill with zeros - RANDOM = "random" # Fill with (deterministic) pseudorandom values. - - -# Describes the shape of a matrix multiplication in the usual convention: -# the LHS is {m}x{k}, the RHS is {k}x{n}, the accumulator/result is {m}x{n}. -# The extra `accumulate` boolean tells whether the matmul is accumulating into -# an existing accumulator (C += A * B) or just overwriting the result -# (C = A * B). -@dataclasses.dataclass -class TestShape: - m: int - k: int - n: int - accumulate: bool - - -# Describes a workgroup and tiling schedule to target a specific MMA intrinsic. 
-@dataclasses.dataclass -class MMASchedule: - intrinsic: str - m_count: int # Number of subgroups per workgroup along M - n_count: int # Number of subgroups per workgroup along N - m_tile_count: int - n_tile_count: int - k_tile_count: int - - def __str__(self): - return ( - "mma_schedule = #iree_gpu.mma_schedule<" - + f"intrinsic = #iree_gpu.mma_layout<{self.intrinsic}>, " - + f"subgroup_m_count = {self.m_count}, " - + f"subgroup_n_count = {self.n_count}>" - ) - - -# Describes how to construct compilation info for the testcase. -@dataclasses.dataclass -class CompilationInfo: - # Lowering Config - tile_sizes: typing.List[typing.List[int]] - # Translation Info - dispatch_lowering_pass_pipeline: str - software_pipeline_depth: int - mma_schedule: typing.Optional[MMASchedule] - # Compilation info - workgroup_size: typing.List[int] - subgroup_size: Optional[int] = None - - # Prints the workgroup size - def workgroup_size_str(self): - return "workgroup_size = [" + ", ".join(map(str, self.workgroup_size)) + "]" - - -# Returns the list of TestShape's to use for the collection of shapes -# identified by shapes_id. -def get_test_shapes(shapes_id: ShapesId): - # Notes: - # 1. Be conservative in adding more shapes, as that can increase both the - # build and execution latency of tests. The build latency is nearly the - # same for all shapes, while execution latency grows cubicly i.e. - # linearly with m*k*n. - # 2. Some shapes are commented out: they used to be tested but have been - # disabled to improve the trade-off between test coverage and build - # latency. - if shapes_id == ShapesId.SMALL: - return [ - # square matrices. Start by the simplest case of 1x1x1. - TestShape(m=1, k=1, n=1, accumulate=True), - TestShape(m=1, k=1, n=1, accumulate=False), - # Test some small powers of two, that exercise in particular the - # adjustment of data-tiling tile sizes to narrow cases. - TestShape(m=2, k=2, n=2, accumulate=True), - TestShape(m=4, k=4, n=4, accumulate=True), - TestShape(m=8, k=8, n=8, accumulate=True), - # test 9x9x9 because as many kernel M0/K0/N0 dims are equal to 8, - # this will often be the smallest value that exercises something above - # the kernel's size. - TestShape(m=9, k=9, n=9, accumulate=True), - # rectangular matrices. - # >= 2x differences between M/N/K dims may exercise tiling corner cases - # not exercised by nearly-square matrices. - TestShape(m=6, k=13, n=3, accumulate=True), - TestShape(m=15, k=37, n=7, accumulate=False), - TestShape(m=81, k=19, n=41, accumulate=True), - # shapes involving vectors (i.e. most rectangular cases) - # This is particularly relevant because we have dedicated kernels for - # the matrix*vector / vector*matrix case. - TestShape(m=1, k=10, n=10, accumulate=True), # vector*matrix - TestShape(m=1, k=10, n=10, accumulate=False), # vector*matrix - TestShape(m=10, k=1, n=10, accumulate=True), # outer-product - TestShape(m=10, k=10, n=1, accumulate=True), # matrix*vector - TestShape(m=10, k=10, n=1, accumulate=False), # matrix*vector - ] - if shapes_id == ShapesId.LARGE: - return [ - # some random large sizes - TestShape(m=123, k=456, n=789, accumulate=True), - TestShape(m=654, k=321, n=234, accumulate=False), - # shapes involving vectors (i.e. most rectangular cases) - TestShape(m=1, k=1000, n=1000, accumulate=True), # large vector*matrix - TestShape(m=1000, k=1000, n=1, accumulate=True), # large matrix*vector - TestShape(m=1000, k=1000, n=1, accumulate=False), # large matrix*vector - # Be conservative in adding larger shapes. 
They can result in - # high latency tests. If you have to, consider splitting them - # out in a way that constrains the latency impact, e.g. by - # running on fewer backends/drivers or with fewer generators - # (see get_test_generators). - ] - if shapes_id == ShapesId.GPU_LARGE_ALIGNED: - return [ - TestShape(m=512, k=128, n=512, accumulate=True), - TestShape(m=512, k=128, n=512, accumulate=False), - ] - if shapes_id == ShapesId.GPU_LARGE: - return [ - # unaligned cases. - TestShape(m=457, k=330, n=512, accumulate=False), - TestShape(m=457, k=330, n=514, accumulate=False), - TestShape(m=438, k=330, n=514, accumulate=False), - TestShape(m=540, k=332, n=516, accumulate=False), - TestShape(m=1000, k=4, n=512, accumulate=False), - TestShape(m=4, k=1000, n=512, accumulate=False), - TestShape(m=512, k=1000, n=4, accumulate=False), - TestShape(m=512, k=128, n=500, accumulate=False), - TestShape(m=457, k=160, n=512, accumulate=False), - TestShape(m=512, k=330, n=512, accumulate=False), - ] - - raise ValueError(shapes_id) - - -# Returns the list of Dynamicity's to use for the collection of shapes -# identified by shapes_id. -def get_dynamicities(shapes_id: ShapesId): - if shapes_id == ShapesId.GPU_LARGE or shapes_id == ShapesId.GPU_LARGE_ALIGNED: - return [ - Dynamicity.STATIC, - ] - else: - return [ - Dynamicity.DYNAMIC, - Dynamicity.STATIC, - ] - raise ValueError(shapes_id) - - -@dataclasses.dataclass -class TileWorkgroupSizePair: - tile_size: typing.List[typing.List[int]] - workgroup_size: typing.List[int] - - -# Constructs a TileWorkgroupSizePair for SPIR-V targets enforcing the -# constraints between the workgroup_size and tile size -def get_spirv_tile_workgroup_size_pair( - workgroup_size, t_tile_k, t_tile_m=4, t_tile_n=4 -): - x, y, z = workgroup_size - wg_tile_m = y * t_tile_m - wg_tile_n = x * t_tile_n - return TileWorkgroupSizePair( - [[wg_tile_m, wg_tile_n], [t_tile_m, t_tile_n], [0, 0, t_tile_k]], workgroup_size - ) - - -# Returns all the TileWorkgroupSizePairs for a given SPIRV Target -def get_all_spirv_tile_workgroup_size_pairs(t_tile_k): - tile_workgroup_size_pairs = [ - get_spirv_tile_workgroup_size_pair([32, 8, 1], t_tile_k), - get_spirv_tile_workgroup_size_pair([16, 8, 1], t_tile_k), - get_spirv_tile_workgroup_size_pair([64, 2, 1], t_tile_k), - get_spirv_tile_workgroup_size_pair([8, 8, 1], t_tile_k), - get_spirv_tile_workgroup_size_pair([32, 1, 1], t_tile_k), - get_spirv_tile_workgroup_size_pair([16, 2, 1], t_tile_k), - get_spirv_tile_workgroup_size_pair([32, 1, 1], t_tile_k), - ] - return tile_workgroup_size_pairs - - -def get_rocm_test_compilation_infos( - compilation_info_id: CompilationInfoId, lhs_rhs_type: MatrixElemTypeId -): - intrinsic = "" - if compilation_info_id == CompilationInfoId.LLVMGPUVectorDistributeMFMA: - intrinsic = "MFMA" - elif compilation_info_id == CompilationInfoId.LLVMGPUVectorDistributeWMMA: - intrinsic = "WMMA" - else: - raise ValueError("Unknown pipeline for rocm") - - schedules = [] - if intrinsic == "MFMA": - schedules = [ - MMASchedule("MFMA_F32_16x16x4_F32", 1, 1, 1, 1, 1), - MMASchedule("MFMA_F32_16x16x4_F32", 1, 1, 1, 1, 2), - MMASchedule("MFMA_F32_16x16x4_F32", 1, 1, 1, 2, 1), - MMASchedule("MFMA_F32_16x16x4_F32", 1, 1, 2, 1, 1), - MMASchedule("MFMA_F32_16x16x4_F32", 2, 2, 1, 1, 2), - MMASchedule("MFMA_F32_16x16x16_F16", 1, 1, 1, 1, 1), - MMASchedule("MFMA_F32_16x16x16_F16", 1, 1, 1, 1, 2), - MMASchedule("MFMA_F32_16x16x16_F16", 1, 1, 1, 2, 1), - MMASchedule("MFMA_F32_16x16x16_F16", 1, 1, 2, 1, 1), - MMASchedule("MFMA_F32_16x16x16_F16", 2, 2, 1, 
1, 1),
-            MMASchedule("MFMA_F32_16x16x16_F16", 2, 4, 2, 1, 2),
-            MMASchedule("MFMA_F32_16x16x16_F16", 4, 2, 4, 2, 2),
-            MMASchedule("MFMA_F32_32x32x8_F16", 1, 1, 1, 2, 2),
-            MMASchedule("MFMA_F32_32x32x8_F16", 2, 2, 1, 1, 1),
-            MMASchedule("MFMA_F32_32x32x8_F16", 1, 4, 2, 1, 2),
-            MMASchedule("MFMA_F32_32x32x8_F16", 4, 2, 1, 2, 4),
-            MMASchedule("MFMA_F32_16x16x32_F8E4M3FNUZ", 1, 1, 1, 1, 1),
-            MMASchedule("MFMA_F32_16x16x32_F8E4M3FNUZ", 2, 2, 1, 1, 2),
-            MMASchedule("MFMA_F32_16x16x32_F8E4M3FNUZ", 4, 1, 4, 1, 1),
-            MMASchedule("MFMA_F32_16x16x32_F8E4M3FNUZ", 4, 2, 4, 2, 1),
-            MMASchedule("MFMA_I32_16x16x32_I8", 1, 1, 1, 1, 1),
-            MMASchedule("MFMA_I32_16x16x32_I8", 2, 2, 1, 1, 2),
-            MMASchedule("MFMA_I32_16x16x32_I8", 4, 1, 4, 1, 1),
-            MMASchedule("MFMA_I32_16x16x32_I8", 4, 2, 4, 2, 1),
-            MMASchedule("MFMA_I32_32x32x16_I8", 1, 1, 1, 1, 1),
-            MMASchedule("MFMA_I32_32x32x16_I8", 2, 2, 1, 1, 2),
-            MMASchedule("MFMA_I32_32x32x16_I8", 4, 1, 1, 2, 2),
-            MMASchedule("MFMA_I32_32x32x16_I8", 4, 2, 2, 2, 2),
-        ]
-    elif intrinsic == "WMMA":
-        schedules = [
-            MMASchedule("WMMA_F32_16x16x16_F16", 1, 1, 1, 1, 1),
-            MMASchedule("WMMA_F32_16x16x16_F16", 1, 1, 1, 1, 2),
-            MMASchedule("WMMA_F32_16x16x16_F16", 1, 1, 1, 2, 1),
-            MMASchedule("WMMA_F32_16x16x16_F16", 1, 1, 2, 1, 1),
-            MMASchedule("WMMA_F32_16x16x16_F16", 2, 2, 1, 1, 1),
-            MMASchedule("WMMA_F32_16x16x16_F16", 2, 4, 2, 1, 2),
-            MMASchedule("WMMA_F32_16x16x16_F16", 4, 2, 4, 2, 2),
-        ]
-    else:
-        raise NotImplementedError("unhandled intrinsic case")
-
-    subgroup_size = 64 if intrinsic == "MFMA" else 32
-
-    infos = []
-    for schedule in schedules:
-        # Skip schedules whose intrinsic element type does not match the
-        # requested one. The input type is extracted from the intrinsic
-        # name; the naming convention is [output_type]_MxNxK_[input_type].
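
As a concrete instance of the convention just described (an illustrative sketch, not generator code; the actual parse follows in the loop below):

# Decomposing one intrinsic name under [output_type]_MxNxK_[input_type].
name = "MFMA_F32_16x16x4_F32"
parts = name.split("_")  # ["MFMA", "F32", "16x16x4", "F32"]
output_type = parts[1]  # accumulator element type: "F32"
m, n, k = (int(d) for d in parts[2].split("x"))  # intrinsic shape: 16, 16, 4
input_type = parts[-1]  # lhs/rhs element type: "F32"

The workgroup tile computed below then scales the intrinsic shape by the schedule's counts: for example, MFMA_F32_16x16x16_F16 with subgroup counts 2x2 and m/n/k tile counts 1/1/2 yields a [32, 32, 32] workgroup tile and, with the MFMA subgroup size of 64, a [128, 2, 1] workgroup size.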
- input_type = schedule.intrinsic.split("_")[-1] - if lhs_rhs_type.value.upper() != input_type: - continue - - if schedule.intrinsic == "MFMA_F32_16x16x4_F32": - wg_tile_m = schedule.m_count * schedule.m_tile_count * 16 - wg_tile_n = schedule.n_count * schedule.n_tile_count * 16 - wg_tile_k = schedule.k_tile_count * 4 - elif schedule.intrinsic == "MFMA_F32_16x16x16_F16": - wg_tile_m = schedule.m_count * schedule.m_tile_count * 16 - wg_tile_n = schedule.n_count * schedule.n_tile_count * 16 - wg_tile_k = schedule.k_tile_count * 16 - elif schedule.intrinsic == "MFMA_F32_32x32x8_F16": - wg_tile_m = schedule.m_count * schedule.m_tile_count * 32 - wg_tile_n = schedule.n_count * schedule.n_tile_count * 32 - wg_tile_k = schedule.k_tile_count * 8 - elif ( - schedule.intrinsic == "MFMA_I32_16x16x32_I8" - or schedule.intrinsic == "MFMA_F32_16x16x32_F8E4M3FNUZ" - ): - wg_tile_m = schedule.m_count * schedule.m_tile_count * 16 - wg_tile_n = schedule.n_count * schedule.n_tile_count * 16 - wg_tile_k = schedule.k_tile_count * 32 - elif schedule.intrinsic == "MFMA_I32_32x32x16_I8": - wg_tile_m = schedule.m_count * schedule.m_tile_count * 32 - wg_tile_n = schedule.n_count * schedule.n_tile_count * 32 - wg_tile_k = schedule.k_tile_count * 16 - elif schedule.intrinsic == "WMMA_F32_16x16x16_F16": - wg_tile_m = schedule.m_count * schedule.m_tile_count * 16 - wg_tile_n = schedule.n_count * schedule.n_tile_count * 16 - wg_tile_k = schedule.k_tile_count * 16 - else: - raise NotImplementedError("unhandled intrinsic case") - - workgroup_tile = [[wg_tile_m, wg_tile_n, wg_tile_k]] - workgroup_size = [schedule.n_count * subgroup_size, schedule.m_count, 1] - infos.append( - CompilationInfo( - tile_sizes=workgroup_tile, - dispatch_lowering_pass_pipeline="LLVMGPUVectorDistribute", - workgroup_size=workgroup_size, - software_pipeline_depth=0, - mma_schedule=schedule, - subgroup_size=subgroup_size, - ) - ) - return infos - - -# Returns the list of CompilationInfo's to use for the CompilationInfoId. 
-def get_test_compilation_infos(
-    compilation_info_id: CompilationInfoId, lhs_rhs_type: MatrixElemTypeId
-) -> typing.List[typing.Optional[CompilationInfo]]:
-    if compilation_info_id == CompilationInfoId.NONE:
-        return [None]
-
-    if compilation_info_id in [
-        CompilationInfoId.LLVMGPUVectorDistributeMFMA,
-        CompilationInfoId.LLVMGPUVectorDistributeWMMA,
-    ]:
-        return get_rocm_test_compilation_infos(compilation_info_id, lhs_rhs_type)
-
-    software_pipeline_depth = 0
-    if compilation_info_id == CompilationInfoId.LLVMGPUMatmulSimt:
-        tile_workgroup_size_pairs = [
-            TileWorkgroupSizePair([[32, 128, 32]], [32, 8, 1]),
-            TileWorkgroupSizePair([[128, 64, 8]], [16, 8, 1]),
-            TileWorkgroupSizePair([[16, 256, 32]], [64, 2, 1]),
-            TileWorkgroupSizePair([[8, 32, 32]], [8, 8, 1]),
-            TileWorkgroupSizePair([[8, 128, 4]], [32, 1, 1]),
-            TileWorkgroupSizePair([[16, 64, 4]], [16, 2, 1]),
-            TileWorkgroupSizePair([[1, 128, 8]], [32, 1, 1]),
-        ]
-        software_pipeline_depth = 3
-    elif compilation_info_id == CompilationInfoId.SPIRVCooperativeMatrixVectorize:
-        tile_workgroup_size_pairs = [
-            TileWorkgroupSizePair(
-                [[64, 128], [32, 64], [0, 0, 32], [16, 16, 16]], [64, 2, 1]
-            )
-        ]
-    elif compilation_info_id == CompilationInfoId.SPIRVVectorizeNVIDIA:
-        tile_workgroup_size_pairs = get_all_spirv_tile_workgroup_size_pairs(32)
-    elif compilation_info_id == CompilationInfoId.SPIRVVectorizeMali:
-        tile_workgroup_size_pairs = get_all_spirv_tile_workgroup_size_pairs(4)
-    elif (
-        compilation_info_id == CompilationInfoId.LLVMGPUMatmulTensorCore
-        or compilation_info_id == CompilationInfoId.LLVMGPUMatmulTensorCoreMmaSync
-    ):
-        tile_workgroup_size_pairs = []
-        ## WarpShape = 2x2
-        tile_workgroup_size_pairs.append(
-            TileWorkgroupSizePair([[32, 32, 16]], [64, 2, 1])
-        )
-        tile_workgroup_size_pairs.append(
-            TileWorkgroupSizePair([[64, 64, 64]], [64, 2, 1])
-        )
-
-        ## WarpShape = 4x1
-        tile_workgroup_size_pairs.append(
-            TileWorkgroupSizePair([[32, 32, 32]], [64, 1, 1])
-        )
-
-        ## WarpShape = 2x2 with large tiles using larger Shared Memory capacity.
-        if lhs_rhs_type == MatrixElemTypeId.F16:
-            tile_workgroup_size_pairs.append(
-                TileWorkgroupSizePair([[128, 128, 64]], [64, 2, 1])
-            )
-        elif lhs_rhs_type == MatrixElemTypeId.F32:
-            tile_workgroup_size_pairs.append(
-                TileWorkgroupSizePair([[128, 128, 16]], [64, 2, 1])
-            )
-        software_pipeline_depth = 3
-
-    compilation_infos = []
-    for tile_workgroup_size_pair in tile_workgroup_size_pairs:
-        compilation_infos.append(
-            CompilationInfo(
-                tile_sizes=tile_workgroup_size_pair.tile_size,
-                dispatch_lowering_pass_pipeline=compilation_info_id.value,
-                workgroup_size=tile_workgroup_size_pair.workgroup_size,
-                software_pipeline_depth=software_pipeline_depth,
-                mma_schedule=None,
-            )
-        )
-    return compilation_infos
-
-
-# Intentionally fixed seed! We want full reproducibility here, both across runs
-# and across machines.
-# Intentionally not shared with pseudorandom_generator_seed to limit the ways
-# in which shuffling testcases changes which random values are generated.
-local_pseudorandom_state = 1
-
-
-# A shape dimension value, i.e. a size value that could appear in a MLIR type
-# such as 'tensor<?x4xf32>'. None means a dynamic size, similar to '?' in MLIR.
-@dataclasses.dataclass
-class DimSize:
-    value: typing.Optional[int]
-
-
-# Generates a compile-time MLIR size value, i.e. either a fixed positive integer
-# or None (which maps to MLIR '?') depending on dynamicity.
-
-
-# Generates a compile-time MLIR size value, i.e. either a fixed positive integer
-# or None (which maps to MLIR '?') depending on dynamicity.
-def shape_dim(x: int, dynamicity: Dynamicity):
-    if dynamicity == Dynamicity.DYNAMIC:
-        return DimSize(None)
-    elif dynamicity == Dynamicity.STATIC:
-        return DimSize(x)
-    else:
-        raise ValueError(dynamicity)
-
-
-# Stringification used for generating MLIR types, e.g. tensor<?x?xf32>.
-def int_or_question_mark(s: DimSize):
-    return s.value or "?"
-
-
-# Stringification used for generating alphanumeric identifiers, e.g.
-# func.func @somefunction_DYNxDYNxf32, where we can't use "?" characters.
-def int_or_DYN(s: DimSize):
-    return s.value or "DYN"
-
-
-# Gets a friendlier form/type that we can use as the arg type and then cast
-# into the target_type.
-def cast_argtype_if_required(target_type: MatrixElemTypeId):
-    if target_type == MatrixElemTypeId.F8E4M3FNUZ:
-        return MatrixElemTypeId.F32
-    return target_type
-
-
-# Gets the op needed to cast/convert from the friendly form/type into the
-# target_type.
-def get_castback_from_arg_op(target_type: MatrixElemTypeId):
-    if target_type == MatrixElemTypeId.F8E4M3FNUZ:
-        return "arith.truncf"
-    raise ValueError(f"Unhandled castback type of {target_type}")
-
-
-# Describes the fully resolved shape dimensions of all 3 input matrices,
-# LHS, RHS, and Accumulator, in a testcase.
-# Each value is a DimSize, which either holds a positive integer such as 123,
-# or None, meaning a dynamic dimension as in MLIR's '?'.
-# These values are used to generate MLIR function names and tensor shapes.
-@dataclasses.dataclass
-class TestInputMatricesShapes:
-    lhs_rows: DimSize
-    lhs_cols: DimSize
-    rhs_rows: DimSize
-    rhs_cols: DimSize
-    acc_rows: DimSize
-    acc_cols: DimSize
-
-
-# Helper for generate_function. Generates TestInputMatricesShapes, i.e.
-# converts from the runtime shape dimensions in TestShape and given dynamicity to
-# the set of shapes to be used in a test function's input tensors.
-def generate_shapes(shape: TestShape, transpose_rhs: bool, dynamicity: Dynamicity):
-    lhs_rows = shape_dim(shape.m, dynamicity)
-    lhs_cols = shape_dim(shape.k, dynamicity)
-    acc_rows = shape_dim(shape.m, dynamicity)
-    acc_cols = shape_dim(shape.n, dynamicity)
-    if transpose_rhs:
-        rhs_rows = shape_dim(shape.n, dynamicity)
-        rhs_cols = shape_dim(shape.k, dynamicity)
-    else:
-        rhs_rows = shape_dim(shape.k, dynamicity)
-        rhs_cols = shape_dim(shape.n, dynamicity)
-    shapes = TestInputMatricesShapes(
-        lhs_rows=lhs_rows,
-        lhs_cols=lhs_cols,
-        rhs_rows=rhs_rows,
-        rhs_cols=rhs_cols,
-        acc_rows=acc_rows,
-        acc_cols=acc_cols,
-    )
-    return shapes
-
-
-# Helper for generate_function.
-# Generates a name for a test function in the generated MLIR code.
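Before the name-generation helper that the comment above introduces, a quick check of the stringification helpers defined earlier in this hunk (illustrative; both rely on sizes being positive, since `s.value or ...` would also map a size of 0 to the dynamic spelling):

```python
# Dynamic dims render as "?" in types but "DYN" in identifiers.
assert int_or_question_mark(DimSize(None)) == "?"
assert int_or_DYN(DimSize(None)) == "DYN"
# Static dims pass through as the integer itself.
assert int_or_question_mark(DimSize(123)) == 123
assert int_or_DYN(DimSize(123)) == 123
```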
-def generate_function_name( - lhs_rhs_type: MatrixElemTypeId, - acc_type: MatrixElemTypeId, - shapes: TestInputMatricesShapes, - accumulate: bool, - compilation_info: typing.Optional[CompilationInfo] = None, -): - input_t = lhs_rhs_type.value - acc_t = acc_type.value - lhs_r = int_or_DYN(shapes.lhs_rows) - lhs_c = int_or_DYN(shapes.lhs_cols) - rhs_r = int_or_DYN(shapes.rhs_rows) - rhs_c = int_or_DYN(shapes.rhs_cols) - acc_r = int_or_DYN(shapes.acc_rows) - acc_c = int_or_DYN(shapes.acc_cols) - - info = "" - if compilation_info: - tile_sizes = list(itertools.chain(*compilation_info.tile_sizes)) - tile_workgroup_key = ( - "_".join([str(a) for a in tile_sizes]) - + "_" - + "_".join([str(a) for a in compilation_info.workgroup_size]) - ) - info = f"_for_{compilation_info.dispatch_lowering_pass_pipeline}_{tile_workgroup_key}" - - matmul_kind = "matmul_accumulate" if accumulate else "matmul" - return ( - f"{matmul_kind}_{lhs_r}x{lhs_c}x{input_t}_times_" - + f"{rhs_r}x{rhs_c}x{input_t}_into_{acc_r}x{acc_c}x{acc_t}{info}" - ) - - -# Represents a generated test function. -@dataclasses.dataclass -class MLIRFunction: - name: str - signature: str - import_declaration: str - definition: str - - -# Generates a test function in the generated MLIR code. -# The generated function will take the same arguments as linalg.matmul variants -# and will just call linalg.matmul variants with them, returning its result. -def generate_function( - lhs_rhs_type: MatrixElemTypeId, - acc_type: MatrixElemTypeId, - shape: TestShape, - transpose_rhs: bool, - dynamicity: Dynamicity, - compilation_info: typing.Optional[CompilationInfo] = None, -): - shapes = generate_shapes(shape, transpose_rhs, dynamicity) - func_name = generate_function_name( - lhs_rhs_type, acc_type, shapes, shape.accumulate, compilation_info - ) - lhs_r = int_or_question_mark(shapes.lhs_rows) - lhs_c = int_or_question_mark(shapes.lhs_cols) - rhs_r = int_or_question_mark(shapes.rhs_rows) - rhs_c = int_or_question_mark(shapes.rhs_cols) - acc_r = int_or_question_mark(shapes.acc_rows) - acc_c = int_or_question_mark(shapes.acc_cols) - - casted_lhs_rhs_type = cast_argtype_if_required(lhs_rhs_type) - lhs_tensor_type = f"tensor<{lhs_r}x{lhs_c}x{casted_lhs_rhs_type.value}>" - rhs_tensor_type = f"tensor<{rhs_r}x{rhs_c}x{casted_lhs_rhs_type.value}>" - acc_tensor_type = f"tensor<{acc_r}x{acc_c}x{acc_type.value}>" - - if transpose_rhs: - op_name = "linalg.matmul_transpose_b" - else: - op_name = "linalg.matmul" - - # Compilation info is optional; prints empty string by default. 
- func_definition = "" - compilation_info_attr = "" - if compilation_info: - requested_pipeline = compilation_info.dispatch_lowering_pass_pipeline - compiler_pipeline = requested_pipeline - if requested_pipeline == "SPIRVVectorizeMali": - compiler_pipeline = "SPIRVBaseVectorize" - elif requested_pipeline == "SPIRVCooperativeMatrixVectorize": - compiler_pipeline = "SPIRVCooperativeMatrixVectorize" - elif requested_pipeline == "SPIRVVectorizeNVIDIA": - # TODO: change to test SPIRVMatmulPromoteVectorize too - compiler_pipeline = "SPIRVBaseVectorize" - - mma_schedule = "" - if compilation_info.mma_schedule is not None: - mma_schedule = ", {}".format(compilation_info.mma_schedule) - subgroup_size_str = "" - if compilation_info.subgroup_size is not None: - subgroup_size_str = f"subgroup_size = {compilation_info.subgroup_size}" - - compilation_info_string = ( - f"#compilation{generate_function.compilation_index} = " - "#iree_codegen.compilation_info<\n" - f" lowering_config = #iree_codegen.lowering_config,\n" - f" translation_info = <{compiler_pipeline} {compilation_info.workgroup_size_str()}\n" - f" {subgroup_size_str},\n" - f" {{ pipeline_depth = {compilation_info.software_pipeline_depth}, " - f" store_stage = 1{mma_schedule} }}>>\n" - ) - compilation_info_attr = ( - f"{{compilation_info = #compilation{generate_function.compilation_index}}} " - ) - func_definition = func_definition + compilation_info_string - generate_function.compilation_index += 1 - compute = f" %result = {op_name} {compilation_info_attr}ins(%lhs, %rhs: {lhs_tensor_type}, {rhs_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n" - if casted_lhs_rhs_type != lhs_rhs_type: - castback_op = get_castback_from_arg_op(lhs_rhs_type) - compute_lhs_tensor_type = f"tensor<{lhs_r}x{lhs_c}x{lhs_rhs_type.value}>" - compute_rhs_tensor_type = f"tensor<{rhs_r}x{rhs_c}x{lhs_rhs_type.value}>" - compute = ( - f" %lhs_casted = {castback_op} %lhs: {lhs_tensor_type} to {compute_lhs_tensor_type}\n" - f" %rhs_casted = {castback_op} %rhs: {rhs_tensor_type} to {compute_rhs_tensor_type}\n" - f" %result = {op_name} {compilation_info_attr}ins(%lhs_casted, %rhs_casted: {compute_lhs_tensor_type}, {compute_rhs_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}" - ) - if shape.accumulate: - signature = f"({lhs_tensor_type}, {rhs_tensor_type}, {acc_tensor_type}) -> {acc_tensor_type}" - import_declaration = f"func.func private @module.{func_name}(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view" - func_definition = func_definition + ( - f"func.func @{func_name}(%lhs: {lhs_tensor_type}, %rhs: {rhs_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n" - f"{compute}\n" - f" return %result: {acc_tensor_type}\n" - f"}}\n" - ) - else: - literal_zero_for_acc_type = "0.0" if "f" in acc_type.value else "0" - if acc_r == "?": - signature = f"({lhs_tensor_type}, {rhs_tensor_type}) -> {acc_tensor_type}" - import_declaration = f"func.func private @module.{func_name}(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view" - func_definition = func_definition + ( - f"func.func @{func_name}(%lhs: {lhs_tensor_type}, %rhs: {rhs_tensor_type}) -> {acc_tensor_type} {{\n" - f" %c0 = arith.constant 0 : index\n" - f" %c1 = arith.constant 1 : index\n" - f" %acc_dim0 = tensor.dim %lhs, %c0 : {lhs_tensor_type}\n" - f" %acc_dim1 = tensor.dim %rhs, %c1 : {rhs_tensor_type}\n" - f" %init_acc = tensor.empty(%acc_dim0, %acc_dim1) : {acc_tensor_type}\n" - f" %c0_acc_type = arith.constant 
{literal_zero_for_acc_type}: {acc_type.value}\n" - f" %acc = linalg.fill ins(%c0_acc_type : {acc_type.value}) outs(%init_acc : {acc_tensor_type}) -> {acc_tensor_type}\n" - f"{compute}" - f" return %result: {acc_tensor_type}\n" - f"}}\n" - ) - else: - signature = f"({lhs_tensor_type}, {rhs_tensor_type}) -> {acc_tensor_type}" - import_declaration = f"func.func private @module.{func_name}(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view" - func_definition = func_definition + ( - f"func.func @{func_name}(%lhs: {lhs_tensor_type}, %rhs: {rhs_tensor_type}) -> {acc_tensor_type} {{\n" - f" %init_acc = tensor.empty() : {acc_tensor_type}\n" - f" %c0_acc_type = arith.constant {literal_zero_for_acc_type}: {acc_type.value}\n" - f" %acc = linalg.fill ins(%c0_acc_type : {acc_type.value}) outs(%init_acc : {acc_tensor_type}) -> {acc_tensor_type}\n" - f"{compute}" - f" return %result: {acc_tensor_type}\n" - f"}}\n" - ) - return MLIRFunction( - name=func_name, - signature=signature, - import_declaration=import_declaration, - definition=func_definition, - ) - - -# Counter for producing unique compilation info attrs -generate_function.compilation_index = 0 - - -# Represents a call to a generated test function. -@dataclasses.dataclass -class TestCall: - function: MLIRFunction - op: str - - -# Intentionally fixed seed! We want full reproducibility here, both across runs -# and across machines. -# Intentionally not shared with local_pseudorandom_state to limit the ways -# in which shuffling testcases changes which random values are generated. -pseudorandom_generator_seed = 1 - - -def contents_generator_tag(generator: MatrixGenerator): - if generator == MatrixGenerator.ZERO: - return "" - elif generator == MatrixGenerator.RANDOM: - global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed + 1 - return f"!tag:iree:fully_specified_pseudorandom {pseudorandom_generator_seed}" - else: - raise ValueError(generator) - - -# Generate a matrix function argument of the given size as `%name`. -def generate_random_matrix( - name: str, - matrix_shape: list, - element_type: MatrixElemTypeId, -): - global pseudorandom_generator_seed - pseudorandom_generator_seed = pseudorandom_generator_seed + 1 - return ( - f" %{name}_dim0 = arith.constant {matrix_shape[0]} : i64\n" - f" %{name}_dim1 = arith.constant {matrix_shape[1]} : i64\n" - f" %{name}_element_type = hal.element_type<{element_type.value}> : i32\n" - f" %{name}_seed = arith.constant {pseudorandom_generator_seed} : i32\n" - f" %{name} = call @matmul_test.generate_random_matrix(%device, %{name}_dim0, %{name}_dim1, %{name}_element_type, %{name}_seed) : (!hal.device, i64, i64, i32, i32) -> !hal.buffer_view\n" - ) - - -call_id = 0 - - -# Generates the output trace for a testcase i.e. a single test function call, -# as a dictionary to be passed to yaml.dump. 
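To make the generated artifacts concrete, here is a sketch of one invocation (not from the original file; `TestShape`'s fields are inferred from the call sites in this script):

```python
shape = TestShape(m=8, k=4, n=2, accumulate=True)  # hypothetical shape
f = generate_function(
    MatrixElemTypeId.F16,  # lhs/rhs element type
    MatrixElemTypeId.F32,  # accumulator element type
    shape,
    transpose_rhs=False,
    dynamicity=Dynamicity.STATIC,
)
# f.name == "matmul_accumulate_8x4xf16_times_4x2xf16_into_8x2xf32"
# f.signature ==
#   "(tensor<8x4xf16>, tensor<4x2xf16>, tensor<8x2xf32>) -> tensor<8x2xf32>"
```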
-
-
-# Generates the output op for a testcase, i.e. a single test function call.
-def generate_call(
-    function: MLIRFunction,
-    lhs_rhs_type: MatrixElemTypeId,
-    acc_type: MatrixElemTypeId,
-    shape: TestShape,
-    transpose_rhs: bool,
-):
-    global call_id
-    func_name = f"{function.name}_{shape.m}_{shape.k}_{shape.n}"
-    if shape.accumulate:
-        func_name = f"{func_name}_acc"
-    func_name = f"{func_name}_{call_id}"
-    call_id = call_id + 1
-
-    description = f"Matmul shape (MxKxN): {shape.m}x{shape.k}x{shape.n}"
-    op = (
-        f"func.func @{func_name}() attributes {{\n"
-        f'  iree.reflection = {{description = "{description}"}}\n'
-        "} {\n"
-        "  %device_index = arith.constant 0 : index\n"
-        "  %device = hal.devices.get %device_index : !hal.device\n"
-    )
-
-    lhs_shape = [shape.m, shape.k]
-    if transpose_rhs:
-        rhs_shape = [shape.n, shape.k]
-        transpose_rhs = 1
-    else:
-        rhs_shape = [shape.k, shape.n]
-        transpose_rhs = 0
-
-    casted_lhs_rhs_type = cast_argtype_if_required(lhs_rhs_type)
-    op = op + generate_random_matrix("lhs", lhs_shape, casted_lhs_rhs_type)
-    op = op + generate_random_matrix("rhs", rhs_shape, casted_lhs_rhs_type)
-    if shape.accumulate:
-        op = op + generate_random_matrix("acc", [shape.m, shape.n], acc_type)
-        # TODO(#16168): there's a bug with in-place input->output aliasing and
-        # we work around it here by passing in a unique copy.
-        global pseudorandom_generator_seed
-        pseudorandom_generator_seed = pseudorandom_generator_seed - 1
-        op = op + generate_random_matrix("acc_copy", [shape.m, shape.n], acc_type)
-        op = op + (
-            f"  %result = call @module.{function.name}(%lhs, %rhs, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view\n"
-        )
-    else:
-        op = op + (
-            f"  %acc = util.null : !hal.buffer_view\n"
-            f"  %result = call @module.{function.name}(%lhs, %rhs) : (!hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view\n"
-        )
-
-    op = op + (
-        f"  %m = arith.constant {shape.m} : i64\n"
-        f"  %k = arith.constant {shape.k} : i64\n"
-        f"  %n = arith.constant {shape.n} : i64\n"
-        f"  %transpose_rhs = arith.constant {transpose_rhs} : i32\n"
-        f"  call @matmul_test.check_matmul_results(%device, %m, %k, %n, %transpose_rhs, %lhs, %rhs, %acc, %result) : (!hal.device, i64, i64, i64, i32, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()\n"
-    )
-
-    op = op + "  return\n"
-    op = op + "}\n"
-
-    return TestCall(function=function, op=op)
-
-
-# Generates all output files' contents as strings.
-def generate(
-    lhs_rhs_type: MatrixElemTypeId,
-    acc_type: MatrixElemTypeId,
-    shapes_id: ShapesId,
-    transpose_rhs: bool,
-    compilation_info_id: CompilationInfoId,
-):
-    functions = {}
-    calls = []
-
-    for compilation_info in get_test_compilation_infos(
-        compilation_info_id, lhs_rhs_type
-    ):
-        for shape in get_test_shapes(shapes_id):
-            for dynamicity in get_dynamicities(shapes_id):
-                function = generate_function(
-                    lhs_rhs_type,
-                    acc_type,
-                    shape,
-                    transpose_rhs,
-                    dynamicity,
-                    compilation_info,
-                )
-                # Different testcases may differ only by runtime parameters but
-                # share the same code. For example, dynamic-shapes testcases
-                # share the same code involving tensor<?x?xf32> even though the
-                # runtime values in the trace are different. That's why we append
-                # conditionally to calls, but unconditionally to function_definitions.
-                if function.name not in functions:
-                    functions[function.name] = function
-                calls.append(
-                    generate_call(
-                        function, lhs_rhs_type, acc_type, shape, transpose_rhs
-                    )
-                )
-
-    return (functions, calls)
-
-
-def parse_arguments():
-    parser = argparse.ArgumentParser(description="Generator of e2e matmul tests")
-    parser.add_argument(
-        "--output_matmul_mlir",
-        type=str,
-        help="Path of output .mlir file containing the generated matmuls",
-        required=True,
-    )
-    parser.add_argument(
-        "--output_calls_mlir",
-        type=str,
-        help="Path of output .mlir file containing the calls",
-        required=True,
-    )
-    parser.add_argument(
-        "--lhs_rhs_type",
-        type=str,
-        choices=["i32", "i8", "f32", "f16", "f8E4M3FNUZ", "bf16"],
-        help="Numeric type of input matrices",
-        required=True,
-    )
-    parser.add_argument(
-        "--acc_type",
-        type=str,
-        choices=["i32", "f32", "f16", "bf16"],
-        help="Numeric type of the accumulator and result matrices",
-        default="",
-        required=False,
-    )
-    parser.add_argument(
-        "--shapes",
-        type=str,
-        choices=[s.value for s in ShapesId],
-        help="Collection of matrix shapes to test",
-        required=True,
-    )
-    parser.add_argument(
-        "--transpose_rhs",
-        action="store_true",
-        help="Whether to transpose RHS",
-        default=False,
-        required=False,
-    )
-    parser.add_argument(
-        "--compilation_info",
-        type=str,
-        choices=[i.value for i in CompilationInfoId],
-        help="Collection of compilation info setups to test",
-        default="",
-        required=False,
-    )
-    parser.add_argument(
-        "--requirements",
-        type=str,
-        help="Target requirements for this module. Comma-separated. As in -iree-llvmcpu-target-cpu-features. If the target device does not meet all of the requirements, the test will be skipped.",
-        required=False,
-    )
-    return parser.parse_args()
-
-
-def write_code_file(functions, filename):
-    with open(filename, "w") as file:
-        for function in functions.values():
-            file.write(function.definition + "\n")
-
-
-def write_calls_file(functions, calls, filename, requirements):
-    # Module-level reflection information used to control the test tool.
-    reflection = ""
-    if requirements:
-        reflection = (
-            "iree.reflection = {"
-            'target_features = "'
-            + ",".join([req.lstrip("+") for req in requirements.split(",")])
-            + '"'
-            "}"
-        )
-    module_definition = (
-        f"builtin.module @calls attributes {{\n" f"  {reflection}\n" f"}} {{\n\n"
-    )
-
-    # Declare the custom module that generates arguments.
-    module_definition = module_definition + (
-        "func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view\n"
-        "func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)\n"
-        "\n"
-    )
-
-    # Declare the functions that will be called.
-    for function in functions.values():
-        module_definition = module_definition + function.import_declaration + "\n"
-    module_definition = module_definition + "\n"
-
-    # Emit the test cases for each call.
-    for call in calls:
-        module_definition = module_definition + call.op + "\n"
-
-    module_definition = module_definition + "\n}\n"
-
-    with open(filename, "w") as file:
-        file.write(module_definition)
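A small illustration of how `--requirements` flows into the reflection attribute emitted by `write_calls_file` above (hypothetical flag value):

```python
reqs = "+avx512f,+avx512vl"  # hypothetical --requirements value
features = ",".join(req.lstrip("+") for req in reqs.split(","))
assert features == "avx512f,avx512vl"
# write_calls_file then emits a module header of the form:
#   builtin.module @calls attributes {
#     iree.reflection = {target_features = "avx512f,avx512vl"}
#   } { ... }
```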
-
-
-# For now, the accumulator type can always be inferred from the input LHS/RHS
-# type, so we do that. That is temporary: eventually there will be cases
-# where the same input types are used with different accumulator types, e.g.
-# f16 inputs with both f16 and f32 accumulator.
-def infer_acc_type(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId):
-    if acc_type != MatrixElemTypeId.NONE:
-        return acc_type
-    if lhs_rhs_type == MatrixElemTypeId.F8E4M3FNUZ:
-        return MatrixElemTypeId.F32
-    if lhs_rhs_type == MatrixElemTypeId.I8:
-        return MatrixElemTypeId.I32
-    return lhs_rhs_type
-
-
-def main(args):
-    lhs_rhs_type = MatrixElemTypeId(args.lhs_rhs_type)
-    acc_type = MatrixElemTypeId(args.acc_type)
-    acc_type = infer_acc_type(lhs_rhs_type, acc_type)
-    shapes_id = ShapesId(args.shapes)
-    compilation_info_id = CompilationInfoId(args.compilation_info)
-
-    (functions, calls) = generate(
-        lhs_rhs_type, acc_type, shapes_id, args.transpose_rhs, compilation_info_id
-    )
-
-    write_code_file(functions, args.output_matmul_mlir)
-    write_calls_file(
-        functions,
-        calls,
-        args.output_calls_mlir,
-        args.requirements,
-    )
-
-
-if __name__ == "__main__":
-    main(parse_arguments())
diff --git a/tools/testing/BUILD.bazel b/tools/testing/BUILD.bazel
deleted file mode 100644
index 2a6834ce38b3..000000000000
--- a/tools/testing/BUILD.bazel
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright 2024 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-package(
-    features = ["layering_check"],
-    licenses = ["notice"],  # Apache 2.0
-)
diff --git a/tools/testing/CMakeLists.txt b/tools/testing/CMakeLists.txt
deleted file mode 100644
index ae2678c84ef5..000000000000
--- a/tools/testing/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright 2024 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-iree_add_all_subdirs()
diff --git a/tools/testing/e2e/BUILD.bazel b/tools/testing/e2e/BUILD.bazel
deleted file mode 100644
index 397627961d20..000000000000
--- a/tools/testing/e2e/BUILD.bazel
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright 2024 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -load("//build_tools/bazel:build_defs.oss.bzl", "iree_runtime_cc_binary", "iree_runtime_cc_library") - -package( - default_visibility = ["//visibility:public"], - features = ["layering_check"], - licenses = ["notice"], # Apache 2.0 -) - -iree_runtime_cc_library( - name = "e2e_test_util", - srcs = ["test_utils.c"], - hdrs = ["test_utils.h"], - deps = [ - "//runtime/src/iree/base", - "//runtime/src/iree/base/internal", - "//runtime/src/iree/base/internal:cpu", - "//runtime/src/iree/base/internal:flags", - "//runtime/src/iree/base/internal:path", - "//runtime/src/iree/hal", - "//runtime/src/iree/modules/hal", - "//runtime/src/iree/tooling:context_util", - "//runtime/src/iree/tooling:device_util", - "//runtime/src/iree/vm", - "//runtime/src/iree/vm:cc", - ], -) - -iree_runtime_cc_binary( - name = "iree-e2e-matmul-test", - srcs = ["iree-e2e-matmul-test.cc"], - deps = [ - ":e2e_test_util", - "//runtime/src/iree/base", - "//runtime/src/iree/base/internal", - "//runtime/src/iree/base/internal:cpu", - "//runtime/src/iree/base/internal:flags", - "//runtime/src/iree/base/internal:path", - "//runtime/src/iree/hal", - "//runtime/src/iree/modules/hal", - "//runtime/src/iree/tooling:context_util", - "//runtime/src/iree/tooling:device_util", - "//runtime/src/iree/vm", - "//runtime/src/iree/vm:cc", - ], -) - -iree_runtime_cc_binary( - name = "iree-e2e-conv2d-test", - srcs = ["iree-e2e-conv2d-test.cc"], - deps = [ - ":e2e_test_util", - "//runtime/src/iree/base", - "//runtime/src/iree/base/internal", - "//runtime/src/iree/base/internal:cpu", - "//runtime/src/iree/base/internal:flags", - "//runtime/src/iree/base/internal:path", - "//runtime/src/iree/hal", - "//runtime/src/iree/modules/hal", - "//runtime/src/iree/tooling:context_util", - "//runtime/src/iree/tooling:device_util", - "//runtime/src/iree/vm", - "//runtime/src/iree/vm:cc", - ], -) - -iree_runtime_cc_binary( - name = "iree-e2e-attention-test", - srcs = ["iree-e2e-attention-test.cc"], - deps = [ - ":e2e_test_util", - "//runtime/src/iree/base", - "//runtime/src/iree/base/internal", - "//runtime/src/iree/base/internal:cpu", - "//runtime/src/iree/base/internal:flags", - "//runtime/src/iree/base/internal:path", - "//runtime/src/iree/hal", - "//runtime/src/iree/modules/hal", - "//runtime/src/iree/tooling:context_util", - "//runtime/src/iree/tooling:device_util", - "//runtime/src/iree/vm", - "//runtime/src/iree/vm:cc", - ], -) diff --git a/tools/testing/e2e/CMakeLists.txt b/tools/testing/e2e/CMakeLists.txt deleted file mode 100644 index ece0c59d00b0..000000000000 --- a/tools/testing/e2e/CMakeLists.txt +++ /dev/null @@ -1,100 +0,0 @@ -if(NOT IREE_ENABLE_THREADING) - return() -endif() - -### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_ABOVE_THIS_LINE ### -################################################################################ -# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # -# tools/testing/e2e/BUILD.bazel # -# # -# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # -# CMake-only content. # -# # -# To disable autogeneration for this file entirely, delete this header. 
#
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_cc_library(
-  NAME
-    e2e_test_util
-  HDRS
-    "test_utils.h"
-  SRCS
-    "test_utils.c"
-  DEPS
-    iree::base
-    iree::base::internal
-    iree::base::internal::cpu
-    iree::base::internal::flags
-    iree::base::internal::path
-    iree::hal
-    iree::modules::hal
-    iree::tooling::context_util
-    iree::tooling::device_util
-    iree::vm
-    iree::vm::cc
-  PUBLIC
-)
-
-iree_cc_binary(
-  NAME
-    iree-e2e-matmul-test
-  SRCS
-    "iree-e2e-matmul-test.cc"
-  DEPS
-    ::e2e_test_util
-    iree::base
-    iree::base::internal
-    iree::base::internal::cpu
-    iree::base::internal::flags
-    iree::base::internal::path
-    iree::hal
-    iree::modules::hal
-    iree::tooling::context_util
-    iree::tooling::device_util
-    iree::vm
-    iree::vm::cc
-)
-
-iree_cc_binary(
-  NAME
-    iree-e2e-conv2d-test
-  SRCS
-    "iree-e2e-conv2d-test.cc"
-  DEPS
-    ::e2e_test_util
-    iree::base
-    iree::base::internal
-    iree::base::internal::cpu
-    iree::base::internal::flags
-    iree::base::internal::path
-    iree::hal
-    iree::modules::hal
-    iree::tooling::context_util
-    iree::tooling::device_util
-    iree::vm
-    iree::vm::cc
-)
-
-iree_cc_binary(
-  NAME
-    iree-e2e-attention-test
-  SRCS
-    "iree-e2e-attention-test.cc"
-  DEPS
-    ::e2e_test_util
-    iree::base
-    iree::base::internal
-    iree::base::internal::cpu
-    iree::base::internal::flags
-    iree::base::internal::path
-    iree::hal
-    iree::modules::hal
-    iree::tooling::context_util
-    iree::tooling::device_util
-    iree::vm
-    iree::vm::cc
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/tools/testing/e2e/iree-e2e-attention-test.cc b/tools/testing/e2e/iree-e2e-attention-test.cc
deleted file mode 100644
index 4b0464b13dfb..000000000000
--- a/tools/testing/e2e/iree-e2e-attention-test.cc
+++ /dev/null
@@ -1,486 +0,0 @@
-// Copyright 2024 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include <float.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "iree/base/api.h"
-#include "iree/base/internal/cpu.h"
-#include "iree/base/internal/flags.h"
-#include "iree/base/internal/math.h"
-#include "iree/base/internal/path.h"
-#include "iree/hal/api.h"
-#include "iree/modules/hal/module.h"
-#include "iree/tooling/context_util.h"
-#include "iree/tooling/device_util.h"
-#include "iree/vm/api.h"
-#include "iree/vm/native_module_cc.h"
-#include "tools/testing/e2e/test_utils.h"
-
-//===----------------------------------------------------------------------===//
-// Reference Attention
-//===----------------------------------------------------------------------===//
-
-// Helper for reference_attention.
-// Function to allocate and initialize tensors
-float* allocate_tensor(int dim1, int dim2, int dim3) {
-  const int size = dim1 * dim2 * dim3;
-  float* tensor = (float*)malloc(size * sizeof(float));
-  for (int i = 0; i < size; ++i) {
-    tensor[i] = 0.0f;
-  }
-  return tensor;
-}
-
-// Function to free allocated tensors
-void free_tensor(float* tensor) {
-  if (tensor != nullptr) free(tensor);
-}
-
-// Function to calculate 1D index for a 3D array
-int index_3d(int i, int j, int k, int dim2, int dim3) {
-  return i * dim2 * dim3 + j * dim3 + k;
-}
-
-static void reference_attention_f32_f32_f32_f32(
-    iree_hal_dim_t M, iree_hal_dim_t K1, iree_hal_dim_t K2, iree_hal_dim_t N,
-    iree_hal_dim_t B, const float* query_data, const float* key_data,
-    const float* value_data, float* result_data, iree_hal_dim_t b,
-    float* Attention) {
-  // Compute Q * K^T
-  for (int m = 0; m < M; ++m) {
-    for (int k2 = 0; k2 < K2; ++k2) {
-      float sum = 0.0;
-      for (int k1 = 0; k1 < K1; ++k1) {
-        int q_idx = index_3d(b, m, k1, M, K1);
-        int k_idx = index_3d(b, k2, k1, K2, K1);
-
-        sum += query_data[q_idx] * key_data[k_idx];
-      }
-      int att_idx = index_3d(0, m, k2, M, K2);
-      Attention[att_idx] = sum / sqrt(K1);  // Scale by sqrt(K1)
-    }
-  }
-
-  // Compute softmax on Attention
-  for (int m = 0; m < M; ++m) {
-    // Find the maximum value for the current sequence
-    float max_val = -FLT_MAX;
-    for (int k2 = 0; k2 < K2; ++k2) {
-      int att_idx = index_3d(0, m, k2, M, K2);
-      max_val = iree_max(max_val, Attention[att_idx]);
-    }
-
-    // Calculate the softmax denominator
-    float sum = 0.0f;
-    for (int k2 = 0; k2 < K2; ++k2) {
-      int att_idx = index_3d(0, m, k2, M, K2);
-      sum += exp(Attention[att_idx] - max_val);
-    }
-
-    // Apply softmax. The numerator must subtract the same max_val used in the
-    // denominator, otherwise the weights do not normalize to 1.
-    for (int k2 = 0; k2 < K2; ++k2) {
-      int att_idx = index_3d(0, m, k2, M, K2);
-      Attention[att_idx] = exp(Attention[att_idx] - max_val) / sum;
-    }
-  }
-
-  // Compute Attention * V
-  for (int m = 0; m < M; ++m) {
-    for (int n = 0; n < N; ++n) {
-      float sum = 0.0;
-      for (int k2 = 0; k2 < K2; ++k2) {
-        int att_idx = index_3d(0, m, k2, M, K2);
-        int v_idx = index_3d(b, k2, n, K2, N);
-        sum += Attention[att_idx] * value_data[v_idx];
-      }
-      int o_idx = index_3d(b, m, n, M, N);
-      result_data[o_idx] = sum;
-    }
-  }
-}
-
-static iree_status_t reference_attention_element(
-    iree_hal_dim_t M, iree_hal_dim_t K1, iree_hal_dim_t K2, iree_hal_dim_t N,
-    iree_hal_dim_t B, iree_hal_element_type_t query_elem_type,
-    iree_hal_element_type_t key_elem_type,
-    iree_hal_element_type_t value_elem_type, void* query_data, void* key_data,
-    void* value_data, void* actual_data, void* result_data, iree_hal_dim_t b,
-    float* Attention) {
-  if (query_elem_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 &&
-      key_elem_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 &&
-      value_elem_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) {
-    reference_attention_f32_f32_f32_f32(
-        M, K1, K2, N, B, (const float*)query_data, (const float*)key_data,
-        (const float*)value_data, (float*)result_data, b, Attention);
-
-  } else {
-    return iree_make_status(
-        IREE_STATUS_INVALID_ARGUMENT,
-        "unhandled combination of element types in attention");
-  }
-  return iree_ok_status();
-}
-
-// Reference attention implementation, used to compare attention results
-// against.
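The scalar kernel above implements scaled-dot-product attention; the same math in vectorized form, as a NumPy sketch (not part of the deleted tool), makes the stabilized softmax explicit:

```python
import numpy as np

def reference_attention(q, k, v):
    # q: [M, K1], k: [K2, K1], v: [K2, N] -- one batch slice, matching the
    # scalar loops above.
    scores = q @ k.T / np.sqrt(q.shape[1])       # scale by sqrt(K1)
    scores -= scores.max(axis=1, keepdims=True)  # subtract row max before exp
    weights = np.exp(scores)
    weights /= weights.sum(axis=1, keepdims=True)
    return weights @ v                           # [M, N]
```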
-static iree_status_t reference_attention( - iree_hal_dim_t B, iree_hal_dim_t M, iree_hal_dim_t K1, iree_hal_dim_t K2, - iree_hal_dim_t N, iree_hal_element_type_t query_elem_type, - iree_hal_element_type_t key_elem_type, - iree_hal_element_type_t value_elem_type, iree_byte_span_t query_contents, - iree_byte_span_t key_contents, iree_byte_span_t value_contents, - iree_byte_span_t actual_contents, iree_byte_span_t result_contents, - int compute_every) { - IREE_TRACE_ZONE_BEGIN(z0); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, B); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, M); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, K1); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, K2); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, N); - - iree_host_size_t count = 0; - float* Attention = allocate_tensor(1, M, K2); - for (iree_hal_dim_t b = 0; b < B; ++b) { - if (++count < compute_every) continue; - count = 0; - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, - reference_attention_element( - M, K1, K2, N, B, query_elem_type, key_elem_type, value_elem_type, - query_contents.data, key_contents.data, value_contents.data, - actual_contents.data, result_contents.data, b, Attention)); - } - free_tensor(Attention); - - IREE_TRACE_ZONE_END(z0); - return iree_ok_status(); -} -//===----------------------------------------------------------------------===// -// Attention comparison/logging -//===----------------------------------------------------------------------===// - -typedef struct { - iree_allocator_t host_allocator; - iree_hal_dim_t b; - iree_hal_dim_t m; - iree_hal_dim_t k1; - iree_hal_dim_t k2; - iree_hal_dim_t n; - iree_hal_element_type_t query_elem_type; - iree_hal_element_type_t key_elem_type; - iree_hal_element_type_t value_elem_type; - iree_hal_element_type_t result_elem_type; - iree_byte_span_t query_contents; - iree_byte_span_t key_contents; - iree_byte_span_t value_contents; - iree_byte_span_t actual_contents; - iree_byte_span_t expected_contents; -} attention_results_t; - -static void attention_results_deinitialize(attention_results_t* results); - -static iree_status_t attention_results_initialize( - iree_hal_device_t* device, iree_hal_dim_t b_size, iree_hal_dim_t m_size, - iree_hal_dim_t k1_size, iree_hal_dim_t k2_size, iree_hal_dim_t n_size, - iree_hal_buffer_view_t* query, iree_hal_buffer_view_t* key, - iree_hal_buffer_view_t* value, iree_hal_buffer_view_t* result, - iree_allocator_t host_allocator, attention_results_t* out_results) { - IREE_TRACE_ZONE_BEGIN(z0); - - memset(out_results, 0, sizeof(*out_results)); - out_results->host_allocator = host_allocator; - - out_results->b = b_size; - out_results->m = m_size; - out_results->k1 = k1_size; - out_results->k2 = k2_size; - out_results->n = n_size; - - out_results->query_elem_type = iree_hal_buffer_view_element_type(query); - out_results->key_elem_type = iree_hal_buffer_view_element_type(key); - out_results->value_elem_type = iree_hal_buffer_view_element_type(value); - out_results->result_elem_type = iree_hal_buffer_view_element_type(result); - - iree_hal_buffer_t* query_buffer = iree_hal_buffer_view_buffer(query); - iree_hal_buffer_t* key_buffer = iree_hal_buffer_view_buffer(key); - iree_hal_buffer_t* value_buffer = iree_hal_buffer_view_buffer(value); - iree_hal_buffer_t* result_buffer = iree_hal_buffer_view_buffer(result); - - iree_status_t status = iree_ok_status(); - - if (iree_status_is_ok(status)) { - out_results->query_contents.data_length = - iree_hal_buffer_byte_length(query_buffer); - status = iree_allocator_malloc(host_allocator, - 
out_results->query_contents.data_length, - (void**)&out_results->query_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, query_buffer, 0, out_results->query_contents.data, - out_results->query_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - if (iree_status_is_ok(status)) { - out_results->key_contents.data_length = - iree_hal_buffer_byte_length(key_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->key_contents.data_length, - (void**)&out_results->key_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, key_buffer, 0, out_results->key_contents.data, - out_results->key_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - if (iree_status_is_ok(status)) { - out_results->value_contents.data_length = - iree_hal_buffer_byte_length(value_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->value_contents.data_length, - (void**)&out_results->value_contents.data); - } - - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, value_buffer, 0, out_results->value_contents.data, - out_results->value_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - if (iree_status_is_ok(status)) { - out_results->actual_contents.data_length = - iree_hal_buffer_byte_length(result_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->actual_contents.data_length, - (void**)&out_results->actual_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, result_buffer, 0, out_results->actual_contents.data, - out_results->actual_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - if (iree_status_is_ok(status)) { - out_results->expected_contents.data_length = - iree_hal_buffer_byte_length(result_buffer); - status = iree_allocator_malloc( - host_allocator, out_results->expected_contents.data_length, - (void**)&out_results->expected_contents.data); - } - if (!iree_status_is_ok(status)) { - attention_results_deinitialize(out_results); - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -static void attention_results_deinitialize(attention_results_t* results) { - IREE_TRACE_ZONE_BEGIN(z0); - iree_allocator_free(results->host_allocator, results->query_contents.data); - iree_allocator_free(results->host_allocator, results->key_contents.data); - iree_allocator_free(results->host_allocator, results->value_contents.data); - iree_allocator_free(results->host_allocator, results->actual_contents.data); - iree_allocator_free(results->host_allocator, results->expected_contents.data); - - IREE_TRACE_ZONE_END(z0); -} - -// Helper for check_attention_results: the actual interesting part once we've -// obtained and validated the {b,m,k1,k2,n}_size values. On error, detailed -// logging is written to |file| if it is not NULL. 
-static iree_status_t check_attention_results_impl( - FILE* file, const attention_results_t* results, int check_every) { - IREE_TRACE_ZONE_BEGIN(z0); - - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, reference_attention(results->b, results->m, results->k1, results->k2, - results->n, results->query_elem_type, - results->key_elem_type, results->value_elem_type, - results->query_contents, results->key_contents, - results->value_contents, results->actual_contents, - results->expected_contents, check_every)); - - IREE_TRACE_ZONE_END(z0); - return iree_ok_status(); -} - -// Given an actual attention's inputs and output (all host-local), uses a -// reference attention implementation on the same inputs to check if the output -// is correct. On error, detailed logging is written to |file| if it is not -// NULL. -static iree_status_t check_attention_results( - FILE* file, const attention_results_t* results) { - IREE_TRACE_ZONE_BEGIN(z0); - // TODO: Increase the check every param to reduce the number of comparisons. - int check_every = 1; - iree_status_t status = - check_attention_results_impl(file, results, check_every); - if (!iree_status_is_ok(status) && check_every > 1) { - // If we got a failure with check_every>1, that didn't log a useful - // numerical summary, as most of the reference matrix entries hadn't been - // computed. Rerun now with check_every=1 to get that numerical logging. - iree_status_ignore(status); - status = check_attention_results_impl(file, results, 1); - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -//===----------------------------------------------------------------------===// -// `attention_test` custom module -//===----------------------------------------------------------------------===// -// This uses the C++ wrapper to keep things simple. Though easier to use it's -// got additional overhead/code-size bloat that doesn't matter in a test like -// this. Making a C module builder API that removes the boilerplate there is TBD -// so this file is written in C besides this module so that we can swap it back -// to being pure C in the future. - -namespace iree { - -class AttentionTestModuleState final { - public: - explicit AttentionTestModuleState(iree_allocator_t host_allocator) - : host_allocator_(host_allocator) {} - ~AttentionTestModuleState() = default; - - // Fills the destination span with pseudorandom values of the given - // |element_type|. The given |seed| is passed to the pseudorandom generator. - // The pseudorandom values are reproducible both across runs and across - // machines. 
- StatusOr> GenerateRandom3dTensor( - const vm::ref device, int64_t dim0, int64_t dim1, - int64_t dim2, iree_hal_element_type_t element_type, int32_t seed) { - iree_hal_dim_t dims[3] = { - (iree_hal_dim_t)dim0, - (iree_hal_dim_t)dim1, - (iree_hal_dim_t)dim2, - }; - iree_hal_buffer_params_t buffer_params = {0}; - buffer_params.usage = IREE_HAL_BUFFER_USAGE_DEFAULT; - buffer_params.access = IREE_HAL_MEMORY_ACCESS_ALL; - buffer_params.type = IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_DEVICE; - vm::ref result_view; - struct callback_state_t { - iree_hal_element_type_t element_type; - int32_t seed; - } callback_state = { - element_type, - seed, - }; - IREE_RETURN_IF_ERROR(iree_hal_buffer_view_generate_buffer( - device.get(), iree_hal_device_allocator(device.get()), - IREE_ARRAYSIZE(dims), dims, element_type, - IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params, - +[](iree_hal_buffer_mapping_t* mapping, void* user_data) { - callback_state_t callback_state = *(callback_state_t*)user_data; - iree_byte_span_t span = mapping->contents; - // Generate "uniform" integer-valued numbers in the range [min, max]. - int32_t min = 0; - int32_t max = 0; - iree_test_utils_get_min_max_for_element_type( - callback_state.element_type, &min, &max); - uint32_t range = (max - min + 1); - iree_host_size_t element_byte_count = - iree_hal_element_dense_byte_count(callback_state.element_type); - uint8_t* data_end = span.data + span.data_length; - uint32_t state = callback_state.seed; - for (uint8_t* data = span.data; data < data_end; - data += element_byte_count) { - int32_t value = - (int32_t)iree_test_utils_pseudorandom_range(&state, range) + - min; - iree_test_utils_write_element(callback_state.element_type, value, - data); - } - return iree_ok_status(); - }, - &callback_state, &result_view)); - return std::move(result_view); - } - - Status CheckAttentionResults( - const vm::ref device, int64_t b, int64_t m, int64_t k1, - int64_t k2, int64_t n, const vm::ref query, - const vm::ref key, - const vm::ref value, - const vm::ref actual_result) { - attention_results_t results = {}; - IREE_RETURN_IF_ERROR(attention_results_initialize( - device.get(), (iree_hal_dim_t)b, (iree_hal_dim_t)m, (iree_hal_dim_t)k1, - (iree_hal_dim_t)k2, (iree_hal_dim_t)n, query.get(), key.get(), - value.get(), actual_result.get(), host_allocator_, &results)); - iree_status_t status = check_attention_results(stderr, &results); - attention_results_deinitialize(&results); - return status; - } - - private: - iree_allocator_t host_allocator_; -}; - -static const vm::NativeFunction - kAttentionTestModuleFunctions[] = { - vm::MakeNativeFunction( - "generate_random_tensor", - &AttentionTestModuleState::GenerateRandom3dTensor), - vm::MakeNativeFunction( - "check_attention_results", - &AttentionTestModuleState::CheckAttentionResults), -}; - -struct AttentionTestModule final - : public vm::NativeModule { - using vm::NativeModule::NativeModule; - StatusOr> CreateState( - iree_allocator_t host_allocator) override { - return std::make_unique(host_allocator); - } -}; - -} // namespace iree - -static iree_status_t attention_test_module_create( - iree_vm_instance_t* instance, iree_allocator_t host_allocator, - iree_vm_module_t** out_module) { - IREE_ASSERT_ARGUMENT(out_module); - *out_module = NULL; - auto module = std::make_unique( - "attention_test", /*version=*/0, instance, host_allocator, - iree::span< - const iree::vm::NativeFunction>( - iree::kAttentionTestModuleFunctions)); - *out_module = module.release()->interface(); - return iree_ok_status(); -} - -int main(int 
argc, char** argv) { - IREE_TRACE_APP_ENTER(); - - iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv); - if (argc != 1) { - fprintf(stderr, "use --module= flags to specify the modules to run\n"); - IREE_TRACE_APP_EXIT(EXIT_FAILURE); - return EXIT_FAILURE; - } - - iree_status_t status = iree_test_utils_load_and_run_e2e_tests( - iree_allocator_system(), attention_test_module_create); - int exit_code = EXIT_SUCCESS; - if (!iree_status_is_ok(status)) { - iree_status_fprint(stderr, status); - bool is_unavailable = iree_status_is_unavailable(status); - iree_status_free(status); - exit_code = is_unavailable ? EXIT_SUCCESS : EXIT_FAILURE; - } - - IREE_TRACE_APP_EXIT(exit_code); - return exit_code; -} diff --git a/tools/testing/e2e/iree-e2e-conv2d-test.cc b/tools/testing/e2e/iree-e2e-conv2d-test.cc deleted file mode 100644 index c4158fdc73c9..000000000000 --- a/tools/testing/e2e/iree-e2e-conv2d-test.cc +++ /dev/null @@ -1,567 +0,0 @@ -// Copyright 2024 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include - -#include "iree/base/api.h" -#include "iree/base/internal/flags.h" -#include "iree/base/internal/math.h" -#include "iree/hal/api.h" -#include "iree/modules/hal/module.h" -#include "iree/tooling/context_util.h" -#include "iree/tooling/device_util.h" -#include "iree/vm/api.h" -#include "iree/vm/native_module_cc.h" -#include "tools/testing/e2e/test_utils.h" - -//===----------------------------------------------------------------------===// -// Reference conv2d (NCHW-FCHW) -//===----------------------------------------------------------------------===// - -// Conversion from 4D indices in row major order to 1D index. -static int convert_to_1d_index(iree_hal_dim_t channels, iree_hal_dim_t height, - iree_hal_dim_t width, iree_hal_dim_t n, - iree_hal_dim_t c, iree_hal_dim_t h, - iree_hal_dim_t w) { - return n * (channels * height * width) + c * (height * width) + h * width + w; -} - -// [f16 <= f16 * f16 + f16] -static void reference_conv2d_f16_f16_f16_f16( - iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, - iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, - iree_hal_dim_t kw_size, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, - iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, iree_hal_dim_t oh_size, - iree_hal_dim_t ow_size, const uint16_t* input_data, - const uint16_t* kernel_data, const uint16_t* acc_data, - uint16_t* result_data, iree_hal_dim_t n, iree_hal_dim_t oc, - iree_hal_dim_t oh, iree_hal_dim_t ow) { - iree_hal_dim_t out_idx = - convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); - - float acc = acc_data ? 
iree_math_f16_to_f32(acc_data[out_idx]) : 0.f; - - for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { - for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { - for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { - iree_hal_dim_t inp_idx = convert_to_1d_index( - c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), - (ow * sw_size + kw * dw_size)); - iree_hal_dim_t krnl_idx = - convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); - - acc += iree_math_f16_to_f32(input_data[inp_idx]) * - iree_math_f16_to_f32(kernel_data[krnl_idx]); - } - } - result_data[out_idx] = iree_math_f32_to_f16(acc); - } -} - -static void reference_conv2d_f32_f32_f32_f32( - iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, - iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, - iree_hal_dim_t kw_size, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, - iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, iree_hal_dim_t oh_size, - iree_hal_dim_t ow_size, const float* input_data, const float* kernel_data, - const float* acc_data, float* result_data, iree_hal_dim_t n, - iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { - iree_hal_dim_t out_idx = - convert_to_1d_index(f_size, oh_size, ow_size, n, oc, oh, ow); - - float acc = acc_data ? acc_data[out_idx] : 0; - - for (iree_hal_dim_t ic = 0; ic < c_size; ++ic) { - for (iree_hal_dim_t kh = 0; kh < kh_size; ++kh) { - for (iree_hal_dim_t kw = 0; kw < kw_size; ++kw) { - iree_hal_dim_t inp_idx = convert_to_1d_index( - c_size, h_size, w_size, n, ic, (oh * sh_size + kh * dh_size), - (ow * sw_size + kw * dw_size)); - iree_hal_dim_t krnl_idx = - convert_to_1d_index(c_size, kh_size, kw_size, oc, ic, kh, kw); - - acc += input_data[inp_idx] * kernel_data[krnl_idx]; - } - } - result_data[out_idx] = acc; - } -} - -// Helper for reference_conv2d. -static iree_status_t reference_conv2d_element( - iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, - iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, - iree_hal_dim_t kw_size, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, - iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, iree_hal_dim_t oh_size, - iree_hal_dim_t ow_size, iree_hal_element_type_t input_type, - iree_hal_element_type_t kernel_type, iree_hal_element_type_t acc_type, - void* input_data, void* kernel_data, void* acc_data, void* result_data, - iree_hal_dim_t n, iree_hal_dim_t oc, iree_hal_dim_t oh, iree_hal_dim_t ow) { - if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 && - kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 && - acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) { - reference_conv2d_f32_f32_f32_f32( - n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, sh_size, - sw_size, dh_size, dw_size, oh_size, ow_size, (const float*)input_data, - (const float*)kernel_data, (const float*)acc_data, (float*)result_data, - n, oc, oh, ow); - } else if (input_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && - kernel_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && - acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16) { - reference_conv2d_f16_f16_f16_f16( - n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, sh_size, - sw_size, dh_size, dw_size, oh_size, ow_size, - (const uint16_t*)input_data, (const uint16_t*)kernel_data, - (const uint16_t*)acc_data, (uint16_t*)result_data, n, oc, oh, ow); - } else { - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "unhandled combination of element types in conv2d"); - } - return iree_ok_status(); -} - -// Calculate the output shape given the dilation and strides. 
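The helper that follows computes the output spatial size from the input size, kernel size, stride, and dilation; equivalently, in Python (a sketch, with integer division standing in for floor):

```python
def conv_out_size(in_size: int, kernel: int, stride: int, dilation: int) -> int:
    # Effective kernel extent once dilation spreads the taps apart:
    # in - k - (k - 1)(d - 1) == in - ((k - 1) * d + 1).
    effective_kernel = (kernel - 1) * dilation + 1
    return (in_size - effective_kernel) // stride + 1

# e.g. a 3-tap kernel with dilation 2 spans 5 inputs:
assert conv_out_size(16, 3, stride=1, dilation=2) == 12
```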
-static iree_hal_dim_t out_shape_calc(iree_hal_dim_t i_shape, - iree_hal_dim_t k_shape, - iree_hal_dim_t stride, - iree_hal_dim_t dilation) { - iree_hal_dim_t x = (k_shape - 1) * (dilation - 1); - x = i_shape - k_shape - x; - return floor(x / stride) + 1; -} - -// Reference conv2d-NCHW-FCHW implementation, used to compare conv2d results -// against. -static iree_status_t reference_conv2d( - iree_hal_dim_t n_size, iree_hal_dim_t c_size, iree_hal_dim_t h_size, - iree_hal_dim_t w_size, iree_hal_dim_t f_size, iree_hal_dim_t kh_size, - iree_hal_dim_t kw_size, iree_hal_dim_t sh_size, iree_hal_dim_t sw_size, - iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, - iree_hal_element_type_t input_type, iree_hal_element_type_t kernel_type, - iree_hal_element_type_t acc_type, iree_byte_span_t input_contents, - iree_byte_span_t kernel_contents, iree_byte_span_t acc_contents, - iree_byte_span_t result_contents, int compute_every) { - IREE_TRACE_ZONE_BEGIN(z0); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, n_size); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, c_size); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, h_size); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, w_size); - - iree_hal_dim_t oh_size = out_shape_calc(h_size, kh_size, sh_size, dh_size); - iree_hal_dim_t ow_size = out_shape_calc(w_size, kw_size, sw_size, dw_size); - - for (iree_hal_dim_t n = 0; n < n_size; ++n) { - for (iree_hal_dim_t oc = 0; oc < f_size; ++oc) { - for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) { - for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) { - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, reference_conv2d_element( - n_size, c_size, h_size, w_size, f_size, kh_size, kw_size, - sh_size, sw_size, dh_size, dw_size, oh_size, ow_size, - input_type, kernel_type, acc_type, input_contents.data, - kernel_contents.data, acc_contents.data, - result_contents.data, n, oc, oh, ow)); - } - } - } - } - - IREE_TRACE_ZONE_END(z0); - return iree_ok_status(); -} - -//===----------------------------------------------------------------------===// -// Conv2d comparison/logging -//===----------------------------------------------------------------------===// - -typedef struct { - iree_allocator_t host_allocator; - iree_hal_dim_t n; // batch dim - iree_hal_dim_t c; // input channels - iree_hal_dim_t h; // input height - iree_hal_dim_t w; // input width - iree_hal_dim_t f; // output channels - iree_hal_dim_t kh; // kernel height - iree_hal_dim_t kw; // kernel width - iree_hal_dim_t sh; // stride along height dim - iree_hal_dim_t sw; // stride along width dim - iree_hal_dim_t dh; // dilation along height dim - iree_hal_dim_t dw; // dilation along width dim - iree_hal_element_type_t input_type; - iree_hal_element_type_t kernel_type; - iree_hal_element_type_t acc_type; - iree_hal_element_type_t result_type; - iree_byte_span_t input_contents; - iree_byte_span_t kernel_contents; - iree_byte_span_t acc_contents; - iree_byte_span_t actual_contents; - iree_byte_span_t expected_contents; -} conv2d_results_t; - -static void conv2d_results_deinitialize(conv2d_results_t* results); - -static iree_status_t conv2d_results_initialize( - iree_hal_device_t* device, iree_hal_dim_t n_size, iree_hal_dim_t c_size, - iree_hal_dim_t h_size, iree_hal_dim_t w_size, iree_hal_dim_t f_size, - iree_hal_dim_t kh_size, iree_hal_dim_t kw_size, iree_hal_dim_t sh_size, - iree_hal_dim_t sw_size, iree_hal_dim_t dh_size, iree_hal_dim_t dw_size, - iree_hal_buffer_view_t* input, iree_hal_buffer_view_t* kernel, - iree_hal_buffer_view_t* acc, iree_hal_buffer_view_t* result, - iree_allocator_t host_allocator, 
conv2d_results_t* out_results) { - IREE_TRACE_ZONE_BEGIN(z0); - - memset(out_results, 0, sizeof(*out_results)); - out_results->host_allocator = host_allocator; - - out_results->n = n_size; - out_results->c = c_size; - out_results->h = h_size; - out_results->w = w_size; - out_results->f = f_size; - out_results->kh = kh_size; - out_results->kw = kw_size; - out_results->sh = sh_size; - out_results->sw = sw_size; - out_results->dh = dh_size; - out_results->dw = dw_size; - - out_results->input_type = iree_hal_buffer_view_element_type(input); - out_results->kernel_type = iree_hal_buffer_view_element_type(kernel); - out_results->acc_type = iree_hal_buffer_view_element_type(acc); - out_results->result_type = iree_hal_buffer_view_element_type(result); - - iree_hal_buffer_t* input_buffer = iree_hal_buffer_view_buffer(input); - iree_hal_buffer_t* kernel_buffer = iree_hal_buffer_view_buffer(kernel); - iree_hal_buffer_t* acc_buffer = acc ? iree_hal_buffer_view_buffer(acc) : NULL; - iree_hal_buffer_t* result_buffer = iree_hal_buffer_view_buffer(result); - - iree_status_t status = iree_ok_status(); - - if (iree_status_is_ok(status)) { - out_results->input_contents.data_length = - iree_hal_buffer_byte_length(input_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->input_contents.data_length, - (void**)&out_results->input_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, input_buffer, 0, out_results->input_contents.data, - out_results->input_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - - if (iree_status_is_ok(status)) { - out_results->kernel_contents.data_length = - iree_hal_buffer_byte_length(kernel_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->kernel_contents.data_length, - (void**)&out_results->kernel_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, kernel_buffer, 0, out_results->kernel_contents.data, - out_results->kernel_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - - if (acc_buffer) { - if (iree_status_is_ok(status)) { - out_results->acc_contents.data_length = - iree_hal_buffer_byte_length(acc_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->acc_contents.data_length, - (void**)&out_results->acc_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, acc_buffer, 0, out_results->acc_contents.data, - out_results->acc_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - } - - if (iree_status_is_ok(status)) { - out_results->actual_contents.data_length = - iree_hal_buffer_byte_length(result_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->actual_contents.data_length, - (void**)&out_results->actual_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, result_buffer, 0, out_results->actual_contents.data, - out_results->actual_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - - if (iree_status_is_ok(status)) { - out_results->expected_contents.data_length = - iree_hal_buffer_byte_length(result_buffer); - status = iree_allocator_malloc( - host_allocator, out_results->expected_contents.data_length, - (void**)&out_results->expected_contents.data); - } - - if (!iree_status_is_ok(status)) { - 
conv2d_results_deinitialize(out_results);
-  }
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
-static void conv2d_results_deinitialize(conv2d_results_t* results) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-
-  iree_allocator_free(results->host_allocator, results->input_contents.data);
-  iree_allocator_free(results->host_allocator, results->kernel_contents.data);
-  if (!iree_byte_span_is_empty(results->acc_contents)) {
-    iree_allocator_free(results->host_allocator, results->acc_contents.data);
-  }
-  iree_allocator_free(results->host_allocator, results->actual_contents.data);
-  iree_allocator_free(results->host_allocator, results->expected_contents.data);
-
-  IREE_TRACE_ZONE_END(z0);
-}
-
-// Helper for check_conv2d: the actual interesting part once we've
-// obtained and validated the {n, f, oh, ow}_size values. On error, the first
-// index where the actual and expected values disagree is reported. TODO:
-// Add detailed logging to |file|.
-static iree_status_t check_conv2d_results_impl(FILE* file,
-                                               const conv2d_results_t* results,
-                                               int check_every) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-
-  IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, reference_conv2d(results->n, results->c, results->h, results->w,
-                           results->f, results->kh, results->kw, results->sh,
-                           results->sw, results->dh, results->dw,
-                           results->input_type, results->kernel_type,
-                           results->acc_type, results->input_contents,
-                           results->kernel_contents, results->acc_contents,
-                           results->expected_contents, check_every));
-
-  int count = 0;
-
-  iree_hal_dim_t oh_size =
-      out_shape_calc(results->h, results->kh, results->sh, results->dh);
-  iree_hal_dim_t ow_size =
-      out_shape_calc(results->w, results->kw, results->sw, results->dw);
-
-  for (iree_hal_dim_t n = 0; n < results->n; ++n) {
-    for (iree_hal_dim_t oc = 0; oc < results->f; ++oc) {
-      for (iree_hal_dim_t oh = 0; oh < oh_size; ++oh) {
-        for (iree_hal_dim_t ow = 0; ow < ow_size; ++ow) {
-          if (++count < check_every) continue;
-          count = 0;
-          iree_hal_dim_t idx =
-              convert_to_1d_index(results->f, oh_size, ow_size, n, oc, oh, ow);
-          iree_test_utils_e2e_value_t actual_value =
-              iree_test_utils_read_buffer_element(
-                  idx, results->result_type, results->actual_contents.data);
-          iree_test_utils_e2e_value_t expected_value =
-              iree_test_utils_read_buffer_element(
-                  idx, results->result_type, results->expected_contents.data);
-          if (!iree_test_utils_result_elements_agree(actual_value,
-                                                     expected_value)) {
-            fprintf(
-                file,
-                "\n\nerror: the actual and expected result tensors disagree "
-                "at n %" PRIdim ", oc %" PRIdim ", oh %" PRIdim ", ow %" PRIdim
-                ".\n\n",
-                n, oc, oh, ow);
-            IREE_TRACE_ZONE_END(z0);
-            return iree_make_status(IREE_STATUS_ABORTED);
-          }
-        }
-      }
-    }
-  }
-
-  IREE_TRACE_ZONE_END(z0);
-  return iree_ok_status();
-}
-
-// Given an actual conv2d's inputs and output (all host-local), uses a
-// reference conv2d implementation on the same inputs to check if the output
-// is correct. On error, the first index where the actual and expected values
-// disagree is reported. TODO: Add detailed logging to |file|.
-static iree_status_t check_conv2d_results(FILE* file,
-                                          const conv2d_results_t* results) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  // TODO: Increase the check every param to reduce the number of comparisons.
-  int check_every = 1;
-  iree_status_t status = check_conv2d_results_impl(file, results, check_every);
-  if (!iree_status_is_ok(status) && check_every > 1) {
-    // If we got a failure with check_every>1, that didn't log a useful
-    // numerical summary, as most of the reference tensor entries hadn't been
-    // computed. Rerun now with check_every=1 to get that numerical logging.
-    iree_status_ignore(status);
-    status = check_conv2d_results_impl(file, results, 1);
-  }
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
-//===----------------------------------------------------------------------===//
-// `conv2d_test` custom module
-//===----------------------------------------------------------------------===//
-// This uses the C++ wrapper to keep things simple. Though easier to use, it
-// has additional overhead/code-size bloat that doesn't matter in a test like
-// this. Making a C module builder API that removes the boilerplate there is
-// TBD, so the rest of this file is written in C so that we can swap this
-// module back to being pure C in the future.
-
-namespace iree {
-
-class Conv2dTestModuleState final {
- public:
-  explicit Conv2dTestModuleState(iree_allocator_t host_allocator)
-      : host_allocator_(host_allocator) {}
-  ~Conv2dTestModuleState() = default;
-
-  // Fills the destination span with pseudorandom values of the given
-  // |element_type|. The given |seed| is passed to the pseudorandom generator.
-  // The pseudorandom values are reproducible both across runs and across
-  // machines.
-  StatusOr<vm::ref<iree_hal_buffer_view_t>> GenerateRandom4dTensor(
-      const vm::ref<iree_hal_device_t> device, int64_t dim0, int64_t dim1,
-      int64_t dim2, int64_t dim3, iree_hal_element_type_t element_type,
-      int32_t seed) {
-    iree_hal_dim_t dims[4] = {
-        (iree_hal_dim_t)dim0,
-        (iree_hal_dim_t)dim1,
-        (iree_hal_dim_t)dim2,
-        (iree_hal_dim_t)dim3,
-    };
-    iree_hal_buffer_params_t buffer_params = {0};
-    buffer_params.usage = IREE_HAL_BUFFER_USAGE_DEFAULT;
-    buffer_params.access = IREE_HAL_MEMORY_ACCESS_ALL;
-    buffer_params.type = IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_DEVICE;
-    vm::ref<iree_hal_buffer_view_t> result_view;
-    struct callback_state_t {
-      iree_hal_element_type_t element_type;
-      int32_t seed;
-    } callback_state = {
-        element_type,
-        seed,
-    };
-    IREE_RETURN_IF_ERROR(iree_hal_buffer_view_generate_buffer(
-        device.get(), iree_hal_device_allocator(device.get()),
-        IREE_ARRAYSIZE(dims), dims, element_type,
-        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params,
-        +[](iree_hal_buffer_mapping_t* mapping, void* user_data) {
-          callback_state_t callback_state = *(callback_state_t*)user_data;
-          iree_byte_span_t span = mapping->contents;
-          // Generate "uniform" integer-valued numbers in the range [min,
-          // max].
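          // The generator is the minstd-style LCG defined in test_utils
          // (IREE_PRNG_MULTIPLIER / IREE_PRNG_MODULUS): each step computes
          //   state = (state * 48271) % 2147483647
          // so, e.g., seed 1 yields 48271, then 182605794, ... -- the same
          // sequence on every run and every machine.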
-          int32_t min = 0;
-          int32_t max = 0;
-          iree_test_utils_get_min_max_for_element_type(
-              callback_state.element_type, &min, &max);
-          uint32_t range = (max - min + 1);
-          iree_host_size_t element_byte_count =
-              iree_hal_element_dense_byte_count(callback_state.element_type);
-          uint8_t* data_end = span.data + span.data_length;
-          uint32_t state = callback_state.seed;
-          for (uint8_t* data = span.data; data < data_end;
-               data += element_byte_count) {
-            int32_t value =
-                (int32_t)iree_test_utils_pseudorandom_range(&state, range) +
-                min;
-            iree_test_utils_write_element(callback_state.element_type, value,
-                                          data);
-          }
-          return iree_ok_status();
-        },
-        &callback_state, &result_view));
-    return std::move(result_view);
-  }
-
-  Status CheckConv2dResults(
-      const vm::ref<iree_hal_device_t> device, int64_t n, int64_t c, int64_t h,
-      int64_t w, int64_t f, int64_t kh, int64_t kw, int64_t sh, int64_t sw,
-      int64_t dh, int64_t dw, const vm::ref<iree_hal_buffer_view_t> input,
-      const vm::ref<iree_hal_buffer_view_t> kernel,
-      const vm::ref<iree_hal_buffer_view_t> acc,
-      const vm::ref<iree_hal_buffer_view_t> actual_result) {
-    conv2d_results_t results = {};
-    IREE_RETURN_IF_ERROR(conv2d_results_initialize(
-        device.get(), (iree_hal_dim_t)n, (iree_hal_dim_t)c, (iree_hal_dim_t)h,
-        (iree_hal_dim_t)w, (iree_hal_dim_t)f, (iree_hal_dim_t)kh,
-        (iree_hal_dim_t)kw, (iree_hal_dim_t)sh, (iree_hal_dim_t)sw,
-        (iree_hal_dim_t)dh, (iree_hal_dim_t)dw, input.get(), kernel.get(),
-        acc.get(), actual_result.get(), host_allocator_, &results));
-    iree_status_t status = check_conv2d_results(stderr, &results);
-    conv2d_results_deinitialize(&results);
-    return status;
-  }
-
- private:
-  iree_allocator_t host_allocator_;
-};
-
-static const vm::NativeFunction<Conv2dTestModuleState>
-    kConv2dTestModuleFunctions[] = {
-        vm::MakeNativeFunction("generate_random_tensor",
-                               &Conv2dTestModuleState::GenerateRandom4dTensor),
-        vm::MakeNativeFunction("check_conv2d_results",
-                               &Conv2dTestModuleState::CheckConv2dResults),
-};
-
-struct Conv2dTestModule final
-    : public vm::NativeModule<Conv2dTestModuleState> {
-  using vm::NativeModule<Conv2dTestModuleState>::NativeModule;
-  StatusOr<std::unique_ptr<Conv2dTestModuleState>> CreateState(
-      iree_allocator_t host_allocator) override {
-    return std::make_unique<Conv2dTestModuleState>(host_allocator);
-  }
-};
-
-}  // namespace iree
-
-static iree_status_t conv2d_test_module_create(iree_vm_instance_t* instance,
-                                               iree_allocator_t host_allocator,
-                                               iree_vm_module_t** out_module) {
-  IREE_ASSERT_ARGUMENT(out_module);
-  *out_module = NULL;
-  auto module = std::make_unique<iree::Conv2dTestModule>(
-      "conv2d_test", /*version=*/0, instance, host_allocator,
-      iree::span<const iree::vm::NativeFunction<iree::Conv2dTestModuleState>>(
-          iree::kConv2dTestModuleFunctions));
-  *out_module = module.release()->interface();
-  return iree_ok_status();
-}
-
-int main(int argc, char** argv) {
-  IREE_TRACE_APP_ENTER();
-
-  iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
-  if (argc != 1) {
-    fprintf(stderr, "use --module= flags to specify the modules to run\n");
-    IREE_TRACE_APP_EXIT(EXIT_FAILURE);
-    return EXIT_FAILURE;
-  }
-
-  // Run the tests. Note that some modules may be compiled for other platforms
-  // and not have the required architectures for execution within them - to
-  // keep the test runner dumber we gracefully fail those cases by returning
-  // success.
-  iree_status_t status = iree_test_utils_load_and_run_e2e_tests(
-      iree_allocator_system(), conv2d_test_module_create);
-  int exit_code = EXIT_SUCCESS;
-  if (!iree_status_is_ok(status)) {
-    iree_status_fprint(stderr, status);
-    bool is_device_unavailable = iree_status_is_not_found(status);
-    iree_status_free(status);
-    exit_code = is_device_unavailable ?
EXIT_SUCCESS : EXIT_FAILURE;
-  }
-
-  IREE_TRACE_APP_EXIT(exit_code);
-  return exit_code;
-}
diff --git a/tools/testing/e2e/iree-e2e-matmul-test.cc b/tools/testing/e2e/iree-e2e-matmul-test.cc
deleted file mode 100644
index f2773f048e79..000000000000
--- a/tools/testing/e2e/iree-e2e-matmul-test.cc
+++ /dev/null
@@ -1,743 +0,0 @@
-// Copyright 2024 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "iree/base/api.h"
-#include "iree/base/internal/cpu.h"
-#include "iree/base/internal/flags.h"
-#include "iree/base/internal/math.h"
-#include "iree/base/internal/path.h"
-#include "iree/hal/api.h"
-#include "iree/modules/hal/module.h"
-#include "iree/tooling/context_util.h"
-#include "iree/tooling/device_util.h"
-#include "iree/vm/api.h"
-#include "iree/vm/native_module_cc.h"
-#include "tools/testing/e2e/test_utils.h"
-
-//===----------------------------------------------------------------------===//
-// Reference matmul
-//===----------------------------------------------------------------------===//
-
-#define REFERENCE_MATMUL(LHSTYPE, RHSTYPE, RESTYPE, ACCTYPE)                   \
-  static void reference_matmul_##LHSTYPE##_##RHSTYPE##_##RESTYPE##_##ACCTYPE(  \
-      iree_hal_dim_t m_size, iree_hal_dim_t k_size, iree_hal_dim_t n_size,     \
-      iree_hal_element_type_t lhs_type, iree_hal_element_type_t rhs_type,      \
-      iree_hal_element_type_t acc_type, bool transpose_rhs,                    \
-      const LHSTYPE* lhs_data, const RHSTYPE* rhs_data,                        \
-      const ACCTYPE* acc_data, RESTYPE* result_data, iree_hal_dim_t m,         \
-      iree_hal_dim_t n) {                                                      \
-    ACCTYPE acc = acc_data ? acc_data[n + m * n_size] : 0;                     \
-    for (iree_hal_dim_t k = 0; k < k_size; ++k) {                              \
-      LHSTYPE lhs_value = lhs_data[k + m * k_size];                            \
-      RHSTYPE rhs_value =                                                      \
-          transpose_rhs ? rhs_data[k + n * k_size] : rhs_data[n + k * n_size]; \
-      acc += (ACCTYPE)lhs_value * (ACCTYPE)rhs_value;                          \
-    }                                                                          \
-    result_data[n + m * n_size] = acc;                                         \
-  }
-
-// Reference matmul instantiation from macro REFERENCE_MATMUL
-// for the f32 input, f32 accumulation, and f32 result.
-// [float <= float * float + float]
-REFERENCE_MATMUL(float, float, float, float)
-
-// Reference matmul instantiation from macro REFERENCE_MATMUL
-// for the int8_t input, int32_t accumulation, and int32_t result.
-// [i32 <= i8 * i8 + i32]
-REFERENCE_MATMUL(int8_t, int8_t, int32_t, int32_t)
-
-// Reference matmul instantiation from macro REFERENCE_MATMUL
-// for the int32_t input, int32_t accumulation, and int32_t result.
-// [i32 <= i32 * i32 + i32]
-REFERENCE_MATMUL(int32_t, int32_t, int32_t, int32_t)
-
-// Reference matmul for the f16 input, f16 accumulation, and f16 result.
-// [f16 <= f16 * f16 + f16]
-static void reference_matmul_f16_f16_f16_f16(
-    iree_hal_dim_t m_size, iree_hal_dim_t k_size, iree_hal_dim_t n_size,
-    iree_hal_element_type_t lhs_type, iree_hal_element_type_t rhs_type,
-    iree_hal_element_type_t acc_type, bool transpose_rhs,
-    const uint16_t* lhs_data, const uint16_t* rhs_data,
-    const uint16_t* acc_data, uint16_t* result_data, iree_hal_dim_t m,
-    iree_hal_dim_t n) {
-  float acc = acc_data ? iree_math_f16_to_f32(acc_data[n + m * n_size]) : 0.f;
-  for (iree_hal_dim_t k = 0; k < k_size; ++k) {
-    int64_t rhs_index = transpose_rhs ?
k + n * k_size : n + k * n_size;
-    acc += iree_math_f16_to_f32(lhs_data[k + m * k_size]) *
-           iree_math_f16_to_f32(rhs_data[rhs_index]);
-  }
-  result_data[n + m * n_size] = iree_math_f32_to_f16(acc);
-}
-
-// Reference matmul for the f16 input, f32 accumulation, and f32 result.
-// [f32 <= f16 * f16 + f32]
-static void reference_matmul_f16_f16_f32_f32(
-    iree_hal_dim_t m_size, iree_hal_dim_t k_size, iree_hal_dim_t n_size,
-    iree_hal_element_type_t lhs_type, iree_hal_element_type_t rhs_type,
-    iree_hal_element_type_t acc_type, bool transpose_rhs,
-    const uint16_t* lhs_data, const uint16_t* rhs_data, const float* acc_data,
-    float* result_data, iree_hal_dim_t m, iree_hal_dim_t n) {
-  float acc = acc_data ? acc_data[n + m * n_size] : 0.f;
-  for (iree_hal_dim_t k = 0; k < k_size; ++k) {
-    int64_t rhs_index = transpose_rhs ? k + n * k_size : n + k * n_size;
-    acc += iree_math_f16_to_f32(lhs_data[k + m * k_size]) *
-           iree_math_f16_to_f32(rhs_data[rhs_index]);
-  }
-  result_data[n + m * n_size] = acc;
-}
-
-// Reference matmul for the bf16 input, bf16 accumulation, and bf16 result.
-// [bf16 <= bf16 * bf16 + bf16]
-static void reference_matmul_bf16_bf16_bf16_bf16(
-    iree_hal_dim_t m_size, iree_hal_dim_t k_size, iree_hal_dim_t n_size,
-    iree_hal_element_type_t lhs_type, iree_hal_element_type_t rhs_type,
-    iree_hal_element_type_t acc_type, bool transpose_rhs,
-    const uint16_t* lhs_data, const uint16_t* rhs_data,
-    const uint16_t* acc_data, uint16_t* result_data, iree_hal_dim_t m,
-    iree_hal_dim_t n) {
-  float acc = acc_data ? iree_math_bf16_to_f32(acc_data[n + m * n_size]) : 0.f;
-  for (iree_hal_dim_t k = 0; k < k_size; ++k) {
-    int64_t rhs_index = transpose_rhs ? k + n * k_size : n + k * n_size;
-    acc += iree_math_bf16_to_f32(lhs_data[k + m * k_size]) *
-           iree_math_bf16_to_f32(rhs_data[rhs_index]);
-  }
-  result_data[n + m * n_size] = iree_math_f32_to_bf16(acc);
-}
-
-// Reference matmul for the bf16 input, f32 accumulation, and f32 result.
-// [f32 <= bf16 * bf16 + f32]
-static void reference_matmul_bf16_bf16_f32_f32(
-    iree_hal_dim_t m_size, iree_hal_dim_t k_size, iree_hal_dim_t n_size,
-    iree_hal_element_type_t lhs_type, iree_hal_element_type_t rhs_type,
-    iree_hal_element_type_t acc_type, bool transpose_rhs,
-    const uint16_t* lhs_data, const uint16_t* rhs_data, const float* acc_data,
-    float* result_data, iree_hal_dim_t m, iree_hal_dim_t n) {
-  float acc = acc_data ? acc_data[n + m * n_size] : 0.f;
-  for (iree_hal_dim_t k = 0; k < k_size; ++k) {
-    int64_t rhs_index = transpose_rhs ? k + n * k_size : n + k * n_size;
-    acc += iree_math_bf16_to_f32(lhs_data[k + m * k_size]) *
-           iree_math_bf16_to_f32(rhs_data[rhs_index]);
-  }
-  result_data[n + m * n_size] = acc;
-}
-
-// Helper for reference_matmul.
-// Computes one element in the result matrix.
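// All reference kernels above share the same row-major indexing convention:
// logical LHS element (m, k) lives at lhs_data[k + m * k_size] and logical
// RHS element (k, n) at rhs_data[n + k * n_size]; with transpose_rhs the RHS
// is stored (n, k)-major, so the same element is rhs_data[k + n * k_size].
// For example, with k_size=4 and n_size=8, RHS element (k=2, n=3) sits at
// index 3 + 2 * 8 = 19 untransposed and at 2 + 3 * 4 = 14 transposed.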
-static iree_status_t reference_matmul_element( - iree_hal_dim_t m_size, iree_hal_dim_t k_size, iree_hal_dim_t n_size, - iree_hal_element_type_t lhs_type, iree_hal_element_type_t rhs_type, - iree_hal_element_type_t acc_type, bool transpose_rhs, void* lhs_data, - void* rhs_data, void* acc_data, void* result_data, iree_hal_dim_t m, - iree_hal_dim_t n) { - if (lhs_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 && - rhs_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 && - acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) { - reference_matmul_float_float_float_float( - m_size, k_size, n_size, lhs_type, rhs_type, acc_type, transpose_rhs, - (const float*)lhs_data, (const float*)rhs_data, (const float*)acc_data, - (float*)result_data, m, n); - } else if (iree_hal_element_type_is_integer(lhs_type, 8) && - iree_hal_element_type_is_integer(rhs_type, 8) && - iree_hal_element_type_is_integer(acc_type, 32)) { - reference_matmul_int8_t_int8_t_int32_t_int32_t( - m_size, k_size, n_size, lhs_type, rhs_type, acc_type, transpose_rhs, - (const int8_t*)lhs_data, (const int8_t*)rhs_data, - (const int32_t*)acc_data, (int32_t*)result_data, m, n); - } else if (iree_hal_element_type_is_integer(lhs_type, 32) && - iree_hal_element_type_is_integer(rhs_type, 32) && - iree_hal_element_type_is_integer(acc_type, 32)) { - reference_matmul_int32_t_int32_t_int32_t_int32_t( - m_size, k_size, n_size, lhs_type, rhs_type, acc_type, transpose_rhs, - (const int32_t*)lhs_data, (const int32_t*)rhs_data, - (const int32_t*)acc_data, (int32_t*)result_data, m, n); - } else if (lhs_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && - rhs_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && - acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16) { - reference_matmul_f16_f16_f16_f16( - m_size, k_size, n_size, lhs_type, rhs_type, acc_type, transpose_rhs, - (const uint16_t*)lhs_data, (const uint16_t*)rhs_data, - (const uint16_t*)acc_data, (uint16_t*)result_data, m, n); - } else if (lhs_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && - rhs_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16 && - acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) { - reference_matmul_f16_f16_f32_f32( - m_size, k_size, n_size, lhs_type, rhs_type, acc_type, transpose_rhs, - (const uint16_t*)lhs_data, (const uint16_t*)rhs_data, - (const float*)acc_data, (float*)result_data, m, n); - } else if (lhs_type == IREE_HAL_ELEMENT_TYPE_BFLOAT_16 && - rhs_type == IREE_HAL_ELEMENT_TYPE_BFLOAT_16 && - acc_type == IREE_HAL_ELEMENT_TYPE_BFLOAT_16) { - reference_matmul_bf16_bf16_bf16_bf16( - m_size, k_size, n_size, lhs_type, rhs_type, acc_type, transpose_rhs, - (const uint16_t*)lhs_data, (const uint16_t*)rhs_data, - (const uint16_t*)acc_data, (uint16_t*)result_data, m, n); - } else if (lhs_type == IREE_HAL_ELEMENT_TYPE_BFLOAT_16 && - rhs_type == IREE_HAL_ELEMENT_TYPE_BFLOAT_16 && - acc_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) { - reference_matmul_bf16_bf16_f32_f32( - m_size, k_size, n_size, lhs_type, rhs_type, acc_type, transpose_rhs, - (const uint16_t*)lhs_data, (const uint16_t*)rhs_data, - (const float*)acc_data, (float*)result_data, m, n); - } else { - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "unhandled combination of element types in matmul"); - } - return iree_ok_status(); -} - -// Reference matmul implementation, used to compare matmul results against. 
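// As a concrete instance of that convention, a 2x2 f32 matmul with
//   lhs = [[1, 2], [3, 4]], rhs = [[5, 6], [7, 8]], and no accumulator
// yields result = [[1*5 + 2*7, 1*6 + 2*8], [3*5 + 4*7, 3*6 + 4*8]]
//               = [[19, 22], [43, 50]],
// stored row-major as result_data = {19, 22, 43, 50}. The compute_every
// parameter below strides the loop so that for large shapes only a subset of
// the expected elements is actually computed.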
-static iree_status_t reference_matmul( - iree_hal_dim_t m_size, iree_hal_dim_t k_size, iree_hal_dim_t n_size, - iree_hal_element_type_t lhs_type, iree_hal_element_type_t rhs_type, - iree_hal_element_type_t acc_type, bool transpose_rhs, - iree_byte_span_t lhs_contents, iree_byte_span_t rhs_contents, - iree_byte_span_t acc_contents, iree_byte_span_t result_contents, - int compute_every) { - IREE_TRACE_ZONE_BEGIN(z0); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, m_size); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, k_size); - IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, n_size); - - iree_host_size_t count = 0; - for (iree_hal_dim_t m = 0; m < m_size; ++m) { - for (iree_hal_dim_t n = 0; n < n_size; ++n) { - if (++count < compute_every) continue; - count = 0; - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, reference_matmul_element( - m_size, k_size, n_size, lhs_type, rhs_type, acc_type, - transpose_rhs, lhs_contents.data, rhs_contents.data, - acc_contents.data, result_contents.data, m, n)); - } - } - - IREE_TRACE_ZONE_END(z0); - return iree_ok_status(); -} - -//===----------------------------------------------------------------------===// -// Matmul comparison/logging -//===----------------------------------------------------------------------===// - -typedef struct { - iree_allocator_t host_allocator; - iree_hal_dim_t m; - iree_hal_dim_t k; - iree_hal_dim_t n; - iree_hal_element_type_t lhs_type; - iree_hal_element_type_t rhs_type; - iree_hal_element_type_t acc_type; - iree_hal_element_type_t result_type; - bool transpose_rhs; - iree_byte_span_t lhs_contents; - iree_byte_span_t rhs_contents; - iree_byte_span_t acc_contents; - iree_byte_span_t actual_contents; - iree_byte_span_t expected_contents; -} matmul_results_t; - -static void matmul_results_deinitialize(matmul_results_t* results); - -static iree_status_t matmul_results_initialize( - iree_hal_device_t* device, iree_hal_dim_t m_size, iree_hal_dim_t k_size, - iree_hal_dim_t n_size, uint32_t transpose_rhs, iree_hal_buffer_view_t* lhs, - iree_hal_buffer_view_t* rhs, iree_hal_buffer_view_t* acc, - iree_hal_buffer_view_t* result, iree_allocator_t host_allocator, - matmul_results_t* out_results) { - IREE_TRACE_ZONE_BEGIN(z0); - - memset(out_results, 0, sizeof(*out_results)); - out_results->host_allocator = host_allocator; - - out_results->m = m_size; - out_results->k = k_size; - out_results->n = n_size; - - out_results->lhs_type = iree_hal_buffer_view_element_type(lhs); - out_results->rhs_type = iree_hal_buffer_view_element_type(rhs); - out_results->acc_type = iree_hal_buffer_view_element_type(result); - out_results->result_type = iree_hal_buffer_view_element_type(result); - - out_results->transpose_rhs = transpose_rhs != 0; - - iree_hal_buffer_t* lhs_buffer = iree_hal_buffer_view_buffer(lhs); - iree_hal_buffer_t* rhs_buffer = iree_hal_buffer_view_buffer(rhs); - iree_hal_buffer_t* acc_buffer = acc ? 
iree_hal_buffer_view_buffer(acc) : NULL; - iree_hal_buffer_t* result_buffer = iree_hal_buffer_view_buffer(result); - - iree_status_t status = iree_ok_status(); - - if (iree_status_is_ok(status)) { - out_results->lhs_contents.data_length = - iree_hal_buffer_byte_length(lhs_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->lhs_contents.data_length, - (void**)&out_results->lhs_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, lhs_buffer, 0, out_results->lhs_contents.data, - out_results->lhs_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - - if (iree_status_is_ok(status)) { - out_results->rhs_contents.data_length = - iree_hal_buffer_byte_length(rhs_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->rhs_contents.data_length, - (void**)&out_results->rhs_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, rhs_buffer, 0, out_results->rhs_contents.data, - out_results->rhs_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - - if (acc_buffer) { - if (iree_status_is_ok(status)) { - out_results->acc_contents.data_length = - iree_hal_buffer_byte_length(acc_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->acc_contents.data_length, - (void**)&out_results->acc_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, acc_buffer, 0, out_results->acc_contents.data, - out_results->acc_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - } - - if (iree_status_is_ok(status)) { - out_results->actual_contents.data_length = - iree_hal_buffer_byte_length(result_buffer); - status = iree_allocator_malloc(host_allocator, - out_results->actual_contents.data_length, - (void**)&out_results->actual_contents.data); - } - if (iree_status_is_ok(status)) { - status = iree_hal_device_transfer_d2h( - device, result_buffer, 0, out_results->actual_contents.data, - out_results->actual_contents.data_length, - IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT, iree_infinite_timeout()); - } - - if (iree_status_is_ok(status)) { - out_results->expected_contents.data_length = - iree_hal_buffer_byte_length(result_buffer); - status = iree_allocator_malloc( - host_allocator, out_results->expected_contents.data_length, - (void**)&out_results->expected_contents.data); - } - - if (!iree_status_is_ok(status)) { - matmul_results_deinitialize(out_results); - } - IREE_TRACE_ZONE_END(z0); - return status; -} - -static void matmul_results_deinitialize(matmul_results_t* results) { - IREE_TRACE_ZONE_BEGIN(z0); - - iree_allocator_free(results->host_allocator, results->lhs_contents.data); - iree_allocator_free(results->host_allocator, results->rhs_contents.data); - if (!iree_byte_span_is_empty(results->acc_contents)) { - iree_allocator_free(results->host_allocator, results->acc_contents.data); - } - iree_allocator_free(results->host_allocator, results->actual_contents.data); - iree_allocator_free(results->host_allocator, results->expected_contents.data); - - IREE_TRACE_ZONE_END(z0); -} - -// Returns the largest number of characters to print any matrix element. 
-static int get_max_elem_width(precision_t precision, iree_hal_dim_t rows, - iree_hal_dim_t row_start, iree_hal_dim_t row_end, - iree_hal_dim_t cols, iree_hal_dim_t col_start, - iree_hal_dim_t col_end, - iree_hal_element_type_t element_type, - const uint8_t* matrix) { - int max_elem_width = 0; - for (int row = row_start; row < row_end; row++) { - for (int col = col_start; col < col_end; col++) { - iree_hal_dim_t idx = col + row * cols; - iree_test_utils_e2e_value_t elem = - iree_test_utils_read_buffer_element(idx, element_type, matrix); - // NOTE: iree_max is a macro and may evaluate its args twice. - char buf[64]; - int this_elem_width = - iree_test_utils_snprintf_value(buf, sizeof(buf), elem, precision); - max_elem_width = iree_max(max_elem_width, this_elem_width); - } - } - return max_elem_width; -} - -// Prints |matrix| to |file|, with |label| as caption. -// |precision| controls how many decimals are printed for float values. -// -// If |other_matrix| is not NULL, then any matrix entries that disagree -// between |matrix| and |other_matrix| (according to -// matmul_result_elements_agree) are highlighted. -// -// |highlight| is either NULL or is a UTF-8 string that will be printed next to -// any entry of |matrix| that disagrees with the corresponding entry of -// |other_matrix|. -// -// |highlight| should be NULL if and only if |other_matrix| is NULL. -// -// In order for matrix columns to be properly laid out, the rendering of -// |highlight| in a fixed-width font should have the width of two regular Latin -// characters. According to -// https://www.unicode.org/reports/tr11/#Recommendations, a single emoji -// character should meet that requirement. -static void print_matrix(FILE* file, const char* label, precision_t precision, - iree_hal_dim_t rows, iree_hal_dim_t row_start, - iree_hal_dim_t row_end, iree_hal_dim_t cols, - iree_hal_dim_t col_start, iree_hal_dim_t col_end, - iree_hal_element_type_t element_type, - const uint8_t* matrix, const uint8_t* other_matrix, - const char* highlight) { - IREE_ASSERT((other_matrix == NULL) == (highlight == NULL)); - int max_elem_width = - get_max_elem_width(precision, rows, row_start, row_end, cols, col_start, - col_end, element_type, matrix); - if (other_matrix) { - // NOTE: iree_max is a macro and may evaluate its args twice. - int other_matrix_max_elem_width = - get_max_elem_width(precision, rows, row_start, row_end, cols, col_start, - col_end, element_type, other_matrix); - max_elem_width = iree_max(max_elem_width, other_matrix_max_elem_width); - } - - fprintf(file, - "%s (rows %" PRIdsz "..%" PRIdsz " out of 0..%" PRIdsz - ", columns %" PRIdsz "..%" PRIdsz " out of 0..%" PRIdsz ")\n", - label, row_start, row_end - 1, rows - 1, col_start, col_end - 1, - cols - 1); - for (int row = row_start; row < row_end; row++) { - for (int col = col_start; col < col_end; col++) { - iree_hal_dim_t idx = col + row * cols; - iree_test_utils_e2e_value_t element = - iree_test_utils_read_buffer_element(idx, element_type, matrix); - bool disagree = false; - if (other_matrix) { - iree_test_utils_e2e_value_t other_element = - iree_test_utils_read_buffer_element(idx, element_type, - other_matrix); - disagree = - !iree_test_utils_result_elements_agree(element, other_element); - } - char buf[64]; - iree_test_utils_snprintf_value(buf, sizeof(buf), element, precision); - fprintf(file, "%*s", max_elem_width, buf); - // See comment on |highlight| function parameter for why 2 spaces. 
- // A 3rd space is added unconditionally to make it clear that a highlight - // concerns the matrix entry to its left. - fprintf(file, "%s ", disagree ? highlight : " "); - } - fprintf(file, "\n"); - } -} - -// Helper for check_matmul_results: handler for the failure case. -// If |file| is not NULL, detailed logging is written to it. -static iree_status_t check_matmul_failure( - FILE* file, const matmul_results_t* results, - iree_test_utils_e2e_value_t actual_value, - iree_test_utils_e2e_value_t expected_value, iree_hal_dim_t row, - iree_hal_dim_t col, int check_every) { - if (!file || check_every > 1) { - // No logging of errors with check_every>1 as most of the reference matrix - // elements have not been computed. The caller is expected to retry with - // check_every=1. - return iree_make_status(IREE_STATUS_ABORTED); - } - - IREE_TRACE_ZONE_BEGIN(z0); - - fprintf(file, - "\n\nerror: the actual and expected result matrices disagree " - "at row %" PRIdim ", column %" PRIdim ".\n\n", - row, col); - char actual_value_buf[32]; - char expected_value_buf[32]; - iree_test_utils_snprintf_value(actual_value_buf, sizeof(actual_value_buf), - actual_value, PRECISION_HIGH); - iree_test_utils_snprintf_value(expected_value_buf, sizeof(expected_value_buf), - expected_value, PRECISION_HIGH); - fprintf(file, "actual value: %s\n", actual_value_buf); - fprintf(file, "expected value: %s\n", expected_value_buf); - - iree_hal_dim_t context = 8; - const char* context_env = getenv("IREE_MATMUL_TEST_SHOW_CONTEXT"); - if (context_env) { - if (1 != sscanf(context_env, "%" PRIdim, &context)) { - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "failed to parse IREE_MATMUL_TEST_SHOW_CONTEXT " - "as \"%%" PRIdim "\"; got \"%s\"", - context_env); - } - } - iree_hal_dim_t m_start = - (iree_hal_dim_t)iree_max(0, (int64_t)row - (int64_t)context); - iree_hal_dim_t m_end = iree_min(results->m, row + context); - iree_hal_dim_t n_start = - (iree_hal_dim_t)iree_max(0, (int64_t)col - (int64_t)context); - iree_hal_dim_t n_end = iree_min(results->n, col + context); - iree_hal_dim_t k_start = 0; - iree_hal_dim_t k_end = iree_min(results->k, 2 * context); - // [k_start, k_end) could be arbitrarily long at this point. Constrain it a - // bit to avoid huge output. 
- k_end = iree_min(k_end, k_start + 4 * context); - - fprintf(file, "\n"); - print_matrix(file, "left-hand side", PRECISION_LOW, results->m, m_start, - m_end, results->k, k_start, k_end, results->lhs_type, - results->lhs_contents.data, NULL, NULL); - fprintf(file, "\n"); - print_matrix(file, "right-hand side", PRECISION_LOW, results->k, k_start, - k_end, results->n, n_start, n_end, results->rhs_type, - results->rhs_contents.data, NULL, NULL); - fprintf(file, "\n"); - if (results->acc_contents.data) { - print_matrix(file, "input accumulator", PRECISION_LOW, results->m, m_start, - m_end, results->n, n_start, n_end, results->acc_type, - results->acc_contents.data, NULL, NULL); - fprintf(file, "\n"); - } - print_matrix(file, "expected result", PRECISION_LOW, results->m, m_start, - m_end, results->n, n_start, n_end, results->result_type, - results->expected_contents.data, results->actual_contents.data, - iree_test_utils_emoji(true)); - fprintf(file, "\n"); - print_matrix(file, "actual result", PRECISION_LOW, results->m, m_start, m_end, - results->n, n_start, n_end, results->result_type, - results->actual_contents.data, results->expected_contents.data, - iree_test_utils_emoji(false)); - fprintf(file, "\n"); - - IREE_TRACE_ZONE_END(z0); - return iree_make_status(IREE_STATUS_ABORTED); -} - -// Helper for check_matmul_results: the actual interesting part once we've -// obtained and validated the {m,k,n}_size values. On error, detailed logging is -// written to |file| if it is not NULL. -static iree_status_t check_matmul_results_impl(FILE* file, - const matmul_results_t* results, - int check_every) { - IREE_TRACE_ZONE_BEGIN(z0); - - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, reference_matmul( - results->m, results->k, results->n, results->lhs_type, - results->rhs_type, results->acc_type, results->transpose_rhs, - results->lhs_contents, results->rhs_contents, - results->acc_contents, results->expected_contents, check_every)); - - int count = 0; - for (iree_hal_dim_t m = 0; m < results->m; ++m) { - for (iree_hal_dim_t n = 0; n < results->n; ++n) { - if (++count < check_every) continue; - count = 0; - iree_hal_dim_t idx = m * results->n + n; - iree_test_utils_e2e_value_t actual_value = - iree_test_utils_read_buffer_element(idx, results->result_type, - results->actual_contents.data); - iree_test_utils_e2e_value_t expected_value = - iree_test_utils_read_buffer_element(idx, results->result_type, - results->expected_contents.data); - if (!iree_test_utils_result_elements_agree(actual_value, - expected_value)) { - iree_status_t status = check_matmul_failure( - file, results, actual_value, expected_value, m, n, check_every); - IREE_TRACE_ZONE_END(z0); - return status; - } - } - } - - IREE_TRACE_ZONE_END(z0); - return iree_ok_status(); -} - -// Given an actual matmul's inputs and output (all host-local), uses a reference -// matmul implementation on the same inputs to check if the output is correct. -// On error, detailed logging is written to |file| if it is not NULL. -static iree_status_t check_matmul_results(FILE* file, - const matmul_results_t* results) { - IREE_TRACE_ZONE_BEGIN(z0); - int check_every = iree_test_utils_calculate_check_every( - results->m * results->n, results->n); - iree_status_t status = check_matmul_results_impl(file, results, check_every); - if (!iree_status_is_ok(status) && check_every > 1) { - // If we got a failure with check_every>1, that didn't log a useful - // numerical summary, as most of the reference matrix entries hadn't been - // computed. 
Rerun now with check_every=1 to get that numerical logging.
-    iree_status_ignore(status);
-    status = check_matmul_results_impl(file, results, 1);
-  }
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
-//===----------------------------------------------------------------------===//
-// `matmul_test` custom module
-//===----------------------------------------------------------------------===//
-// This uses the C++ wrapper to keep things simple. Though easier to use, it
-// has additional overhead/code-size bloat that doesn't matter in a test like
-// this. Making a C module builder API that removes the boilerplate there is
-// TBD, so the rest of this file is written in C so that we can swap this
-// module back to being pure C in the future.
-
-namespace iree {
-
-class MatmulTestModuleState final {
- public:
-  explicit MatmulTestModuleState(iree_allocator_t host_allocator)
-      : host_allocator_(host_allocator) {}
-  ~MatmulTestModuleState() = default;
-
-  // Fills the destination span with pseudorandom values of the given
-  // |element_type|. The given |seed| is passed to the pseudorandom generator.
-  // The pseudorandom values are reproducible both across runs and across
-  // machines.
-  StatusOr<vm::ref<iree_hal_buffer_view_t>> GenerateRandomMatrix(
-      const vm::ref<iree_hal_device_t> device, int64_t dim0, int64_t dim1,
-      iree_hal_element_type_t element_type, int32_t seed) {
-    iree_hal_dim_t dims[2] = {
-        (iree_hal_dim_t)dim0,
-        (iree_hal_dim_t)dim1,
-    };
-    iree_hal_buffer_params_t buffer_params = {0};
-    buffer_params.usage = IREE_HAL_BUFFER_USAGE_DEFAULT;
-    buffer_params.access = IREE_HAL_MEMORY_ACCESS_ALL;
-    buffer_params.type = IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_DEVICE;
-    vm::ref<iree_hal_buffer_view_t> result_view;
-    struct callback_state_t {
-      iree_hal_element_type_t element_type;
-      int32_t seed;
-    } callback_state = {
-        element_type,
-        seed,
-    };
-    IREE_RETURN_IF_ERROR(iree_hal_buffer_view_generate_buffer(
-        device.get(), iree_hal_device_allocator(device.get()),
-        IREE_ARRAYSIZE(dims), dims, element_type,
-        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params,
-        +[](iree_hal_buffer_mapping_t* mapping, void* user_data) {
-          callback_state_t callback_state = *(callback_state_t*)user_data;
-          iree_byte_span_t span = mapping->contents;
-          // Generate "uniform" integer-valued numbers in the range [min, max].
-          int32_t min = 0;
-          int32_t max = 0;
-          iree_test_utils_get_min_max_for_element_type(
-              callback_state.element_type, &min, &max);
-          uint32_t range = (max - min + 1);
-          iree_host_size_t element_byte_count =
-              iree_hal_element_dense_byte_count(callback_state.element_type);
-          uint8_t* data_end = span.data + span.data_length;
-          uint32_t state = callback_state.seed;
-          for (uint8_t* data = span.data; data < data_end;
-               data += element_byte_count) {
-            int32_t value =
-                (int32_t)iree_test_utils_pseudorandom_range(&state, range) +
-                min;
-            iree_test_utils_write_element(callback_state.element_type, value,
-                                          data);
-          }
-          return iree_ok_status();
-        },
-        &callback_state, &result_view));
-    return std::move(result_view);
-  }
-
-  Status CheckMatmulResults(
-      const vm::ref<iree_hal_device_t> device, int64_t m, int64_t k, int64_t n,
-      int32_t transpose_rhs, const vm::ref<iree_hal_buffer_view_t> lhs,
-      const vm::ref<iree_hal_buffer_view_t> rhs,
-      const vm::ref<iree_hal_buffer_view_t> acc,
-      const vm::ref<iree_hal_buffer_view_t> actual_result) {
-    matmul_results_t results = {};
-    IREE_RETURN_IF_ERROR(matmul_results_initialize(
-        device.get(), (iree_hal_dim_t)m, (iree_hal_dim_t)k, (iree_hal_dim_t)n,
-        transpose_rhs, lhs.get(), rhs.get(), acc.get(), actual_result.get(),
-        host_allocator_, &results));
-    iree_status_t status = check_matmul_results(stderr, &results);
-    matmul_results_deinitialize(&results);
-    return status;
-  }
-
- private:
-  iree_allocator_t host_allocator_;
-};
-
-static const vm::NativeFunction<MatmulTestModuleState>
-    kMatmulTestModuleFunctions[] = {
-        vm::MakeNativeFunction("generate_random_matrix",
-                               &MatmulTestModuleState::GenerateRandomMatrix),
-        vm::MakeNativeFunction("check_matmul_results",
-                               &MatmulTestModuleState::CheckMatmulResults),
-};
-
-struct MatmulTestModule final
-    : public vm::NativeModule<MatmulTestModuleState> {
-  using vm::NativeModule<MatmulTestModuleState>::NativeModule;
-  StatusOr<std::unique_ptr<MatmulTestModuleState>> CreateState(
-      iree_allocator_t host_allocator) override {
-    return std::make_unique<MatmulTestModuleState>(host_allocator);
-  }
-};
-
-}  // namespace iree
-
-static iree_status_t matmul_test_module_create(iree_vm_instance_t* instance,
-                                               iree_allocator_t host_allocator,
-                                               iree_vm_module_t** out_module) {
-  IREE_ASSERT_ARGUMENT(out_module);
-  *out_module = NULL;
-  auto module = std::make_unique<iree::MatmulTestModule>(
-      "matmul_test", /*version=*/0, instance, host_allocator,
-      iree::span<const iree::vm::NativeFunction<iree::MatmulTestModuleState>>(
-          iree::kMatmulTestModuleFunctions));
-  *out_module = module.release()->interface();
-  return iree_ok_status();
-}
-
-int main(int argc, char** argv) {
-  IREE_TRACE_APP_ENTER();
-
-  iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
-  if (argc != 1) {
-    fprintf(stderr, "use --module= flags to specify the modules to run\n");
-    IREE_TRACE_APP_EXIT(EXIT_FAILURE);
-    return EXIT_FAILURE;
-  }
-
-  // Run the tests. Note that some modules may be compiled for other platforms
-  // and not have the required architectures for execution within them - to
-  // keep the test runner dumber we gracefully fail those cases by returning
-  // success.
-  iree_status_t status = iree_test_utils_load_and_run_e2e_tests(
-      iree_allocator_system(), matmul_test_module_create);
-  int exit_code = EXIT_SUCCESS;
-  if (!iree_status_is_ok(status)) {
-    iree_status_fprint(stderr, status);
-    bool is_device_unavailable = iree_status_is_not_found(status);
-    iree_status_free(status);
-    exit_code = is_device_unavailable ?
EXIT_SUCCESS : EXIT_FAILURE;
-  }
-
-  IREE_TRACE_APP_EXIT(exit_code);
-  return exit_code;
-}
diff --git a/tools/testing/e2e/test_utils.c b/tools/testing/e2e/test_utils.c
deleted file mode 100644
index 29811482de5a..000000000000
--- a/tools/testing/e2e/test_utils.c
+++ /dev/null
@@ -1,494 +0,0 @@
-// Copyright 2024 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "tools/testing/e2e/test_utils.h"
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "iree/base/api.h"
-#include "iree/base/internal/cpu.h"
-#include "iree/base/internal/flags.h"
-#include "iree/base/internal/math.h"
-#include "iree/base/internal/path.h"
-#include "iree/hal/api.h"
-#include "iree/modules/hal/module.h"
-#include "iree/tooling/context_util.h"
-#include "iree/tooling/device_util.h"
-#include "iree/vm/api.h"
-
-IREE_FLAG(bool, require_exact_results, true,
-          "Requires floating point result elements to match exactly.");
-
-bool iree_test_utils_require_exact_results(void) {
-  return FLAG_require_exact_results;
-}
-
-IREE_FLAG(
-    float, acceptable_fp_delta, 1e-5f,
-    "Maximum absolute difference allowed with inexact floating point results.");
-
-float iree_test_utils_acceptable_fp_delta(void) {
-  return FLAG_acceptable_fp_delta;
-}
-
-IREE_FLAG(
-    int32_t, max_elements_to_check, 10000,
-    "Maximum number of tensor elements to check for the given test. For larger "
-    "buffers, only every n-th element will be checked for some n chosen to "
-    "stay just under that threshold and to avoid being a divisor of the inner "
-    "dimension size to avoid special patterns. As the check uses a slow "
-    "reference implementation, this is a trade-off between test latency and "
-    "coverage. The value 0 means check all elements.");
-
-int32_t iree_test_utils_max_elements_to_check(void) {
-  return FLAG_max_elements_to_check;
-}
-
-const char* iree_test_utils_emoji(bool good) { return good ?
"🦄" : "🐞"; } - -int iree_test_utils_calculate_check_every(iree_hal_dim_t tot_elements, - iree_hal_dim_t no_div_of) { - int check_every = 1; - if (iree_test_utils_max_elements_to_check()) { - check_every = - ((tot_elements) + iree_test_utils_max_elements_to_check() - 1) / - iree_test_utils_max_elements_to_check(); - if (check_every < 1) check_every = 1; - if (check_every > 1) - while ((no_div_of % check_every) == 0) ++check_every; - } - return check_every; -} - -iree_test_utils_e2e_value_t iree_test_utils_value_make_none() { - iree_test_utils_e2e_value_t result; - result.type = IREE_TEST_UTILS_VALUE_TYPE_NONE; - return result; -} - -iree_test_utils_e2e_value_t iree_test_utils_value_make_i8(int8_t value) { - iree_test_utils_e2e_value_t result; - result.type = IREE_TEST_UTILS_VALUE_TYPE_I8; - result.i8 = value; - return result; -} - -iree_test_utils_e2e_value_t iree_test_utils_value_make_i16(int16_t value) { - iree_test_utils_e2e_value_t result; - result.type = IREE_TEST_UTILS_VALUE_TYPE_I16; - result.i16 = value; - return result; -} - -iree_test_utils_e2e_value_t iree_test_utils_value_make_i32(int32_t value) { - iree_test_utils_e2e_value_t result; - result.type = IREE_TEST_UTILS_VALUE_TYPE_I32; - result.i32 = value; - return result; -} - -iree_test_utils_e2e_value_t iree_test_utils_value_make_f16(uint16_t value) { - iree_test_utils_e2e_value_t result; - result.type = IREE_TEST_UTILS_VALUE_TYPE_F16; - result.f16_u16 = value; - return result; -} - -iree_test_utils_e2e_value_t iree_test_utils_value_make_bf16(uint16_t value) { - iree_test_utils_e2e_value_t result; - result.type = IREE_TEST_UTILS_VALUE_TYPE_BF16; - result.bf16_u16 = value; - return result; -} - -iree_test_utils_e2e_value_t iree_test_utils_value_make_f32(float value) { - iree_test_utils_e2e_value_t result; - result.type = IREE_TEST_UTILS_VALUE_TYPE_F32; - result.f32 = value; - return result; -} - -iree_test_utils_e2e_value_t iree_test_utils_read_buffer_element( - iree_hal_dim_t index, iree_hal_element_type_t result_type, - const void* data) { - if (iree_hal_element_type_is_integer(result_type, 8)) { - return iree_test_utils_value_make_i8(((int8_t*)data)[index]); - } else if (iree_hal_element_type_is_integer(result_type, 16)) { - return iree_test_utils_value_make_i16(((int16_t*)data)[index]); - } else if (iree_hal_element_type_is_integer(result_type, 32)) { - return iree_test_utils_value_make_i32(((int32_t*)data)[index]); - } else if (result_type == IREE_HAL_ELEMENT_TYPE_FLOAT_16) { - return iree_test_utils_value_make_f16(((uint16_t*)data)[index]); - } else if (result_type == IREE_HAL_ELEMENT_TYPE_BFLOAT_16) { - return iree_test_utils_value_make_bf16(((uint16_t*)data)[index]); - } else if (result_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32) { - return iree_test_utils_value_make_f32(((float*)data)[index]); - } - iree_status_abort(iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "unhandled matmul result type")); - return iree_test_utils_value_make_none(); -} - -int iree_test_utils_snprintf_value(char* buf, size_t bufsize, - iree_test_utils_e2e_value_t value, - precision_t precision) { - switch (value.type) { - case IREE_TEST_UTILS_VALUE_TYPE_I8: - return snprintf(buf, bufsize, "%" PRIi8, value.i8); - case IREE_TEST_UTILS_VALUE_TYPE_I16: - return snprintf(buf, bufsize, "%" PRIi16, value.i16); - case IREE_TEST_UTILS_VALUE_TYPE_I32: - return snprintf(buf, bufsize, "%" PRIi32, value.i32); - case IREE_TEST_UTILS_VALUE_TYPE_I64: - return snprintf(buf, bufsize, "%" PRIi64, value.i64); - case IREE_TEST_UTILS_VALUE_TYPE_F16: - return snprintf(buf, 
bufsize,
-                      precision == PRECISION_HIGH ? "%.5g" : "%.4g",
-                      iree_math_f16_to_f32(value.f16_u16));
-    case IREE_TEST_UTILS_VALUE_TYPE_BF16:
-      return snprintf(buf, bufsize,
-                      precision == PRECISION_HIGH ? "%.5g" : "%.4g",
-                      iree_math_bf16_to_f32(value.bf16_u16));
-    case IREE_TEST_UTILS_VALUE_TYPE_F32:
-      return snprintf(buf, bufsize,
-                      precision == PRECISION_HIGH ? "%.8g" : "%.4g", value.f32);
-    case IREE_TEST_UTILS_VALUE_TYPE_F64:
-      return snprintf(buf, bufsize,
-                      precision == PRECISION_HIGH ? "%.16g" : "%.4g",
-                      value.f64);
-    default:
-      iree_status_abort(iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                                         "unhandled value type"));
-      return 0;
-  }
-}
-
-bool iree_test_utils_result_elements_agree(iree_test_utils_e2e_value_t expected,
-                                           iree_test_utils_e2e_value_t actual) {
-  float acceptable_fp_delta = iree_test_utils_acceptable_fp_delta();
-  if (expected.type != actual.type) {
-    iree_status_abort(
-        iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "mismatched types"));
-    return false;
-  }
-
-  if (acceptable_fp_delta < 0.0f) {
-    iree_status_abort(iree_make_status(
-        IREE_STATUS_INVALID_ARGUMENT,
-        "negative tolerance (acceptable_fp_delta=%.8g)", acceptable_fp_delta));
-    return false;
-  }
-
-  switch (expected.type) {
-    case IREE_TEST_UTILS_VALUE_TYPE_I32:
-      return actual.i32 == expected.i32;
-    // Since we fill buffers with small integers for functional testing of
-    // floating point GEMMs, we can test for bit-exactness on the actual and
-    // expected values. Inexact results are only permitted when the
-    // `require_exact_results` flag is set to `false`.
-    case IREE_TEST_UTILS_VALUE_TYPE_F16:
-      if (actual.f16_u16 == expected.f16_u16) return true;
-      if (iree_test_utils_require_exact_results()) return false;
-      return fabsf(iree_math_f16_to_f32(actual.f16_u16) -
-                   iree_math_f16_to_f32(expected.f16_u16)) <
-             acceptable_fp_delta;
-    case IREE_TEST_UTILS_VALUE_TYPE_BF16:
-      if (actual.bf16_u16 == expected.bf16_u16) return true;
-      if (iree_test_utils_require_exact_results()) return false;
-      return fabsf(iree_math_bf16_to_f32(actual.bf16_u16) -
-                   iree_math_bf16_to_f32(expected.bf16_u16)) <
-             acceptable_fp_delta;
-    case IREE_TEST_UTILS_VALUE_TYPE_F32:
-      if (actual.f32 == expected.f32) return true;
-      if (iree_test_utils_require_exact_results()) return false;
-      return fabsf(actual.f32 - expected.f32) < acceptable_fp_delta;
-    default:
-      iree_status_abort(iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                                         "unhandled value type"));
-      return false;
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// RNG utilities
-//===----------------------------------------------------------------------===//
-
-void iree_test_utils_write_element(iree_hal_element_type_t element_type,
-                                   int32_t value, void* dst) {
-#define WRITE_ELEMENT_CASE(ETYPE, CTYPE) \
-  case IREE_HAL_ELEMENT_TYPE_##ETYPE:    \
-    *(CTYPE*)dst = (CTYPE)value;         \
-    break;
-
-  switch (element_type) {
-    WRITE_ELEMENT_CASE(INT_8, int8_t)
-    WRITE_ELEMENT_CASE(INT_16, int16_t)
-    WRITE_ELEMENT_CASE(INT_32, int32_t)
-    WRITE_ELEMENT_CASE(INT_64, int64_t)
-    WRITE_ELEMENT_CASE(SINT_8, int8_t)
-    WRITE_ELEMENT_CASE(SINT_16, int16_t)
-    WRITE_ELEMENT_CASE(SINT_32, int32_t)
-    WRITE_ELEMENT_CASE(SINT_64, int64_t)
-    WRITE_ELEMENT_CASE(UINT_8, uint8_t)
-    WRITE_ELEMENT_CASE(UINT_16, uint16_t)
-    WRITE_ELEMENT_CASE(UINT_32, uint32_t)
-    WRITE_ELEMENT_CASE(UINT_64, uint64_t)
-    // clang-format off
-    case IREE_HAL_ELEMENT_TYPE_FLOAT_16:
-      *(uint16_t*)dst = iree_math_f32_to_f16((float)value);
-      break;
-    case IREE_HAL_ELEMENT_TYPE_BFLOAT_16:
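      // bf16 keeps the upper 16 bits of the f32 bit pattern (modulo rounding
      // in the conversion helper): e.g. 1.0f (0x3F800000) becomes bf16 0x3F80.
      // The small test integers generated here stay exactly representable.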
*(uint16_t*)dst = iree_math_f32_to_bf16((float)value);
-      break;
-    WRITE_ELEMENT_CASE(FLOAT_32, float)
-    WRITE_ELEMENT_CASE(FLOAT_64, double)
-    // clang-format on
-    default:
-      IREE_ASSERT(false, "unhandled element type");
-      break;
-  }
-
-#undef WRITE_ELEMENT_CASE
-}
-
-uint32_t iree_test_utils_pseudorandom_uint32(uint32_t* state) {
-  *state = (*state * IREE_PRNG_MULTIPLIER) % IREE_PRNG_MODULUS;
-  return *state;
-}
-
-uint32_t iree_test_utils_pseudorandom_range(uint32_t* state, uint32_t range) {
-  return iree_test_utils_pseudorandom_uint32(state) % range;
-}
-
-void iree_test_utils_get_min_max_for_element_type(
-    iree_hal_element_type_t element_type, int32_t* min, int32_t* max) {
-  switch (element_type) {
-    case IREE_HAL_ELEMENT_TYPE_INT_8:
-    case IREE_HAL_ELEMENT_TYPE_SINT_8:
-      *min = -2;
-      *max = +2;
-      break;
-    case IREE_HAL_ELEMENT_TYPE_UINT_8:
-      *min = 0;
-      *max = +2;
-      break;
-    case IREE_HAL_ELEMENT_TYPE_INT_16:
-    case IREE_HAL_ELEMENT_TYPE_SINT_16:
-    case IREE_HAL_ELEMENT_TYPE_FLOAT_16:
-      *min = -4;
-      *max = +4;
-      break;
-    case IREE_HAL_ELEMENT_TYPE_BFLOAT_16:
-      *min = -2;
-      *max = +2;
-      break;
-    case IREE_HAL_ELEMENT_TYPE_UINT_16:
-      *min = 0;
-      *max = +4;
-      break;
-    case IREE_HAL_ELEMENT_TYPE_INT_32:
-    case IREE_HAL_ELEMENT_TYPE_SINT_32:
-    case IREE_HAL_ELEMENT_TYPE_FLOAT_32:
-      *min = -8;
-      *max = +8;
-      break;
-    case IREE_HAL_ELEMENT_TYPE_UINT_32:
-      *min = 0;
-      *max = +8;
-      break;
-    case IREE_HAL_ELEMENT_TYPE_INT_64:
-    case IREE_HAL_ELEMENT_TYPE_SINT_64:
-    case IREE_HAL_ELEMENT_TYPE_FLOAT_64:
-      *min = -16;
-      *max = +16;
-      break;
-    case IREE_HAL_ELEMENT_TYPE_UINT_64:
-      *min = 0;
-      *max = +16;
-      break;
-    default:
-      IREE_ASSERT(false, "unhandled element type");
-      break;
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Test runner
-//===----------------------------------------------------------------------===//
-
-iree_status_t iree_test_utils_check_test_function(iree_vm_function_t function,
-                                                  bool* out_is_valid) {
-  *out_is_valid = true;
-
-  iree_string_view_t function_name = iree_vm_function_name(&function);
-  if (iree_string_view_starts_with(function_name,
-                                   iree_make_cstring_view("__"))) {
-    // Internal compiler/runtime support function.
-    *out_is_valid = false;
-  }
-
-  iree_vm_function_signature_t function_signature =
-      iree_vm_function_signature(&function);
-  iree_host_size_t argument_count = 0;
-  iree_host_size_t result_count = 0;
-  IREE_RETURN_IF_ERROR(iree_vm_function_call_count_arguments_and_results(
-      &function_signature, &argument_count, &result_count));
-  if (argument_count || result_count) {
-    // Takes args or has results we don't expect.
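    // For example, an export shaped like `@matmul_128x128 () -> ()` would be
    // runnable, while `@__init` (internal prefix) or any function taking
    // arguments is skipped. (These names are illustrative, not taken from a
    // real generated suite.)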
- *out_is_valid = false; - } - - return iree_ok_status(); -} - -iree_status_t iree_test_utils_run_test_function( - iree_vm_context_t* context, iree_vm_function_t function, - iree_allocator_t host_allocator) { - IREE_TRACE_ZONE_BEGIN(z0); - iree_string_view_t function_name = iree_vm_function_name(&function); - IREE_TRACE_ZONE_APPEND_TEXT(z0, function_name.data, function_name.size); - fprintf(stderr, "--- TEST[%.*s] ---\n", (int)function_name.size, - function_name.data); - iree_string_view_t function_desc = - iree_vm_function_lookup_attr_by_name(&function, IREE_SV("description")); - if (!iree_string_view_is_empty(function_desc)) { - fprintf(stderr, "%.*s\n", (int)function_desc.size, function_desc.data); - } - iree_status_t status = iree_vm_invoke( - context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/NULL, - /*inputs=*/NULL, /*outputs=*/NULL, host_allocator); - IREE_TRACE_ZONE_END(z0); - return status; -} - -iree_status_t iree_test_utils_run_all_test_functions( - iree_vm_context_t* context, iree_vm_module_t* test_module, - iree_allocator_t host_allocator) { - IREE_TRACE_ZONE_BEGIN(z0); - - // Walk all functions and find the ones we can run (no args, non-internal). - const iree_vm_module_signature_t module_signature = - iree_vm_module_signature(test_module); - for (iree_host_size_t i = 0; i < module_signature.export_function_count; - ++i) { - // Get the function and filter to just the public user exports. - iree_vm_function_t function; - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, iree_vm_module_lookup_function_by_ordinal( - test_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function)); - bool is_valid = false; - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, iree_test_utils_check_test_function(function, &is_valid)); - if (is_valid) { - // Try to run the function and fail on mismatch. - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, - iree_test_utils_run_test_function(context, function, host_allocator)); - } - } - - IREE_TRACE_ZONE_END(z0); - return iree_ok_status(); -} - -iree_status_t iree_test_utils_check_module_requirements( - iree_vm_module_t* module) { - iree_string_view_t target_features = - iree_vm_module_lookup_attr_by_name(module, IREE_SV("target_features")); - while (!iree_string_view_is_empty(target_features)) { - iree_string_view_t required_feature; - iree_string_view_split(target_features, ',', &required_feature, - &target_features); - if (iree_string_view_is_empty(required_feature)) continue; - int64_t feature_is_supported = 0; - IREE_RETURN_IF_ERROR( - iree_cpu_lookup_data_by_key(required_feature, &feature_is_supported)); - if (!feature_is_supported) { - return iree_make_status( - // The error status matters. We distinguish "feature not supported" - // which is a normal thing to happen from actual errors. - IREE_STATUS_NOT_FOUND, - "target device does not have the required feature '%.*s'", - (int)required_feature.size, required_feature.data); - } - } - return iree_ok_status(); -} - -iree_status_t iree_test_utils_load_and_run_e2e_tests( - iree_allocator_t host_allocator, - iree_status_t (*test_module_create)(iree_vm_instance_t*, iree_allocator_t, - iree_vm_module_t**)) { - IREE_TRACE_ZONE_BEGIN(z0); - - iree_cpu_initialize(host_allocator); - - iree_vm_instance_t* instance = NULL; - IREE_RETURN_AND_END_ZONE_IF_ERROR( - z0, iree_tooling_create_instance(host_allocator, &instance)); - - iree_tooling_module_list_t module_list; - iree_tooling_module_list_initialize(&module_list); - - // Create the test module providing helper functions used by test programs. 
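  // The helper module is registered before any --module= flags are loaded so
  // that generated test programs can resolve imports such as
  // matmul_test.check_matmul_results or conv2d_test.generate_random_tensor
  // (names as registered by the runner-specific test_module_create callbacks).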
-  iree_vm_module_t* custom_test_module = NULL;
-  iree_status_t status =
-      test_module_create(instance, host_allocator, &custom_test_module);
-  if (iree_status_is_ok(status)) {
-    status =
-        iree_tooling_module_list_push_back(&module_list, custom_test_module);
-  }
-  iree_vm_module_release(custom_test_module);
-
-  // Load all modules specified by --module= flags.
-  if (iree_status_is_ok(status)) {
-    status = iree_tooling_load_modules_from_flags(instance, host_allocator,
-                                                  &module_list);
-  }
-  iree_vm_module_t* test_module = iree_tooling_module_list_back(&module_list);
-
-  // Create the context with our support module and all --module= flags.
-  iree_vm_context_t* context = NULL;
-  iree_hal_device_t* device = NULL;
-  if (iree_status_is_ok(status)) {
-    status = iree_tooling_create_context_from_flags(
-        instance, module_list.count, module_list.values,
-        /*default_device_uri=*/iree_string_view_empty(), host_allocator,
-        &context, &device, /*out_device_allocator=*/NULL);
-  }
-
-  // Ensure the test module is possible to run.
-  if (iree_status_is_ok(status)) {
-    status = iree_test_utils_check_module_requirements(test_module);
-  }
-  iree_tooling_module_list_reset(&module_list);
-
-  // Begin profiling (if enabled).
-  if (iree_status_is_ok(status)) {
-    status = iree_hal_begin_profiling_from_flags(device);
-  }
-
-  // Run all of the tests in the test module.
-  if (iree_status_is_ok(status)) {
-    status = iree_test_utils_run_all_test_functions(context, test_module,
-                                                    host_allocator);
-  }
-
-  // End profiling (if enabled).
-  if (iree_status_is_ok(status)) {
-    status = iree_hal_end_profiling_from_flags(device);
-  }
-
-  iree_hal_device_release(device);
-  iree_vm_context_release(context);
-  iree_vm_instance_release(instance);
-
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
diff --git a/tools/testing/e2e/test_utils.h b/tools/testing/e2e/test_utils.h
deleted file mode 100644
index f095537112e9..000000000000
--- a/tools/testing/e2e/test_utils.h
+++ /dev/null
@@ -1,148 +0,0 @@
-// Copyright 2024 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef IREE_TOOLS_TESTING_E2E_TEST_UTILS_H_
-#define IREE_TOOLS_TESTING_E2E_TEST_UTILS_H_
-#include
-
-#include "iree/base/api.h"
-#include "iree/base/internal/flags.h"
-#include "iree/hal/api.h"
-#include "iree/vm/api.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-bool iree_test_utils_require_exact_results(void);
-
-float iree_test_utils_acceptable_fp_delta(void);
-
-int32_t iree_test_utils_max_elements_to_check(void);
-
-const char* iree_test_utils_emoji(bool good);
-
-int iree_test_utils_calculate_check_every(iree_hal_dim_t tot_elements,
-                                          iree_hal_dim_t no_div_of);
-
-// Defines the type of a primitive value.
-typedef enum iree_test_utils_value_type_e {
-  // Not a value type.
-  IREE_TEST_UTILS_VALUE_TYPE_NONE = 0,
-  // int8_t.
-  IREE_TEST_UTILS_VALUE_TYPE_I8 = 1,
-  // int16_t.
-  IREE_TEST_UTILS_VALUE_TYPE_I16 = 2,
-  // int32_t.
-  IREE_TEST_UTILS_VALUE_TYPE_I32 = 3,
-  // int64_t.
-  IREE_TEST_UTILS_VALUE_TYPE_I64 = 4,
-  // f16 (stored as a uint16_t bit pattern).
-  IREE_TEST_UTILS_VALUE_TYPE_F16 = 5,
-  // float.
-  IREE_TEST_UTILS_VALUE_TYPE_F32 = 6,
-  // double.
-  IREE_TEST_UTILS_VALUE_TYPE_F64 = 7,
-  // bfloat16 (stored as a uint16_t bit pattern).
-  IREE_TEST_UTILS_VALUE_TYPE_BF16 = 8,
-} iree_test_utils_value_type_t;
-
-// Maximum size, in bytes, of any value type we can represent.
-#define IREE_E2E_TEST_VALUE_STORAGE_SIZE 8
-
-// A variant value type.
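// The union below stores f16/bf16 as raw uint16_t bit patterns rather than
// converting them; e.g. f16 1.0 is kept as 0x3C00 and only widened to f32
// when compared or printed. value_storage simply guarantees 8 bytes of
// backing for the largest members (i64/f64).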
-typedef struct iree_test_utils_value_t {
-  iree_test_utils_value_type_t type;
-  union {
-    int8_t i8;
-    int16_t i16;
-    int32_t i32;
-    int64_t i64;
-    float f32;
-    uint16_t f16_u16;
-    uint16_t bf16_u16;
-    double f64;
-    uint8_t value_storage[IREE_E2E_TEST_VALUE_STORAGE_SIZE];  // max size of all
-                                                              // value types
-  };
-} iree_test_utils_e2e_value_t;
-
-// Enum controlling how many decimals to print floats with.
-typedef enum iree_test_utils_precision_e {
-  PRECISION_LOW,
-  PRECISION_HIGH,
-} precision_t;
-
-// Reads an element from a buffer given its index.
-iree_test_utils_e2e_value_t iree_test_utils_read_buffer_element(
-    iree_hal_dim_t index, iree_hal_element_type_t result_type,
-    const void* data);
-
-// Prints an iree_test_utils_e2e_value_t to a string buffer. Returns the number
-// of characters written. Like snprintf.
-int iree_test_utils_snprintf_value(char* buf, size_t bufsize,
-                                   iree_test_utils_e2e_value_t value,
-                                   precision_t precision);
-
-// Returns true if |expected| and |actual| agree to tolerable accuracy.
-bool iree_test_utils_result_elements_agree(iree_test_utils_e2e_value_t expected,
-                                           iree_test_utils_e2e_value_t actual);
-
-//===----------------------------------------------------------------------===//
-// RNG utilities
-//===----------------------------------------------------------------------===//
-
-// Parameters for a locally defined LCG similar to std::minstd_rand.
-#define IREE_PRNG_MULTIPLIER 48271
-#define IREE_PRNG_MODULUS 2147483647
-
-// Simple deterministic pseudorandom generator.
-// This function is the same as C++'s std::minstd_rand.
-uint32_t iree_test_utils_pseudorandom_uint32(uint32_t* state);
-
-// Returns a random uint32_t in the range [0, range).
-uint32_t iree_test_utils_pseudorandom_range(uint32_t* state, uint32_t range);
-
-// Writes an element of the given |element_type| with the given integral
-// |value| to |dst|.
-void iree_test_utils_write_element(iree_hal_element_type_t element_type,
-                                   int32_t value, void* dst);
-
-// Gets the minimum and maximum for an integer-valued uniform distribution.
-void iree_test_utils_get_min_max_for_element_type(
-    iree_hal_element_type_t element_type, int32_t* min, int32_t* max);
-
-// Returns true if the |function| is a supported callable test function.
-// We only support functions that are publicly exported, are not internal
-// compiler/runtime functions (__ prefixed), and take/return no args/results.
-iree_status_t iree_test_utils_check_test_function(iree_vm_function_t function,
-                                                  bool* out_is_valid);
-
-// Synchronously runs a test |function|.
-// If the test fails then the failure status is returned to the caller.
-iree_status_t iree_test_utils_run_test_function(
-    iree_vm_context_t* context, iree_vm_function_t function,
-    iree_allocator_t host_allocator);
-
-// Runs all test functions in |test_module|.
-iree_status_t iree_test_utils_run_all_test_functions(
-    iree_vm_context_t* context, iree_vm_module_t* test_module,
-    iree_allocator_t host_allocator);
-
-// Returns OK if all requirements declared on |module| are met, and otherwise
-// NOT_FOUND, indicating that the module should not be run.
-iree_status_t iree_test_utils_check_module_requirements(
-    iree_vm_module_t* module);
-
-iree_status_t iree_test_utils_load_and_run_e2e_tests(
-    iree_allocator_t host_allocator,
-    iree_status_t (*test_module_create)(iree_vm_instance_t*, iree_allocator_t,
-                                        iree_vm_module_t**));
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-
-#endif  // IREE_TOOLS_TESTING_E2E_TEST_UTILS_H_
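// For context, a runner built on these utilities was driven entirely by
// flags, along these lines (only --module= appears in main() above; the
// device and tolerance flags are the standard IREE tooling/test_utils flags
// and the values are purely illustrative):
//
//   iree-e2e-matmul-test \
//       --module=matmul_tests.vmfb --module=matmul_calls.vmfb \
//       --device=hip \
//       --require_exact_results=false --acceptable_fp_delta=1e-4
//
// A NOT_FOUND status (missing device or unsupported target_features) maps to
// EXIT_SUCCESS so unsupported configurations skip rather than fail.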