Refactor Known Failing Tests #822

Merged
merged 8 commits on Mar 30, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/presubmit.yml
@@ -195,7 +195,7 @@ jobs:
name: Build and test chipStar on ${{ matrix.backend }} ${{matrix.pocl-version }} (llvm-${{ matrix.llvm-version }})
runs-on: ubuntu-latest
env:
EXCLUDE: ${{ matrix.backend == 'intel' && '"`cat ./test_lists/cpu_opencl_failed_tests.txt`"' || '"`cat ./test_lists/cpu_pocl_failed_tests.txt`"' }}
EXCLUDE: ${{ matrix.backend == 'intel' && '"`cat ./test_lists/OPENCL_POCL.txt`|`cat ./test_lists/ALL.txt`"' || '"`cat ./test_lists/OPENCL_POCL.txt`|`cat ./test_lists/ALL.txt`"' }}
strategy:
matrix:
pocl-version: [4, 5]
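The new EXCLUDE expression composes its ctest exclusion regex from two generated files, OPENCL_POCL.txt for the backend-specific failures and ALL.txt for tests known to fail everywhere. A minimal Python sketch of that composition, assuming both files hold "$"-anchored test names joined with "|" (only the file paths come from the diff above):

# Join the two generated lists the way the workflow's backtick expansion does;
# the resulting pattern is what `ctest -E "..."` ultimately receives.
with open("./test_lists/OPENCL_POCL.txt") as f:
    backend_failures = f.read().strip()
with open("./test_lists/ALL.txt") as f:
    universal_failures = f.read().strip()
exclude_pattern = f"{backend_failures}|{universal_failures}"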
14 changes: 12 additions & 2 deletions CMakeLists.txt
@@ -363,8 +363,18 @@ add_custom_target(hipInfoBin ALL
COMMAND ${HIPCC_BUILD_PATH}/hipcc ${CMAKE_SOURCE_DIR}/samples/hipInfo/hipInfo.cpp -o ${CMAKE_BINARY_DIR}/hipInfo
DEPENDS hipcc.bin CHIP devicelib_bc)

include(UnitTests)

execute_process(
COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/scripts/manage_known_failures.py
${CMAKE_CURRENT_SOURCE_DIR}/tests/known_failures.yaml
--generate ${CMAKE_BINARY_DIR}/test_lists
RESULT_VARIABLE result_var
ERROR_VARIABLE error_var
)
message("Result: ${result_var}")
# if the script fails, print the error and abort
if(NOT result_var EQUAL 0)
message(FATAL_ERROR "manage_known_failures.py: ${error_var}")
endif()
# chipStar BINARIES & TESTS
# =============================================================================

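The configure-time step added above regenerates the per-category exclusion lists under ${CMAKE_BINARY_DIR}/test_lists and aborts configuration if the script fails. It is roughly equivalent to running the script by hand from the source tree (the build-directory path here is illustrative):

python3 scripts/manage_known_failures.py tests/known_failures.yaml --generate build/test_lists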
2 changes: 1 addition & 1 deletion HIP
@@ -51,7 +51,7 @@ float RandFloat(float low, float high)
////////////////////////////////////////////////////////////////////////////////
// Data configuration
////////////////////////////////////////////////////////////////////////////////
const int OPT_N = 4000000;
const int OPT_N = 1000000;
const int NUM_ITERATIONS = 512;


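The file header above is truncated, but the OPT_N / NUM_ITERATIONS pair suggests the BlackScholes HIP sample. If so, lowering OPT_N from 4,000,000 to 1,000,000 options shrinks each float array from roughly 15.3 MiB to 3.8 MiB and scales the work of the 512-iteration loop down by the same factor.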
2 changes: 1 addition & 1 deletion samples/cuda_samples/6_Advanced/scan/scan.cu
@@ -164,7 +164,7 @@ __global__ void uniformUpdate(
////////////////////////////////////////////////////////////////////////////////
//Derived as 32768 (max power-of-two gridDim.x) * 4 * THREADBLOCK_SIZE
//Due to scanExclusiveShared<<<>>>() 1D block addressing
extern "C" const uint MAX_BATCH_ELEMENTS = 32 * 1048576;
extern "C" const uint MAX_BATCH_ELEMENTS = 16 * 1048576;
extern "C" const uint MIN_SHORT_ARRAY_SIZE = 4;
extern "C" const uint MAX_SHORT_ARRAY_SIZE = 4 * THREADBLOCK_SIZE;
extern "C" const uint MIN_LARGE_ARRAY_SIZE = 8 * THREADBLOCK_SIZE;
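Per the comment above, the old limit follows from 32768 * 4 * THREADBLOCK_SIZE; assuming the stock sample's THREADBLOCK_SIZE of 256, that is 33,554,432 = 32 * 1048576 elements. The new value of 16 * 1048576 halves the largest batch the test exercises, roughly 64 MiB per uint buffer instead of 128 MiB.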
67 changes: 21 additions & 46 deletions scripts/check.py
@@ -12,24 +12,29 @@
epilog='have a nice day')

parser.add_argument('work_dir', type=str, help='Path to build directory')
parser.add_argument('device_type', type=str, choices=['cpu', 'igpu', 'dgpu'], help='Device type')
parser.add_argument('backend', type=str, choices=['opencl', 'level0-reg', 'level0-imm', 'pocl'], help='Backend to use')
parser.add_argument('device_type', type=str, choices=['cpu', 'igpu', 'dgpu', 'pocl'], help='Device type')
parser.add_argument('backend', type=str, choices=['opencl', 'level0'], help='Backend to use')
parser.add_argument('--num-threads', type=int, nargs='?', default=os.cpu_count(), help='Number of threads to use (default: number of cores on the system)')
parser.add_argument('--timeout', type=int, nargs='?', default=200, help='Timeout in seconds (default: 200)')
parser.add_argument('-m', '--modules', type=str, choices=['on', 'off'], default="off", help='load modulefiles automatically (default: off)')
parser.add_argument('-v', '--verbose', action='store_true', help='verbose output')
parser.add_argument('-d', '--dry-run', '-N', action='store_true', help='dry run')
parser.add_argument('-c', '--categories', action='store_true', help='run tests by categories, including running a set of tests in a single thread')
parser.add_argument('--regex-include', type=str, nargs='?', default="", help='Tests to be run must also match this regex (known failures will still be excluded)')
parser.add_argument('--regex-exclude', type=str, nargs='?', default="", help='Specifically exclude tests that match this regex (known failures will still be excluded)')

# --total-runtime cannot be used with --num-tries
group = parser.add_mutually_exclusive_group()
group.add_argument('--total-runtime', type=str, nargs='?', default=0, help='Set --num-tries such that the total runtime is approximately this value in hours')
group.add_argument('--total-runtime', type=str, nargs='?', default=None, help='Set --num-tries such that the total runtime is approximately this value in hours')
group.add_argument('--num-tries', type=int, nargs='?', default=1, help='Number of tries (default: 1)')

args = parser.parse_args()

# make sure that args.total_runtime end in either m or h
if args.total_runtime is not None:
if str(args.total_runtime[-1]) not in ["m", "h"]:
print("Error: --total-runtime should end in either 'm' or 'h'")
exit(1)

# execute a command and return the output along with the return code
def run_cmd(cmd):
# get current milliseconds
@@ -56,30 +61,13 @@ def run_cmd(cmd):
print(f"num_tries: {args.num_tries}")
print(f"timeout: {args.timeout}")

if args.device_type == "cpu":
if args.device_type in ["cpu", "pocl"]:
device_type_stripped = "cpu"
elif args.device_type in ["dgpu", "igpu"]:
device_type_stripped = "gpu"

if args.backend in ["pocl", "opencl"]:
env_vars = f"CHIP_BE=opencl CHIP_DEVICE_TYPE={device_type_stripped}"
else:
env_vars = f"CHIP_BE=level0 CHIP_DEVICE_TYPE={device_type_stripped}"

if args.backend == "level0-reg":
level0_cmd_list = "reg_"
args.backend = "level0"
backend_full = "level0_reg"
env_vars += " CHIP_L0_IMM_CMD_LISTS=OFF"
elif args.backend == "level0-imm":
level0_cmd_list = "imm_"
args.backend = "level0"
backend_full = "level0_imm"
env_vars += " CHIP_L0_IMM_CMD_LISTS=ON"
else:
level0_cmd_list = ""
backend_full = args.backend

env_vars = f"CHIP_BE={args.backend} CHIP_DEVICE_TYPE={device_type_stripped}"

# setup module load line
modules = ""
if args.modules == "on":
@@ -94,7 +82,7 @@ def run_cmd(cmd):
modules += "level-zero/igpu"
elif args.backend == "level0" and args.device_type == "dgpu":
modules += "level-zero/dgpu"
elif args.backend == "pocl" and args.device_type == "cpu":
elif args.backend == "opencl" and args.device_type == "pocl":
modules += "opencl/pocl"
modules += " && module list;"

@@ -120,28 +108,15 @@ def run_cmd(cmd):
else:
texture_cmd = ""

all_test_list = f"./test_lists/ALL.txt"
failed_test_list = f"./test_lists/{args.backend.upper()}_{device_type_stripped.upper()}.txt"

def run_tests(num_tries):
if args.categories:
cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt"
cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt"
cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt"
cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt"

res_deviceFunc, err = run_cmd(cmd_deviceFunc)
res_graph, err = run_cmd(cmd_graph)
res_single, err = run_cmd(cmd_single)
res_other, err = run_cmd(cmd_other)

if "0 tests failed" in res_deviceFunc and "0 tests failed" in res_graph and "0 tests failed" in res_single and "0 tests failed" in res_other:
exit(0)
else:
exit(1)
else:
if len(args.regex_exclude) > 0:
args.regex_exclude = f"{args.regex_exclude}|"
if len(args.regex_include) > 0:
args.regex_include = f"-R {args.regex_include}"
cmd = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} {args.regex_include} -E \"{args.regex_exclude}`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -O checkpy_{args.device_type}_{backend_full}.txt"
if len(args.regex_exclude) > 0:
args.regex_exclude = f"{args.regex_exclude}|"
if len(args.regex_include) > 0:
args.regex_include = f"-R {args.regex_include}"
cmd = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} {args.regex_include} -E \"{args.regex_exclude}`cat {failed_test_list}`|`cat {all_test_list}`{texture_cmd}\" -O checkpy_{args.backend}_{args.device_type}.txt"
res, err = run_cmd(cmd)
return res, err

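After the refactor, check.py no longer special-cases the level0-reg/level0-imm pseudo-backends, and pocl is treated as a device type rather than a backend; exclusions come from exactly two generated lists, one per backend/device pair plus the shared ALL.txt. An illustrative sketch of the path resolution (only the naming scheme comes from the diff):

# Illustrative only: roughly how `check.py ./ dgpu level0` resolves its lists.
backend = "level0"
device_type_stripped = "gpu"   # dgpu and igpu both collapse to "gpu"
failed_test_list = f"./test_lists/{backend.upper()}_{device_type_stripped.upper()}.txt"
all_test_list = "./test_lists/ALL.txt"
# -> ./test_lists/LEVEL0_GPU.txt plus ./test_lists/ALL.txt, both cat'ed into
#    the single `ctest -E` pattern built near the end of the diff.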
135 changes: 135 additions & 0 deletions scripts/manage_known_failures.py
@@ -0,0 +1,135 @@
#!/usr/bin/env python3

import sys
import yaml
import argparse
import os

parser = argparse.ArgumentParser(
prog="check.py",
description="Run the unit tests for the specified device type and backend",
epilog="have a nice day",
)

parser.add_argument(
"known_failures_path", help="Path to the known_failures.yaml file", type=str
)

parser.add_argument(
"--generate",
type=str,
help="Generate test_lists files at the specified output path",
)

parser.add_argument(
"--cleanup", action="store_true", help="Cleanup the known_failures.yaml file"
)

parser.add_argument(
"--print", action="store_true", help="Pretty print the known_failures.yaml file"
)


args = parser.parse_args()

categories = ["ALL", "OPENCL_GPU", "OPENCL_CPU", "OPENCL_POCL", "LEVEL0_GPU"]


def dump_known_failures_to_yaml(known_failures, yaml_path, total_tests):
known_failures["TOTAL_TESTS"] = total_tests
with open(yaml_path, 'w') as file:
yaml.dump(known_failures, file)


def load_known_failures_from_yaml(yaml_path):
with open(yaml_path, 'r') as file:
known_failures = yaml.safe_load(file)
total_tests = known_failures.pop("TOTAL_TESTS", None)
return known_failures, int(total_tests)


def prune_tests_map(tests_map):
# Define the categories to check
categories_to_check = ["OPENCL_GPU", "OPENCL_CPU", "OPENCL_POCL", "LEVEL0_GPU"]
opencl_categories = ["OPENCL_GPU", "OPENCL_CPU", "OPENCL_POCL"]

# Find all tests that are in all categories
common_tests = set(tests_map[categories_to_check[0]].keys())
for category in categories_to_check[1:]:
common_tests &= set(tests_map[category].keys())

# Remove common tests from their categories and add them to "ALL"
for test in common_tests:
for category in categories_to_check:
del tests_map[category][test]
tests_map["ALL"][test] = ""

# If a test appears in any of the OPENCL categories and also in LEVEL0_GPU, add it to "ALL"
for test in tests_map["LEVEL0_GPU"].keys():
for category in opencl_categories:
if test in tests_map[category]:
del tests_map[category][test]
tests_map["ALL"][test] = ""

# Ensure tests in "ALL" do not appear in any other category
for test in tests_map["ALL"].keys():
for category in tests_map:
if category != "ALL" and test in tests_map[category]:
del tests_map[category][test]

# Sort the tests in each category by their names
for category in tests_map:
tests_map[category] = dict(sorted(tests_map[category].items()))

return tests_map

def pretty_print_known_failures(known_failures, total_tests):
all_tests = set(known_failures.get("ALL", {}).keys())
for category, tests in known_failures.items():
if category == "ALL":
continue
category_failures = set(tests.keys())
unique_failures = category_failures - all_tests
total_failures = category_failures.union(all_tests)
num_unique_failures = len(unique_failures)
num_total_failures = len(total_failures)
pass_rate = ((total_tests - num_total_failures) / total_tests) * 100
print(f"{category} - Unique Failures: {num_unique_failures}, Total Failures: {num_total_failures}, Pass Rate: {pass_rate:.2f}%")
for test in unique_failures:
print(f"\t{test}")
num_all_failures = len(all_tests)
all_pass_rate = ((total_tests - num_all_failures) / total_tests) * 100
print(f"ALL - Total Failures: {num_all_failures}, Pass Rate: {all_pass_rate:.2f}%")


def generate_test_string(tests_map, output_dir):
test_string_map = {}
for category, tests in tests_map.items():
test_string = "$|".join(tests) + "$"
test_string_map[category] = test_string
# print(f"{category}\n {test_string}")
with open(f"{output_dir}/{category}.txt", "+w") as file:
file.write(test_string)
return test_string_map


def main():
known_failures, total_tests = load_known_failures_from_yaml(args.known_failures_path)
if args.generate:
print("Generating test_lists files")
# make sure output_dir exists, if not create it
if not os.path.exists(args.generate):
os.makedirs(args.generate)
generate_test_string(known_failures, args.generate)
elif args.cleanup:
print("Cleaning up known_failures.yaml")
known_failures = prune_tests_map(known_failures)
dump_known_failures_to_yaml(known_failures, args.known_failures_path, total_tests)
elif args.print:
pretty_print_known_failures(known_failures, total_tests)
else:
print("No action specified. Use --generate, --cleanup, or --print")


if __name__ == "__main__":
main()
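Reading the loader and generator together gives a rough picture of the expected known_failures.yaml layout: a top-level TOTAL_TESTS count plus one mapping per category, with test names as keys. A minimal sketch, with entirely hypothetical test names and counts:

import yaml

example = yaml.safe_load("""
TOTAL_TESTS: 1000
ALL:
  hipTestAlwaysFails: ""
OPENCL_POCL:
  hipTestFailsOnPocl: ""
OPENCL_CPU: {}
OPENCL_GPU: {}
LEVEL0_GPU: {}
""")
# With this input, --generate would write e.g. test_lists/ALL.txt containing
#   hipTestAlwaysFails$
# and test_lists/OPENCL_POCL.txt containing
#   hipTestFailsOnPocl$
# i.e. "$"-anchored names joined with "|", ready to drop into `ctest -E`.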
4 changes: 2 additions & 2 deletions scripts/unit_tests.sh
@@ -163,7 +163,7 @@ module unload opencl/dgpu
echo "begin igpu_level0_failed_imm_tests"
# module load level-zero/igpu
# module list
../scripts/check.py ./ igpu level0-imm --num-threads=${num_threads} --timeout=$timeout --num-tries=$num_tries --modules=on | tee igpu_level0_imm_make_check_result.txt
../scripts/check.py ./ igpu level0 --num-threads=${num_threads} --timeout=$timeout --num-tries=$num_tries --modules=on | tee igpu_level0_imm_make_check_result.txt
# CHIP_L0_IMM_CMD_LISTS=OFF ctest --timeout $timeout --repeat until-fail:${num_tries} $(ctest_j_option 4) --output-on-failure -E "`cat ./test_lists/igpu_level0_failed_imm_tests.txt`" | tee igpu_level0_imm_make_check_result.txt
# pushd ${LIBCEED_DIR}
# make FC= CC=clang CXX=clang++ BACKENDS="/gpu/hip/ref /gpu/hip/shared /gpu/hip/gen" prove --repeat until-fail:${num_tries} $(ctest_j_option 12) PROVE_OPS="-j" | tee dgpu_level0_imm_make_check_result.txt
@@ -175,7 +175,7 @@ echo "end igpu_level0_failed_imm_tests"
echo "begin dgpu_level0_failed_imm_tests"
# module load level-zero/dgpu
# module list
../scripts/check.py ./ dgpu level0-imm --num-threads=${num_threads} --timeout=$timeout --num-tries=$num_tries --modules=on | tee dgpu_level0_imm_make_check_result.txt
../scripts/check.py ./ dgpu level0 --num-threads=${num_threads} --timeout=$timeout --num-tries=$num_tries --modules=on | tee dgpu_level0_imm_make_check_result.txt
# CHIP_L0_IMM_CMD_LISTS=ON ctest --timeout $timeout --repeat until-fail:${num_tries} $(ctest_j_option 8) --output-on-failure -E "`cat ./test_lists/dgpu_level0_failed_imm_tests.txt`" | tee dgpu_level0_imm_make_check_result.txt
# pushd ${LIBCEED_DIR}
# HIP_DIR=${CHIPSTAR_INSTALL_DIR} make FC= CC=clang CXX=clang++ BACKENDS="/gpu/hip/ref /gpu/hip/shared /gpu/hip/gen" prove --repeat until-fail:${num_tries} $(ctest_j_option 12) PROVE_OPS="-j" | tee dgpu_level0_imm_make_check_result.txt