diff --git a/CHANGELOG.md b/CHANGELOG.md index d73e3323..e96d3260 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ Documentation for HIPIFY is available at [https://rocmdocs.amd.com/projects/HIPIFY/en/latest/](https://rocmdocs.amd.com/projects/HIPIFY/en/latest/). +## HIPIFY for ROCm 6.2.1 + +### Additions + +* CUDA 12.5.1 support +* cuDNN 9.2.1 support +* LLVM 18.1.8 support +* `hipBLAS` 64-bit APIs support +* Support for Math Constants `math_constants.h` + ## HIPIFY for ROCm 6.2.0 ### Additions diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d105e20..8fafd6f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,9 +22,9 @@ else() endif() message(STATUS "HIPIFY config:") -message(STATUS " - Build hipify-clang : ${HIPIFY_BUILD}") -message(STATUS " - Test hipify-clang : ${HIPIFY_TEST}") -message(STATUS " - Is part of HIP SDK : ${HIPIFY_INCLUDE_IN_HIP_SDK}") +message(STATUS " - Build hipify-clang : ${HIPIFY_BUILD}") +message(STATUS " - Test hipify-clang : ${HIPIFY_TEST}") +message(STATUS " - Is part of HIP SDK : ${HIPIFY_INCLUDE_IN_HIP_SDK}") message(STATUS " - Install clang headers : ${HIPIFY_INSTALL_CLANG_HEADERS}") if(HIPIFY_INCLUDE_IN_HIP_SDK) @@ -38,10 +38,10 @@ else() find_package(LLVM REQUIRED CONFIG PATHS ${CMAKE_PREFIX_PATH}) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}:") - message(STATUS " - CMake module path : ${LLVM_CMAKE_DIR}") - message(STATUS " - Clang include path : ${CLANG_INCLUDE_DIRS}") - message(STATUS " - LLVM Include path : ${LLVM_INCLUDE_DIRS}") - message(STATUS " - Binary path : ${LLVM_TOOLS_BINARY_DIR}") + message(STATUS " - CMake module path : ${LLVM_CMAKE_DIR}") + message(STATUS " - Clang include path : ${CLANG_INCLUDE_DIRS}") + message(STATUS " - LLVM Include path : ${LLVM_INCLUDE_DIRS}") + message(STATUS " - Binary path : ${LLVM_TOOLS_BINARY_DIR}") endif() list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) @@ -279,12 +279,12 @@ if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY) require_program(lit) 
require_program(FileCheck) - set(CUDA_TOOLKIT_ROOT_DIR OFF CACHE PATH "Path to CUDA Toolkit to use in hipify-clang unit testing") - set(CUDA_SDK_ROOT_DIR OFF CACHE PATH "Path to CUDA Toolkit Samples") - set(CUDA_DNN_ROOT_DIR OFF CACHE PATH "Path to cuDNN") - set(CUDA_CUB_ROOT_DIR OFF CACHE PATH "Path to CUB") + set(CUDA_TOOLKIT_ROOT_DIR "" CACHE PATH "Path to CUDA Toolkit to use in hipify-clang unit testing") + set(CUDA_SDK_ROOT_DIR "" CACHE PATH "Path to CUDA Toolkit Samples") + set(CUDA_DNN_ROOT_DIR "" CACHE PATH "Path to cuDNN") + set(CUDA_CUB_ROOT_DIR "" CACHE PATH "Path to CUB") - if(DEFINED CUDA_TOOLKIT_ROOT_DIR AND NOT CUDA_TOOLKIT_ROOT_DIR MATCHES "OFF") + if(NOT CUDA_TOOLKIT_ROOT_DIR STREQUAL "" AND NOT CUDA_TOOLKIT_ROOT_DIR MATCHES "OFF") if(NOT DEFINED CUDAToolkit_ROOT OR CUDAToolkit_ROOT MATCHES "OFF") set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}") endif() @@ -295,10 +295,10 @@ if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY) endif() message(STATUS "Initial CUDA to configure:") - message(STATUS " - CUDA Toolkit path : ${CUDA_TOOLKIT_ROOT_DIR}") - message(STATUS " - CUDA Samples path : ${CUDA_SDK_ROOT_DIR}") - message(STATUS " - cuDNN path : ${CUDA_DNN_ROOT_DIR}") - message(STATUS " - CUB path : ${CUDA_CUB_ROOT_DIR}") + message(STATUS " - CUDA Toolkit path : ${CUDA_TOOLKIT_ROOT_DIR}") + message(STATUS " - CUDA Samples path : ${CUDA_SDK_ROOT_DIR}") + message(STATUS " - cuDNN path : ${CUDA_DNN_ROOT_DIR}") + message(STATUS " - CUB path : ${CUDA_CUB_ROOT_DIR}") if(${CMAKE_VERSION} VERSION_LESS "3.27.0") find_package(CUDA REQUIRED) @@ -313,19 +313,46 @@ if(HIPIFY_CLANG_TESTS OR HIPIFY_CLANG_TESTS_ONLY) set(CUDA_TOOLKIT_ROOT_DIR "${CUDAToolkit_LIBRARY_ROOT}") endif() - if(WIN32 AND CUDA_VERSION VERSION_LESS "11.6" AND CUDA_SDK_ROOT_DIR STREQUAL "OFF" AND NOT CUDA_TOOLKIT_ROOT_DIR MATCHES "OFF") - set(NVCUDASAMPLES "NVCUDASAMPLES${CUDA_VERSION_MAJOR}_${CUDA_VERSION_MINOR}_ROOT") - if(DEFINED ENV{${NVCUDASAMPLES}}) - set(CUDA_SDK_ROOT_DIR 
"$ENV{${NVCUDASAMPLES}}") - string(REPLACE "\\" "/" CUDA_SDK_ROOT_DIR ${CUDA_SDK_ROOT_DIR}) + if(CUDA_SDK_ROOT_DIR STREQUAL "") + if(WIN32) + if(CUDA_VERSION VERSION_LESS "11.6") + set(NVCUDASAMPLES "NVCUDASAMPLES${CUDA_VERSION_MAJOR}_${CUDA_VERSION_MINOR}_ROOT") + if(DEFINED ENV{${NVCUDASAMPLES}}) + set(CUDA_SDK_ROOT_DIR "$ENV{${NVCUDASAMPLES}}") + string(REPLACE "\\" "/" CUDA_SDK_ROOT_DIR ${CUDA_SDK_ROOT_DIR}) + endif() + endif() + elseif(UNIX) + if(NOT CUDA_TOOLKIT_ROOT_DIR STREQUAL "" AND NOT CUDA_TOOLKIT_ROOT_DIR STREQUAL "OFF" AND CUDA_VERSION VERSION_LESS "10.1") + set(CUDA_SDK_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}/samples") + endif() + endif() + endif() + if(CUDA_SDK_ROOT_DIR STREQUAL "") + set(CUDA_SDK_ROOT_DIR OFF) + endif() + + if(CUDA_DNN_ROOT_DIR STREQUAL "") + set(CUDA_DNN_ROOT_DIR OFF) + endif() + + if(CUDA_CUB_ROOT_DIR STREQUAL "") + if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") + if(WIN32 OR (UNIX AND CUDA_VERSION VERSION_LESS "11.4" OR CUDA_VERSION VERSION_GREATER_EQUAL "11.6")) + set(CUDA_CUB_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}/include/cub") + else() + set(CUDA_CUB_ROOT_DIR OFF) + endif() + else() + set(CUDA_CUB_ROOT_DIR OFF) endif() endif() message(STATUS "Found CUDA config:") - message(STATUS " - CUDA Toolkit path : ${CUDA_TOOLKIT_ROOT_DIR}") - message(STATUS " - CUDA Samples path : ${CUDA_SDK_ROOT_DIR}") - message(STATUS " - cuDNN path : ${CUDA_DNN_ROOT_DIR}") - message(STATUS " - CUB path : ${CUDA_CUB_ROOT_DIR}") + message(STATUS " - CUDA Toolkit path : ${CUDA_TOOLKIT_ROOT_DIR}") + message(STATUS " - CUDA Samples path : ${CUDA_SDK_ROOT_DIR}") + message(STATUS " - cuDNN path : ${CUDA_DNN_ROOT_DIR}") + message(STATUS " - CUB path : ${CUDA_CUB_ROOT_DIR}") if((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR diff --git a/bin/hipify-perl b/bin/hipify-perl index 913acc50..4f7a2051 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ 
-1444,14 +1444,164 @@ my %experimental_funcs = ( "cudaDriverEntryPointSymbolNotFound" => "6.2.0", "cudaDriverEntryPointSuccess" => "6.2.0", "cudaDriverEntryPointQueryResult" => "6.2.0", + "cublasZtrsv_v2_64" => "6.2.0", + "cublasZtrsv_64" => "6.2.0", + "cublasZtrmv_v2_64" => "6.2.0", + "cublasZtrmv_64" => "6.2.0", + "cublasZtpsv_v2_64" => "6.2.0", + "cublasZtpsv_64" => "6.2.0", + "cublasZtpmv_v2_64" => "6.2.0", + "cublasZtpmv_64" => "6.2.0", + "cublasZtbsv_v2_64" => "6.2.0", + "cublasZtbsv_64" => "6.2.0", + "cublasZtbmv_v2_64" => "6.2.0", + "cublasZtbmv_64" => "6.2.0", + "cublasZsyr_v2_64" => "6.2.0", + "cublasZsyr_64" => "6.2.0", + "cublasZsyr2_v2_64" => "6.2.0", + "cublasZsyr2_64" => "6.2.0", + "cublasZsymv_v2_64" => "6.2.0", + "cublasZsymv_64" => "6.2.0", + "cublasZhpr_v2_64" => "6.2.0", + "cublasZhpr_64" => "6.2.0", + "cublasZhpr2_v2_64" => "6.2.0", + "cublasZhpr2_64" => "6.2.0", + "cublasZhpmv_v2_64" => "6.2.0", + "cublasZhpmv_64" => "6.2.0", + "cublasZher_v2_64" => "6.2.0", + "cublasZher_64" => "6.2.0", + "cublasZher2_v2_64" => "6.2.0", + "cublasZher2_64" => "6.2.0", + "cublasZhemv_v2_64" => "6.2.0", + "cublasZhemv_64" => "6.2.0", + "cublasZhbmv_v2_64" => "6.2.0", + "cublasZhbmv_64" => "6.2.0", + "cublasZgeru_v2_64" => "6.2.0", + "cublasZgeru_64" => "6.2.0", + "cublasZgerc_v2_64" => "6.2.0", + "cublasZgerc_64" => "6.2.0", + "cublasZgemv_v2_64" => "6.2.0", + "cublasZgemv_64" => "6.2.0", + "cublasZgemvStridedBatched_64" => "6.2.0", + "cublasZgemvBatched_64" => "6.2.0", "cublasZgbmv_v2_64" => "6.2.0", "cublasZgbmv_64" => "6.2.0", + "cublasStrsv_v2_64" => "6.2.0", + "cublasStrsv_64" => "6.2.0", + "cublasStrmv_v2_64" => "6.2.0", + "cublasStrmv_64" => "6.2.0", + "cublasStpsv_v2_64" => "6.2.0", + "cublasStpsv_64" => "6.2.0", + "cublasStpmv_v2_64" => "6.2.0", + "cublasStpmv_64" => "6.2.0", + "cublasStbsv_v2_64" => "6.2.0", + "cublasStbsv_64" => "6.2.0", + "cublasStbmv_v2_64" => "6.2.0", + "cublasStbmv_64" => "6.2.0", + "cublasSsyr_v2_64" => "6.2.0", + "cublasSsyr_64" 
=> "6.2.0", + "cublasSsyr2_v2_64" => "6.2.0", + "cublasSsyr2_64" => "6.2.0", + "cublasSsymv_v2_64" => "6.2.0", + "cublasSsymv_64" => "6.2.0", + "cublasSspr_v2_64" => "6.2.0", + "cublasSspr_64" => "6.2.0", + "cublasSspr2_v2_64" => "6.2.0", + "cublasSspr2_64" => "6.2.0", + "cublasSspmv_v2_64" => "6.2.0", + "cublasSspmv_64" => "6.2.0", + "cublasSsbmv_v2_64" => "6.2.0", + "cublasSsbmv_64" => "6.2.0", + "cublasSger_v2_64" => "6.2.0", + "cublasSger_64" => "6.2.0", + "cublasSgemv_v2_64" => "6.2.0", + "cublasSgemv_64" => "6.2.0", + "cublasSgemvStridedBatched_64" => "6.2.0", + "cublasSgemvBatched_64" => "6.2.0", "cublasSgbmv_v2_64" => "6.2.0", "cublasSgbmv_64" => "6.2.0", + "cublasScalEx_64" => "6.2.0", + "cublasRotEx_64" => "6.2.0", + "cublasNrm2Ex_64" => "6.2.0", + "cublasDtrsv_v2_64" => "6.2.0", + "cublasDtrsv_64" => "6.2.0", + "cublasDtrmv_v2_64" => "6.2.0", + "cublasDtrmv_64" => "6.2.0", + "cublasDtpsv_v2_64" => "6.2.0", + "cublasDtpsv_64" => "6.2.0", + "cublasDtpmv_v2_64" => "6.2.0", + "cublasDtpmv_64" => "6.2.0", + "cublasDtbsv_v2_64" => "6.2.0", + "cublasDtbsv_64" => "6.2.0", + "cublasDtbmv_v2_64" => "6.2.0", + "cublasDtbmv_64" => "6.2.0", + "cublasDsyr_v2_64" => "6.2.0", + "cublasDsyr_64" => "6.2.0", + "cublasDsyr2_v2_64" => "6.2.0", + "cublasDsyr2_64" => "6.2.0", + "cublasDsymv_v2_64" => "6.2.0", + "cublasDsymv_64" => "6.2.0", + "cublasDspr_v2_64" => "6.2.0", + "cublasDspr_64" => "6.2.0", + "cublasDspr2_v2_64" => "6.2.0", + "cublasDspr2_64" => "6.2.0", + "cublasDspmv_v2_64" => "6.2.0", + "cublasDspmv_64" => "6.2.0", + "cublasDsbmv_v2_64" => "6.2.0", + "cublasDsbmv_64" => "6.2.0", + "cublasDotcEx_64" => "6.2.0", + "cublasDotEx_64" => "6.2.0", + "cublasDger_v2_64" => "6.2.0", + "cublasDger_64" => "6.2.0", + "cublasDgemv_v2_64" => "6.2.0", + "cublasDgemv_64" => "6.2.0", + "cublasDgemvStridedBatched_64" => "6.2.0", + "cublasDgemvBatched_64" => "6.2.0", "cublasDgbmv_v2_64" => "6.2.0", "cublasDgbmv_64" => "6.2.0", + "cublasCtrsv_v2_64" => "6.2.0", + "cublasCtrsv_64" => 
"6.2.0", + "cublasCtrmv_v2_64" => "6.2.0", + "cublasCtrmv_64" => "6.2.0", + "cublasCtpsv_v2_64" => "6.2.0", + "cublasCtpsv_64" => "6.2.0", + "cublasCtpmv_v2_64" => "6.2.0", + "cublasCtpmv_64" => "6.2.0", + "cublasCtbsv_v2_64" => "6.2.0", + "cublasCtbsv_64" => "6.2.0", + "cublasCtbmv_v2_64" => "6.2.0", + "cublasCtbmv_64" => "6.2.0", + "cublasCsyr_v2_64" => "6.2.0", + "cublasCsyr_64" => "6.2.0", + "cublasCsyr2_v2_64" => "6.2.0", + "cublasCsyr2_64" => "6.2.0", + "cublasCsymv_v2_64" => "6.2.0", + "cublasCsymv_64" => "6.2.0", + "cublasChpr_v2_64" => "6.2.0", + "cublasChpr_64" => "6.2.0", + "cublasChpr2_v2_64" => "6.2.0", + "cublasChpr2_64" => "6.2.0", + "cublasChpmv_v2_64" => "6.2.0", + "cublasChpmv_64" => "6.2.0", + "cublasCher_v2_64" => "6.2.0", + "cublasCher_64" => "6.2.0", + "cublasCher2_v2_64" => "6.2.0", + "cublasCher2_64" => "6.2.0", + "cublasChemv_v2_64" => "6.2.0", + "cublasChemv_64" => "6.2.0", + "cublasChbmv_v2_64" => "6.2.0", + "cublasChbmv_64" => "6.2.0", + "cublasCgeru_v2_64" => "6.2.0", + "cublasCgeru_64" => "6.2.0", + "cublasCgerc_v2_64" => "6.2.0", + "cublasCgerc_64" => "6.2.0", + "cublasCgemv_v2_64" => "6.2.0", + "cublasCgemv_64" => "6.2.0", + "cublasCgemvStridedBatched_64" => "6.2.0", + "cublasCgemvBatched_64" => "6.2.0", "cublasCgbmv_v2_64" => "6.2.0", "cublasCgbmv_64" => "6.2.0", + "cublasAxpyEx_64" => "6.2.0", "cuStreamBeginCaptureToGraph" => "6.2.0", "cuGraphNodeSetParams" => "6.2.0", "cuGraphMemcpyNodeSetParams" => "6.2.0", @@ -1652,14 +1802,164 @@ sub experimentalSubstitutions { subst("cuGetProcAddress", "hipGetProcAddress", "driver_entry_point"); subst("cudaGetDriverEntryPoint", "hipGetProcAddress", "driver_entry_point"); subst("cudaGetFuncBySymbol", "hipGetFuncBySymbol", "driver_interact"); - subst("cublasCgbmv_64", "hipblasCgbmv_64", "library"); - subst("cublasCgbmv_v2_64", "hipblasCgbmv_64", "library"); + subst("cublasAxpyEx_64", "hipblasAxpyEx_v2_64", "library"); + subst("cublasCgbmv_64", "hipblasCgbmv_v2_64", "library"); + 
subst("cublasCgbmv_v2_64", "hipblasCgbmv_v2_64", "library"); + subst("cublasCgemvBatched_64", "hipblasCgemvBatched_v2_64", "library"); + subst("cublasCgemvStridedBatched_64", "hipblasCgemvStridedBatched_v2_64", "library"); + subst("cublasCgemv_64", "hipblasCgemv_v2_64", "library"); + subst("cublasCgemv_v2_64", "hipblasCgemv_v2_64", "library"); + subst("cublasCgerc_64", "hipblasCgerc_v2_64", "library"); + subst("cublasCgerc_v2_64", "hipblasCgerc_v2_64", "library"); + subst("cublasCgeru_64", "hipblasCgeru_v2_64", "library"); + subst("cublasCgeru_v2_64", "hipblasCgeru_v2_64", "library"); + subst("cublasChbmv_64", "hipblasChbmv_v2_64", "library"); + subst("cublasChbmv_v2_64", "hipblasChbmv_v2_64", "library"); + subst("cublasChemv_64", "hipblasChemv_v2_64", "library"); + subst("cublasChemv_v2_64", "hipblasChemv_v2_64", "library"); + subst("cublasCher2_64", "hipblasCher2_v2_64", "library"); + subst("cublasCher2_v2_64", "hipblasCher2_v2_64", "library"); + subst("cublasCher_64", "hipblasCher_v2_64", "library"); + subst("cublasCher_v2_64", "hipblasCher_v2_64", "library"); + subst("cublasChpmv_64", "hipblasChpmv_v2_64", "library"); + subst("cublasChpmv_v2_64", "hipblasChpmv_v2_64", "library"); + subst("cublasChpr2_64", "hipblasChpr2_v2_64", "library"); + subst("cublasChpr2_v2_64", "hipblasChpr2_v2_64", "library"); + subst("cublasChpr_64", "hipblasChpr_v2_64", "library"); + subst("cublasChpr_v2_64", "hipblasChpr_v2_64", "library"); + subst("cublasCsymv_64", "hipblasCsymv_v2_64", "library"); + subst("cublasCsymv_v2_64", "hipblasCsymv_v2_64", "library"); + subst("cublasCsyr2_64", "hipblasCsyr2_v2_64", "library"); + subst("cublasCsyr2_v2_64", "hipblasCsyr2_v2_64", "library"); + subst("cublasCsyr_64", "hipblasCsyr_v2_64", "library"); + subst("cublasCsyr_v2_64", "hipblasCsyr_v2_64", "library"); + subst("cublasCtbmv_64", "hipblasCtbmv_v2_64", "library"); + subst("cublasCtbmv_v2_64", "hipblasCtbmv_v2_64", "library"); + subst("cublasCtbsv_64", "hipblasCtbsv_v2_64", "library"); + 
subst("cublasCtbsv_v2_64", "hipblasCtbsv_v2_64", "library"); + subst("cublasCtpmv_64", "hipblasCtpmv_v2_64", "library"); + subst("cublasCtpmv_v2_64", "hipblasCtpmv_v2_64", "library"); + subst("cublasCtpsv_64", "hipblasCtpsv_v2_64", "library"); + subst("cublasCtpsv_v2_64", "hipblasCtpsv_v2_64", "library"); + subst("cublasCtrmv_64", "hipblasCtrmv_v2_64", "library"); + subst("cublasCtrmv_v2_64", "hipblasCtrmv_v2_64", "library"); + subst("cublasCtrsv_64", "hipblasCtrsv_v2_64", "library"); + subst("cublasCtrsv_v2_64", "hipblasCtrsv_v2_64", "library"); subst("cublasDgbmv_64", "hipblasDgbmv_64", "library"); subst("cublasDgbmv_v2_64", "hipblasDgbmv_64", "library"); + subst("cublasDgemvBatched_64", "hipblasDgemvBatched_64", "library"); + subst("cublasDgemvStridedBatched_64", "hipblasDgemvStridedBatched_64", "library"); + subst("cublasDgemv_64", "hipblasDgemv_64", "library"); + subst("cublasDgemv_v2_64", "hipblasDgemv_64", "library"); + subst("cublasDger_64", "hipblasDger_64", "library"); + subst("cublasDger_v2_64", "hipblasDger_64", "library"); + subst("cublasDotEx_64", "hipblasDotEx_v2_64", "library"); + subst("cublasDotcEx_64", "hipblasDotcEx_v2_64", "library"); + subst("cublasDsbmv_64", "hipblasDsbmv_64", "library"); + subst("cublasDsbmv_v2_64", "hipblasDsbmv_64", "library"); + subst("cublasDspmv_64", "hipblasDspmv_64", "library"); + subst("cublasDspmv_v2_64", "hipblasDspmv_64", "library"); + subst("cublasDspr2_64", "hipblasDspr2_64", "library"); + subst("cublasDspr2_v2_64", "hipblasDspr2_64", "library"); + subst("cublasDspr_64", "hipblasDspr_64", "library"); + subst("cublasDspr_v2_64", "hipblasDspr_64", "library"); + subst("cublasDsymv_64", "hipblasDsymv_64", "library"); + subst("cublasDsymv_v2_64", "hipblasDsymv_64", "library"); + subst("cublasDsyr2_64", "hipblasDsyr2_64", "library"); + subst("cublasDsyr2_v2_64", "hipblasDsyr2_64", "library"); + subst("cublasDsyr_64", "hipblasDsyr_64", "library"); + subst("cublasDsyr_v2_64", "hipblasDsyr_64", "library"); + 
subst("cublasDtbmv_64", "hipblasDtbmv_64", "library"); + subst("cublasDtbmv_v2_64", "hipblasDtbmv_64", "library"); + subst("cublasDtbsv_64", "hipblasDtbsv_64", "library"); + subst("cublasDtbsv_v2_64", "hipblasDtbsv_64", "library"); + subst("cublasDtpmv_64", "hipblasDtpmv_64", "library"); + subst("cublasDtpmv_v2_64", "hipblasDtpmv_64", "library"); + subst("cublasDtpsv_64", "hipblasDtpsv_64", "library"); + subst("cublasDtpsv_v2_64", "hipblasDtpsv_64", "library"); + subst("cublasDtrmv_64", "hipblasDtrmv_64", "library"); + subst("cublasDtrmv_v2_64", "hipblasDtrmv_64", "library"); + subst("cublasDtrsv_64", "hipblasDtrsv_64", "library"); + subst("cublasDtrsv_v2_64", "hipblasDtrsv_64", "library"); + subst("cublasNrm2Ex_64", "hipblasNrm2Ex_v2_64", "library"); + subst("cublasRotEx_64", "hipblasRotEx_v2_64", "library"); + subst("cublasScalEx_64", "hipblasScalEx_v2_64", "library"); subst("cublasSgbmv_64", "hipblasSgbmv_64", "library"); subst("cublasSgbmv_v2_64", "hipblasSgbmv_64", "library"); - subst("cublasZgbmv_64", "hipblasZgbmv_64", "library"); - subst("cublasZgbmv_v2_64", "hipblasZgbmv_64", "library"); + subst("cublasSgemvBatched_64", "hipblasSgemvBatched_64", "library"); + subst("cublasSgemvStridedBatched_64", "hipblasSgemvStridedBatched_64", "library"); + subst("cublasSgemv_64", "hipblasSgemv_64", "library"); + subst("cublasSgemv_v2_64", "hipblasSgemv_64", "library"); + subst("cublasSger_64", "hipblasSger_64", "library"); + subst("cublasSger_v2_64", "hipblasSger_64", "library"); + subst("cublasSsbmv_64", "hipblasSsbmv_64", "library"); + subst("cublasSsbmv_v2_64", "hipblasSsbmv_64", "library"); + subst("cublasSspmv_64", "hipblasSspmv_64", "library"); + subst("cublasSspmv_v2_64", "hipblasSspmv_64", "library"); + subst("cublasSspr2_64", "hipblasSspr2_64", "library"); + subst("cublasSspr2_v2_64", "hipblasSspr2_64", "library"); + subst("cublasSspr_64", "hipblasSspr_64", "library"); + subst("cublasSspr_v2_64", "hipblasSspr_64", "library"); + subst("cublasSsymv_64", 
"hipblasSsymv_64", "library"); + subst("cublasSsymv_v2_64", "hipblasSsymv_64", "library"); + subst("cublasSsyr2_64", "hipblasSsyr2_64", "library"); + subst("cublasSsyr2_v2_64", "hipblasSsyr2_64", "library"); + subst("cublasSsyr_64", "hipblasSsyr_64", "library"); + subst("cublasSsyr_v2_64", "hipblasSsyr_64", "library"); + subst("cublasStbmv_64", "hipblasStbmv_64", "library"); + subst("cublasStbmv_v2_64", "hipblasStbmv_64", "library"); + subst("cublasStbsv_64", "hipblasStbsv_64", "library"); + subst("cublasStbsv_v2_64", "hipblasStbsv_64", "library"); + subst("cublasStpmv_64", "hipblasStpmv_64", "library"); + subst("cublasStpmv_v2_64", "hipblasStpmv_64", "library"); + subst("cublasStpsv_64", "hipblasStpsv_64", "library"); + subst("cublasStpsv_v2_64", "hipblasStpsv_64", "library"); + subst("cublasStrmv_64", "hipblasStrmv_64", "library"); + subst("cublasStrmv_v2_64", "hipblasStrmv_64", "library"); + subst("cublasStrsv_64", "hipblasStrsv_64", "library"); + subst("cublasStrsv_v2_64", "hipblasStrsv_64", "library"); + subst("cublasZgbmv_64", "hipblasZgbmv_v2_64", "library"); + subst("cublasZgbmv_v2_64", "hipblasZgbmv_v2_64", "library"); + subst("cublasZgemvBatched_64", "hipblasZgemvBatched_v2_64", "library"); + subst("cublasZgemvStridedBatched_64", "hipblasZgemvStridedBatched_v2_64", "library"); + subst("cublasZgemv_64", "hipblasZgemv_v2_64", "library"); + subst("cublasZgemv_v2_64", "hipblasZgemv_v2_64", "library"); + subst("cublasZgerc_64", "hipblasZgerc_v2_64", "library"); + subst("cublasZgerc_v2_64", "hipblasZgerc_v2_64", "library"); + subst("cublasZgeru_64", "hipblasZgeru_v2_64", "library"); + subst("cublasZgeru_v2_64", "hipblasZgeru_v2_64", "library"); + subst("cublasZhbmv_64", "hipblasZhbmv_v2_64", "library"); + subst("cublasZhbmv_v2_64", "hipblasZhbmv_v2_64", "library"); + subst("cublasZhemv_64", "hipblasZhemv_v2_64", "library"); + subst("cublasZhemv_v2_64", "hipblasZhemv_v2_64", "library"); + subst("cublasZher2_64", "hipblasZher2_v2_64", "library"); + 
subst("cublasZher2_v2_64", "hipblasZher2_v2_64", "library"); + subst("cublasZher_64", "hipblasZher_v2_64", "library"); + subst("cublasZher_v2_64", "hipblasZher_v2_64", "library"); + subst("cublasZhpmv_64", "hipblasZhpmv_v2_64", "library"); + subst("cublasZhpmv_v2_64", "hipblasZhpmv_v2_64", "library"); + subst("cublasZhpr2_64", "hipblasZhpr2_v2_64", "library"); + subst("cublasZhpr2_v2_64", "hipblasZhpr2_v2_64", "library"); + subst("cublasZhpr_64", "hipblasZhpr_v2_64", "library"); + subst("cublasZhpr_v2_64", "hipblasZhpr_v2_64", "library"); + subst("cublasZsymv_64", "hipblasZsymv_v2_64", "library"); + subst("cublasZsymv_v2_64", "hipblasZsymv_v2_64", "library"); + subst("cublasZsyr2_64", "hipblasZsyr2_v2_64", "library"); + subst("cublasZsyr2_v2_64", "hipblasZsyr2_v2_64", "library"); + subst("cublasZsyr_64", "hipblasZsyr_v2_64", "library"); + subst("cublasZsyr_v2_64", "hipblasZsyr_v2_64", "library"); + subst("cublasZtbmv_64", "hipblasZtbmv_v2_64", "library"); + subst("cublasZtbmv_v2_64", "hipblasZtbmv_v2_64", "library"); + subst("cublasZtbsv_64", "hipblasZtbsv_v2_64", "library"); + subst("cublasZtbsv_v2_64", "hipblasZtbsv_v2_64", "library"); + subst("cublasZtpmv_64", "hipblasZtpmv_v2_64", "library"); + subst("cublasZtpmv_v2_64", "hipblasZtpmv_v2_64", "library"); + subst("cublasZtpsv_64", "hipblasZtpsv_v2_64", "library"); + subst("cublasZtpsv_v2_64", "hipblasZtpsv_v2_64", "library"); + subst("cublasZtrmv_64", "hipblasZtrmv_v2_64", "library"); + subst("cublasZtrmv_v2_64", "hipblasZtrmv_v2_64", "library"); + subst("cublasZtrsv_64", "hipblasZtrsv_v2_64", "library"); + subst("cublasZtrsv_v2_64", "hipblasZtrsv_v2_64", "library"); subst("curandSetGeneratorOrdering", "hiprandSetGeneratorOrdering", "library"); subst("cusolverDnCreateParams", "hipsolverDnCreateParams", "library"); subst("cusolverDnDestroyParams", "hipsolverDnDestroyParams", "library"); @@ -3897,6 +4197,8 @@ sub simpleSubstitutions { subst("cublasDgemmStridedBatched", "hipblasDgemmStridedBatched", "library"); 
subst("cublasDgemm_v2", "hipblasDgemm", "library"); subst("cublasDgemv", "hipblasDgemv", "library"); + subst("cublasDgemvBatched", "hipblasDgemvBatched", "library"); + subst("cublasDgemvStridedBatched", "hipblasDgemvStridedBatched", "library"); subst("cublasDgemv_v2", "hipblasDgemv", "library"); subst("cublasDgeqrfBatched", "hipblasDgeqrfBatched", "library"); subst("cublasDger", "hipblasDger", "library"); @@ -4092,6 +4394,8 @@ sub simpleSubstitutions { subst("cublasSgemmStridedBatched", "hipblasSgemmStridedBatched", "library"); subst("cublasSgemm_v2", "hipblasSgemm", "library"); subst("cublasSgemv", "hipblasSgemv", "library"); + subst("cublasSgemvBatched", "hipblasSgemvBatched", "library"); + subst("cublasSgemvStridedBatched", "hipblasSgemvStridedBatched", "library"); subst("cublasSgemv_v2", "hipblasSgemv", "library"); subst("cublasSgeqrfBatched", "hipblasSgeqrfBatched", "library"); subst("cublasSger", "hipblasSger", "library"); @@ -5319,6 +5623,7 @@ sub simpleSubstitutions { subst("device_functions.h", "hip\/device_functions.h", "include"); subst("driver_types.h", "hip\/driver_types.h", "include"); subst("library_types.h", "hip\/library_types.h", "include"); + subst("math_constants.h", "hip\/hip_math_constants.h", "include"); subst("texture_fetch_functions.h", "", "include"); subst("vector_types.h", "hip\/hip_vector_types.h", "include"); subst("cuComplex.h", "hip\/hip_complex.h", "include_cuda_main_header"); @@ -7128,6 +7433,101 @@ sub simpleSubstitutions { subst("CUB_PTX_WARP_THREADS", "HIPCUB_WARP_THREADS", "define"); subst("CUB_RUNTIME_FUNCTION", "HIPCUB_RUNTIME_FUNCTION", "define"); subst("CUB_STDERR", "HIPCUB_STDERR", "define"); + subst("CUDART_2_OVER_PI", "HIP_2_OVER_PI", "define"); + subst("CUDART_2_OVER_PI_F", "HIP_2_OVER_PI_F", "define"); + subst("CUDART_3PIO4", "HIP_3PIO4", "define"); + subst("CUDART_3PIO4_F", "HIP_3PIO4_F", "define"); + subst("CUDART_DBL2INT_CVT", "HIP_DBL2INT_CVT", "define"); + subst("CUDART_INF", "HIP_INF", "define"); + 
subst("CUDART_INF_F", "HIP_INF_F", "define"); + subst("CUDART_L2E", "HIP_L2E", "define"); + subst("CUDART_L2E_F", "HIP_L2E_F", "define"); + subst("CUDART_L2E_HI", "HIP_L2E_HI", "define"); + subst("CUDART_L2E_LO", "HIP_L2E_LO", "define"); + subst("CUDART_L2T", "HIP_L2T", "define"); + subst("CUDART_L2T_F", "HIP_L2T_F", "define"); + subst("CUDART_LG2", "HIP_LG2", "define"); + subst("CUDART_LG2_F", "HIP_LG2_F", "define"); + subst("CUDART_LG2_HI", "HIP_LG2_HI", "define"); + subst("CUDART_LG2_LO", "HIP_LG2_LO", "define"); + subst("CUDART_LG2_X_1024", "HIP_LG2_X_1024", "define"); + subst("CUDART_LG2_X_1075", "HIP_LG2_X_1075", "define"); + subst("CUDART_LGE", "HIP_LGE", "define"); + subst("CUDART_LGE_F", "HIP_LGE_F", "define"); + subst("CUDART_LGE_HI", "HIP_LGE_HI", "define"); + subst("CUDART_LGE_LO", "HIP_LGE_LO", "define"); + subst("CUDART_LN2", "HIP_LN2", "define"); + subst("CUDART_LN2_F", "HIP_LN2_F", "define"); + subst("CUDART_LN2_HI", "HIP_LN2_HI", "define"); + subst("CUDART_LN2_LO", "HIP_LN2_LO", "define"); + subst("CUDART_LN2_X_1024", "HIP_LN2_X_1024", "define"); + subst("CUDART_LN2_X_1025", "HIP_LN2_X_1025", "define"); + subst("CUDART_LN2_X_1075", "HIP_LN2_X_1075", "define"); + subst("CUDART_LNPI", "HIP_LNPI", "define"); + subst("CUDART_LNPI_F", "HIP_LNPI_F", "define"); + subst("CUDART_LNT", "HIP_LNT", "define"); + subst("CUDART_LNT_F", "HIP_LNT_F", "define"); + subst("CUDART_LNT_HI", "HIP_LNT_HI", "define"); + subst("CUDART_LNT_LO", "HIP_LNT_LO", "define"); + subst("CUDART_MAX_NORMAL_F", "HIP_MAX_NORMAL_F", "define"); + subst("CUDART_MIN_DENORM", "HIP_MIN_DENORM", "define"); + subst("CUDART_MIN_DENORM_F", "HIP_MIN_DENORM_F", "define"); + subst("CUDART_NAN", "HIP_NAN", "define"); + subst("CUDART_NAN_F", "HIP_NAN_F", "define"); + subst("CUDART_NEG_ZERO", "HIP_NEG_ZERO", "define"); + subst("CUDART_NEG_ZERO_F", "HIP_NEG_ZERO_F", "define"); + subst("CUDART_NORM_HUGE_F", "HIP_NORM_HUGE_F", "define"); + subst("CUDART_ONE", "HIP_ONE", "define"); + subst("CUDART_ONE_F", 
"HIP_ONE_F", "define"); + subst("CUDART_PI", "HIP_PI", "define"); + subst("CUDART_PIO2", "HIP_PIO2", "define"); + subst("CUDART_PIO2_F", "HIP_PIO2_F", "define"); + subst("CUDART_PIO2_HI", "HIP_PIO2_HI", "define"); + subst("CUDART_PIO2_LO", "HIP_PIO2_LO", "define"); + subst("CUDART_PIO4", "HIP_PIO4", "define"); + subst("CUDART_PIO4_F", "HIP_PIO4_F", "define"); + subst("CUDART_PIO4_HI", "HIP_PIO4_HI", "define"); + subst("CUDART_PIO4_LO", "HIP_PIO4_LO", "define"); + subst("CUDART_PI_F", "HIP_PI_F", "define"); + subst("CUDART_PI_HI", "HIP_PI_HI", "define"); + subst("CUDART_PI_LO", "HIP_PI_LO", "define"); + subst("CUDART_REMQUO_BITS_F", "HIP_REMQUO_BITS_F", "define"); + subst("CUDART_REMQUO_MASK_F", "HIP_REMQUO_MASK_F", "define"); + subst("CUDART_SQRT_2OPI", "HIP_SQRT_2OPI", "define"); + subst("CUDART_SQRT_2PI", "HIP_SQRT_2PI", "define"); + subst("CUDART_SQRT_2PI_HI", "HIP_SQRT_2PI_HI", "define"); + subst("CUDART_SQRT_2PI_LO", "HIP_SQRT_2PI_LO", "define"); + subst("CUDART_SQRT_2_OVER_PI_F", "HIP_SQRT_2_OVER_PI_F", "define"); + subst("CUDART_SQRT_HALF", "HIP_SQRT_HALF", "define"); + subst("CUDART_SQRT_HALF_F", "HIP_SQRT_HALF_F", "define"); + subst("CUDART_SQRT_HALF_HI", "HIP_SQRT_HALF_HI", "define"); + subst("CUDART_SQRT_HALF_HI_F", "HIP_SQRT_HALF_HI_F", "define"); + subst("CUDART_SQRT_HALF_LO", "HIP_SQRT_HALF_LO", "define"); + subst("CUDART_SQRT_HALF_LO_F", "HIP_SQRT_HALF_LO_F", "define"); + subst("CUDART_SQRT_PIO2", "HIP_SQRT_PIO2", "define"); + subst("CUDART_SQRT_PIO2_HI", "HIP_SQRT_PIO2_HI", "define"); + subst("CUDART_SQRT_PIO2_LO", "HIP_SQRT_PIO2_LO", "define"); + subst("CUDART_SQRT_TWO", "HIP_SQRT_TWO", "define"); + subst("CUDART_SQRT_TWO_F", "HIP_SQRT_TWO_F", "define"); + subst("CUDART_THIRD", "HIP_THIRD", "define"); + subst("CUDART_THIRD_F", "HIP_THIRD_F", "define"); + subst("CUDART_TRIG_PLOSS", "HIP_TRIG_PLOSS", "define"); + subst("CUDART_TRIG_PLOSS_F", "HIP_TRIG_PLOSS_F", "define"); + subst("CUDART_TWOTHIRD", "HIP_TWOTHIRD", "define"); + 
subst("CUDART_TWO_TO_126_F", "HIP_TWO_TO_126_F", "define"); + subst("CUDART_TWO_TO_23", "HIP_TWO_TO_23", "define"); + subst("CUDART_TWO_TO_23_F", "HIP_TWO_TO_23_F", "define"); + subst("CUDART_TWO_TO_24_F", "HIP_TWO_TO_24_F", "define"); + subst("CUDART_TWO_TO_31_F", "HIP_TWO_TO_31_F", "define"); + subst("CUDART_TWO_TO_32_F", "HIP_TWO_TO_32_F", "define"); + subst("CUDART_TWO_TO_52", "HIP_TWO_TO_52", "define"); + subst("CUDART_TWO_TO_53", "HIP_TWO_TO_53", "define"); + subst("CUDART_TWO_TO_54", "HIP_TWO_TO_54", "define"); + subst("CUDART_TWO_TO_M1022", "HIP_TWO_TO_M1022", "define"); + subst("CUDART_TWO_TO_M126_F", "HIP_TWO_TO_M126_F", "define"); + subst("CUDART_TWO_TO_M54", "HIP_TWO_TO_M54", "define"); + subst("CUDART_ZERO", "HIP_ZERO", "define"); + subst("CUDART_ZERO_F", "HIP_ZERO_F", "define"); subst("CUDA_ARRAY3D_CUBEMAP", "hipArrayCubemap", "define"); subst("CUDA_ARRAY3D_LAYERED", "hipArrayLayered", "define"); subst("CUDA_ARRAY3D_SURFACE_LDST", "hipArraySurfaceLoadStore", "define"); @@ -9116,6 +9516,7 @@ sub warnUnsupportedFunctions { "cudaLaunchAttributeSynchronizationPolicy", "cudaLaunchAttributeProgrammaticStreamSerialization", "cudaLaunchAttributeProgrammaticEvent", + "cudaLaunchAttributePreferredSharedMemoryCarveout", "cudaLaunchAttributeMemSyncDomainMap", "cudaLaunchAttributeMemSyncDomain", "cudaLaunchAttributeLaunchCompletionEvent", @@ -9127,6 +9528,7 @@ sub warnUnsupportedFunctions { "cudaKeyValuePair", "cudaKernel_t", "cudaKernelNodeParamsV2", + "cudaKernelNodeAttributePreferredSharedMemoryCarveout", "cudaKernelNodeAttributeMemSyncDomainMap", "cudaKernelNodeAttributeMemSyncDomain", "cudaKernelNodeAttributeDeviceUpdatableKernelNode", @@ -9186,6 +9588,7 @@ sub warnUnsupportedFunctions { "cudaGetParameterBuffer", "cudaGetKernel", "cudaGetDriverEntryPointFlags", + "cudaGetDriverEntryPointByVersion", "cudaGLUnregisterBufferObject", "cudaGLUnmapBufferObjectAsync", "cudaGLUnmapBufferObject", @@ -9429,6 +9832,7 @@ sub warnUnsupportedFunctions { 
"cudaDevAttrGPUDirectRDMASupported", "cudaDevAttrGPUDirectRDMAFlushWritesOptions", "cudaDevAttrDeferredMappingCudaArraySupported", + "cudaDevAttrD3D12CigSupported", "cudaDevAttrClusterLaunch", "cudaDevAttrCanFlushRemoteWrites", "cudaD3D9UnregisterResource", @@ -9560,6 +9964,7 @@ sub warnUnsupportedFunctions { "cuStreamSetAttribute", "cuStreamGetId", "cuStreamGetGreenCtx", + "cuStreamGetCtx_v2", "cuStreamGetCtx", "cuStreamGetCaptureInfo_v3", "cuStreamGetAttribute", @@ -9634,9 +10039,11 @@ sub warnUnsupportedFunctions { "cuKernelSetAttribute", "cuKernelGetParamInfo", "cuKernelGetName", + "cuKernelGetLibrary", "cuKernelGetFunction", "cuKernelGetAttribute", "cuGreenCtxWaitEvent", + "cuGreenCtxStreamCreate", "cuGreenCtxRecordEvent", "cuGreenCtxGetDevResource", "cuGreenCtxDestroy", @@ -9735,6 +10142,7 @@ sub warnUnsupportedFunctions { "cuD3D10GetDevice", "cuD3D10CtxCreateOnDevice", "cuD3D10CtxCreate", + "cuCtxWaitEvent", "cuCtxSetFlags", "cuCtxResetPersistingL2Cache", "cuCtxGetId", @@ -9742,6 +10150,7 @@ sub warnUnsupportedFunctions { "cuCtxGetDevResource", "cuCtxFromGreenCtx", "cuCtxDetach", + "cuCtxCreate_v4", "cuCtxCreate_v3", "cuCtxAttach", "cuCoredumpSetAttributeGlobal", @@ -9791,6 +10200,7 @@ sub warnUnsupportedFunctions { "_CUB_ASM_PTR_", "NVRTC_ERROR_TIME_FILE_WRITE_FAILED", "NVFFT_PLAN_PROPERTY_INT64_PATIENT_JIT", + "NVFFT_PLAN_PROPERTY_INT64_MAX_NUM_HOST_THREADS", "NVCL_EVENT_SCHED_YIELD", "NVCL_EVENT_SCHED_SPIN", "NVCL_EVENT_SCHED_BLOCKING_SYNC", @@ -9934,6 +10344,7 @@ sub warnUnsupportedFunctions { "CUdeviceNumaConfig_enum", "CUdeviceNumaConfig", "CUdevSmResource_st", + "CUdevSmResourceSplit_flags", "CUdevSmResource", "CUdevResource_st", "CUdevResourceType", @@ -9956,10 +10367,16 @@ sub warnUnsupportedFunctions { "CUd3d10DeviceList", "CUctx_flags_enum", "CUctx_flags", + "CUctxCreateParams_st", + "CUctxCreateParams", + "CUctxCigParam_st", + "CUctxCigParam", "CUcoredumpSettings_enum", "CUcoredumpSettings", "CUclusterSchedulingPolicy_enum", 
"CUclusterSchedulingPolicy", + "CUcigDataType_enum", + "CUcigDataType", "CUasyncNotificationType_enum", "CUasyncNotificationType", "CUasyncNotificationInfo_st", @@ -10074,11 +10491,14 @@ sub warnUnsupportedFunctions { "CU_MEM_HANDLE_TYPE_FABRIC", "CU_MEM_CREATE_USAGE_TILE_POOL", "CU_MEM_ACCESS_FLAGS_PROT_MAX", + "CU_LIMIT_SHMEM_SIZE", "CU_LIMIT_PERSISTING_L2_CACHE_SIZE", "CU_LIMIT_MAX_L2_FETCH_GRANULARITY", "CU_LIMIT_MAX", "CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH", "CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT", + "CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED", + "CU_LIMIT_CIG_ENABLED", "CU_LIBRARY_NUM_OPTIONS", "CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE", "CU_LIBRARY_BINARY_IS_PRESERVED", @@ -10090,6 +10510,7 @@ sub warnUnsupportedFunctions { "CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY", "CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION", "CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT", + "CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP", "CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN", "CU_LAUNCH_ATTRIBUTE_MAX", @@ -10098,6 +10519,7 @@ sub warnUnsupportedFunctions { "CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE", "CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE", "CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION", + "CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP", "CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN", "CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE", @@ -10250,6 +10672,8 @@ sub warnUnsupportedFunctions { "CU_EGL_COLOR_FORMAT_ARGB", "CU_EGL_COLOR_FORMAT_ABGR", "CU_EGL_COLOR_FORMAT_A", + "CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE", + "CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING", "CU_DEV_RESOURCE_TYPE_SM", "CU_DEV_RESOURCE_TYPE_MAX", "CU_DEV_RESOURCE_TYPE_INVALID", @@ -10291,6 +10715,7 @@ sub warnUnsupportedFunctions { "CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED", "CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED", 
"CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED", + "CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED", "CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH", "CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2", "CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1", @@ -10330,18 +10755,33 @@ sub warnUnsupportedFunctions { "CU_CTX_FLAGS_MASK", "CU_CTX_COREDUMP_ENABLE", "CU_COREDUMP_TRIGGER_HOST", + "CU_COREDUMP_SKIP_SHARED_MEMORY", + "CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES", + "CU_COREDUMP_SKIP_LOCAL_MEMORY", + "CU_COREDUMP_SKIP_GLOBAL_MEMORY", + "CU_COREDUMP_SKIP_ABORT", "CU_COREDUMP_PIPE", "CU_COREDUMP_MAX", + "CU_COREDUMP_LIGHTWEIGHT_FLAGS", "CU_COREDUMP_LIGHTWEIGHT", + "CU_COREDUMP_GENERATION_FLAGS", "CU_COREDUMP_FILE", "CU_COREDUMP_ENABLE_USER_TRIGGER", "CU_COREDUMP_ENABLE_ON_EXCEPTION", + "CU_COREDUMP_DEFAULT_FLAGS", "CU_COMPUTE_ACCELERATED_TARGET_BASE", "CU_CLUSTER_SCHEDULING_POLICY_SPREAD", "CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING", "CU_CLUSTER_SCHEDULING_POLICY_DEFAULT", "CU_ASYNC_NOTIFICATION_TYPE_OVER_BUDGET", "CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL", + "CU_AD_FORMAT_YUY2", + "CU_AD_FORMAT_Y444_PLANAR8", + "CU_AD_FORMAT_Y444_PLANAR10", + "CU_AD_FORMAT_Y416", + "CU_AD_FORMAT_Y410", + "CU_AD_FORMAT_Y216", + "CU_AD_FORMAT_Y210", "CU_AD_FORMAT_UNORM_INT8X4", "CU_AD_FORMAT_UNORM_INT8X2", "CU_AD_FORMAT_UNORM_INT8X1", @@ -10354,7 +10794,13 @@ sub warnUnsupportedFunctions { "CU_AD_FORMAT_SNORM_INT16X4", "CU_AD_FORMAT_SNORM_INT16X2", "CU_AD_FORMAT_SNORM_INT16X1", + "CU_AD_FORMAT_P216", + "CU_AD_FORMAT_P210", + "CU_AD_FORMAT_P016", + "CU_AD_FORMAT_P010", + "CU_AD_FORMAT_NV16", "CU_AD_FORMAT_NV12", + "CU_AD_FORMAT_MAX", "CU_AD_FORMAT_BC7_UNORM_SRGB", "CU_AD_FORMAT_BC7_UNORM", "CU_AD_FORMAT_BC6H_UF16", @@ -10369,12 +10815,14 @@ sub warnUnsupportedFunctions { "CU_AD_FORMAT_BC2_UNORM", "CU_AD_FORMAT_BC1_UNORM_SRGB", "CU_AD_FORMAT_BC1_UNORM", + "CU_AD_FORMAT_AYUV", "CUSPARSE_SPSV_UPDATE_GENERAL", "CUSPARSE_SPSV_UPDATE_DIAGONAL", "CUSPARSE_SPSM_UPDATE_GENERAL", 
"CUSPARSE_SPSM_UPDATE_DIAGONAL", "CUSPARSE_SPMV_SELL_ALG1", "CUSPARSE_SPMM_OP_ALG_DEFAULT", + "CUSPARSE_SPMM_BSR_ALG1", "CUSPARSE_SPMMA_PREPROCESS", "CUSPARSE_SPMMA_ALG4", "CUSPARSE_SPMMA_ALG3", @@ -10512,6 +10960,7 @@ sub warnUnsupportedFunctions { "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", "CUDNN_STATUS_VERSION_MISMATCH", "CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", + "CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", "CUDNN_STATUS_SPECIFIC_ERROR", "CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", "CUDNN_STATUS_RUNTIME_IN_PROGRESS", @@ -11069,6 +11518,8 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_ENGINEHEUR_RESULTS", "CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH", "CUDNN_ATTR_ENGINEHEUR_MODE", + "CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", + "CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", "CUDNN_ATTR_ENGINECFG_KNOB_CHOICES", "CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO", "CUDNN_ATTR_ENGINECFG_ENGINE", @@ -11159,6 +11610,7 @@ sub warnUnsupportedFunctions { "CUDA_ARRAY_MEMORY_REQUIREMENTS_v1", "CUDA_ARRAY_MEMORY_REQUIREMENTS_st", "CUDA_ARRAY_MEMORY_REQUIREMENTS", + "CUDA_ARRAY3D_VIDEO_ENCODE_DECODE", "CUDA_ARRAY3D_SPARSE", "CUDA_ARRAY3D_DEPTH_TEXTURE", "CUDA_ARRAY3D_DEFERRED_MAPPING", @@ -11166,6 +11618,7 @@ sub warnUnsupportedFunctions { "CUDA_ARRAY3D_2DARRAY", "CUDALIBMG_GRID_MAPPING_ROW_MAJOR", "CUDALIBMG_GRID_MAPPING_COL_MAJOR", + "CUCoredumpGenerationFlags", "CUB_USE_COOPERATIVE_GROUPS", "CUB_SUBSCRIPTION_FACTOR", "CUB_STATIC_ASSERT", @@ -11223,7 +11676,8 @@ sub warnUnsupportedFunctions { "CUBLAS_STOREV_ROWWISE", "CUBLAS_STOREV_COLUMNWISE", "CUBLAS_DIRECT_FORWARD", - "CUBLAS_DIRECT_BACKWARD" + "CUBLAS_DIRECT_BACKWARD", + "CIG_DATA_TYPE_D3D12_COMMAND_QUEUE" ) { my $mt = m/($func)/g; @@ -11240,67 +11694,27 @@ sub warnHipOnlyUnsupportedFunctions { my $k = 0; foreach $func ( "cublasZtrttp", - "cublasZtrsv_v2_64", - "cublasZtrsv_64", "cublasZtrsm_v2_64", "cublasZtrsm_64", "cublasZtrsmBatched_64", - "cublasZtrmv_v2_64", - "cublasZtrmv_64", "cublasZtrmm_v2_64", "cublasZtrmm_64", "cublasZtpttr", - 
"cublasZtpsv_v2_64", - "cublasZtpsv_64", - "cublasZtpmv_v2_64", - "cublasZtpmv_64", - "cublasZtbsv_v2_64", - "cublasZtbsv_64", - "cublasZtbmv_v2_64", - "cublasZtbmv_64", "cublasZsyrkx_64", "cublasZsyrk_v2_64", "cublasZsyrk_64", - "cublasZsyr_v2_64", - "cublasZsyr_64", "cublasZsyr2k_v2_64", "cublasZsyr2k_64", - "cublasZsyr2_v2_64", - "cublasZsyr2_64", - "cublasZsymv_v2_64", - "cublasZsymv_64", "cublasZsymm_v2_64", "cublasZsymm_64", "cublasZmatinvBatched", - "cublasZhpr_v2_64", - "cublasZhpr_64", - "cublasZhpr2_v2_64", - "cublasZhpr2_64", - "cublasZhpmv_v2_64", - "cublasZhpmv_64", "cublasZherkx_64", "cublasZherk_v2_64", "cublasZherk_64", - "cublasZher_v2_64", - "cublasZher_64", "cublasZher2k_v2_64", "cublasZher2k_64", - "cublasZher2_v2_64", - "cublasZher2_64", - "cublasZhemv_v2_64", - "cublasZhemv_64", "cublasZhemm_v2_64", "cublasZhemm_64", - "cublasZhbmv_v2_64", - "cublasZhbmv_64", - "cublasZgeru_v2_64", - "cublasZgeru_64", - "cublasZgerc_v2_64", - "cublasZgerc_64", - "cublasZgemv_v2_64", - "cublasZgemv_64", - "cublasZgemvStridedBatched_64", - "cublasZgemvBatched_64", "cublasZgemm_v2_64", "cublasZgemm_64", "cublasZgemmStridedBatched_64", @@ -11322,55 +11736,21 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrsv_v2_64", - "cublasStrsv_64", "cublasStrsm_v2_64", "cublasStrsm_64", "cublasStrsmBatched_64", - "cublasStrmv_v2_64", - "cublasStrmv_64", "cublasStrmm_v2_64", "cublasStrmm_64", "cublasStpttr", - "cublasStpsv_v2_64", - "cublasStpsv_64", - "cublasStpmv_v2_64", - "cublasStpmv_64", - "cublasStbsv_v2_64", - "cublasStbsv_64", - "cublasStbmv_v2_64", - "cublasStbmv_64", "cublasSsyrkx_64", "cublasSsyrk_v2_64", "cublasSsyrk_64", - "cublasSsyr_v2_64", - "cublasSsyr_64", "cublasSsyr2k_v2_64", "cublasSsyr2k_64", - "cublasSsyr2_v2_64", - "cublasSsyr2_64", - "cublasSsymv_v2_64", - "cublasSsymv_64", "cublasSsymm_v2_64", "cublasSsymm_64", - "cublasSspr_v2_64", - "cublasSspr_64", - "cublasSspr2_v2_64", - "cublasSspr2_64", - 
"cublasSspmv_v2_64", - "cublasSspmv_64", - "cublasSsbmv_v2_64", - "cublasSsbmv_64", "cublasSmatinvBatched", "cublasShutdown", - "cublasSger_v2_64", - "cublasSger_64", - "cublasSgemv_v2_64", - "cublasSgemv_64", - "cublasSgemvStridedBatched_64", - "cublasSgemvStridedBatched", - "cublasSgemvBatched_64", - "cublasSgemvBatched", "cublasSgemm_v2_64", "cublasSgemm_64", "cublasSgemmStridedBatched_64", @@ -11388,13 +11768,10 @@ sub warnHipOnlyUnsupportedFunctions { "cublasSetLoggerCallback", "cublasSetKernelStream", "cublasSdgmm_64", - "cublasScalEx_64", "cublasRotmgEx", "cublasRotmEx_64", "cublasRotmEx", "cublasRotgEx", - "cublasRotEx_64", - "cublasNrm2Ex_64", "cublasMigrateComputeType", "cublasLtReductionScheme_t", "cublasLtPointerModeMask_t", @@ -11464,60 +11841,26 @@ sub warnHipOnlyUnsupportedFunctions { "cublasGetError", "cublasGetCudartVersion", "cublasGemmStridedBatchedEx_64", + "cublasGemmGroupedBatchedEx_64", + "cublasGemmGroupedBatchedEx", "cublasGemmEx_64", "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrsv_v2_64", - "cublasDtrsv_64", "cublasDtrsm_v2_64", "cublasDtrsm_64", "cublasDtrsmBatched_64", - "cublasDtrmv_v2_64", - "cublasDtrmv_64", "cublasDtrmm_v2_64", "cublasDtrmm_64", "cublasDtpttr", - "cublasDtpsv_v2_64", - "cublasDtpsv_64", - "cublasDtpmv_v2_64", - "cublasDtpmv_64", - "cublasDtbsv_v2_64", - "cublasDtbsv_64", - "cublasDtbmv_v2_64", - "cublasDtbmv_64", "cublasDsyrkx_64", "cublasDsyrk_v2_64", "cublasDsyrk_64", - "cublasDsyr_v2_64", - "cublasDsyr_64", "cublasDsyr2k_v2_64", "cublasDsyr2k_64", - "cublasDsyr2_v2_64", - "cublasDsyr2_64", - "cublasDsymv_v2_64", - "cublasDsymv_64", "cublasDsymm_v2_64", "cublasDsymm_64", - "cublasDspr_v2_64", - "cublasDspr_64", - "cublasDspr2_v2_64", - "cublasDspr2_64", - "cublasDspmv_v2_64", - "cublasDspmv_64", - "cublasDsbmv_v2_64", - "cublasDsbmv_64", - "cublasDotcEx_64", - "cublasDotEx_64", "cublasDmatinvBatched", - "cublasDger_v2_64", - "cublasDger_64", - "cublasDgemv_v2_64", - "cublasDgemv_64", - 
"cublasDgemvStridedBatched_64", - "cublasDgemvStridedBatched", - "cublasDgemvBatched_64", - "cublasDgemvBatched", "cublasDgemm_v2_64", "cublasDgemm_64", "cublasDgemmStridedBatched_64", @@ -11527,24 +11870,12 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDgeam_64", "cublasDdgmm_64", "cublasCtrttp", - "cublasCtrsv_v2_64", - "cublasCtrsv_64", "cublasCtrsm_v2_64", "cublasCtrsm_64", "cublasCtrsmBatched_64", - "cublasCtrmv_v2_64", - "cublasCtrmv_64", "cublasCtrmm_v2_64", "cublasCtrmm_64", "cublasCtpttr", - "cublasCtpsv_v2_64", - "cublasCtpsv_64", - "cublasCtpmv_v2_64", - "cublasCtpmv_64", - "cublasCtbsv_v2_64", - "cublasCtbsv_64", - "cublasCtbmv_v2_64", - "cublasCtbmv_64", "cublasCsyrkx_64", "cublasCsyrk_v2_64", "cublasCsyrk_64", @@ -11552,26 +11883,14 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCsyrkEx", "cublasCsyrk3mEx_64", "cublasCsyrk3mEx", - "cublasCsyr_v2_64", - "cublasCsyr_64", "cublasCsyr2k_v2_64", "cublasCsyr2k_64", - "cublasCsyr2_v2_64", - "cublasCsyr2_64", - "cublasCsymv_v2_64", - "cublasCsymv_64", "cublasCsymm_v2_64", "cublasCsymm_64", "cublasCopyEx_64", "cublasCopyEx", "cublasContext", "cublasCmatinvBatched", - "cublasChpr_v2_64", - "cublasChpr_64", - "cublasChpr2_v2_64", - "cublasChpr2_64", - "cublasChpmv_v2_64", - "cublasChpmv_64", "cublasCherkx_64", "cublasCherk_v2_64", "cublasCherk_64", @@ -11579,26 +11898,10 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCherkEx", "cublasCherk3mEx_64", "cublasCherk3mEx", - "cublasCher_v2_64", - "cublasCher_64", "cublasCher2k_v2_64", "cublasCher2k_64", - "cublasCher2_v2_64", - "cublasCher2_64", - "cublasChemv_v2_64", - "cublasChemv_64", "cublasChemm_v2_64", "cublasChemm_64", - "cublasChbmv_v2_64", - "cublasChbmv_64", - "cublasCgeru_v2_64", - "cublasCgeru_64", - "cublasCgerc_v2_64", - "cublasCgerc_64", - "cublasCgemv_v2_64", - "cublasCgemv_64", - "cublasCgemvStridedBatched_64", - "cublasCgemvBatched_64", "cublasCgemm_v2_64", "cublasCgemm_64", "cublasCgemmStridedBatched_64", @@ -11615,7 +11918,6 @@ sub 
warnHipOnlyUnsupportedFunctions { "cublasCgemm3m", "cublasCgeam_64", "cublasCdgmm_64", - "cublasAxpyEx_64", "cublasAsumEx_64", "cublasAsumEx", "cublasAlloc", @@ -11992,6 +12294,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgemm3m", "cublasZgelsBatched", "cublasZgeam_64", + "cublasZgbmv_v2_64", + "cublasZgbmv_64", "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", @@ -12065,6 +12369,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgemmBatched_64", "cublasSgelsBatched", "cublasSgeam_64", + "cublasSgbmv_v2_64", + "cublasSgbmv_64", "cublasSetVector_64", "cublasSetVectorAsync_64", "cublasSetSmCountTarget", @@ -12167,6 +12473,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGetError", "cublasGetCudartVersion", "cublasGemmStridedBatchedEx_64", + "cublasGemmGroupedBatchedEx_64", + "cublasGemmGroupedBatchedEx", "cublasGemmEx_64", "cublasGemmBatchedEx_64", "cublasFree", @@ -12231,6 +12539,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgemmBatched_64", "cublasDgelsBatched", "cublasDgeam_64", + "cublasDgbmv_v2_64", + "cublasDgbmv_64", "cublasDdgmm_64", "cublasCtrttp", "cublasCtrsv_v2_64", @@ -12324,6 +12634,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgemm3m", "cublasCgelsBatched", "cublasCgeam_64", + "cublasCgbmv_v2_64", + "cublasCgbmv_64", "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", diff --git a/docs/hipify-clang.rst b/docs/hipify-clang.rst index ee12238d..789dd0b7 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -37,7 +37,7 @@ Dependencies * `LLVM+Clang `_ of at least version `4.0.0 `_; the latest stable and recommended release: - `18.1.6 `_. + `18.1.8 `_. 
* `CUDA `_ of at least version `7.0 `_, the latest supported version is @@ -178,12 +178,14 @@ Dependencies `18.1.3 `_, `18.1.4 `_, `18.1.5 `_, - `18.1.6 `_ :sup:`4` + `18.1.6 `_, + `18.1.7 `_, + `18.1.8 `_ :sup:`4` - `12.3.2 `_ :sup:`4` - **Latest stable config** - **Latest stable config** * - `19.0.0 git `_ - - `12.4.1 `_ + - `12.5.1 `_ - ✅ - ✅ @@ -225,7 +227,7 @@ Dependencies In most cases, you can get a suitable version of ``LLVM+Clang`` with your package manager. However, you can also `download a release archive `_ and build or install it. In case of multiple versions of ``LLVM`` installed, set `CMAKE_PREFIX_PATH `_ so that -``CMake`` can find the desired version of ``LLVM``. For example, ``-DCMAKE_PREFIX_PATH=D:\LLVM\18.1.6\dist``. +``CMake`` can find the desired version of ``LLVM``. For example, ``-DCMAKE_PREFIX_PATH=D:\LLVM\18.1.8\dist``. Usage ============================================================ @@ -258,7 +260,7 @@ header files used during the hipification process: .. code:: shell - ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.3 --clang-resource-directory=/usr/llvm/18.1.6/dist/lib/clang/18 + ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.3 --clang-resource-directory=/usr/llvm/18.1.8/dist/lib/clang/18 For more information, refer to the `Clang manual for compiling CUDA `_. @@ -395,7 +397,7 @@ To ensure LLVM being found or in case of multiple LLVM instances, specify the pa .. code-block:: bash - -DCMAKE_PREFIX_PATH=/usr/llvm/18.1.6/dist + -DCMAKE_PREFIX_PATH=/usr/llvm/18.1.8/dist On Windows, specify the following option for CMake in the first place: ``-G "Visual Studio 17 2022"``. @@ -469,7 +471,7 @@ LLVM <= 9.0.1 LLVM >= 10.0.0 ----------------- -1. Download `LLVM project `_ sources. +1. Download `LLVM project `_ sources. 2. Build `LLVM project `_: @@ -543,11 +545,12 @@ LLVM >= 10.0.0 .. code-block:: shell - -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.1.1 + -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.2.1 -5. 
Install `CUB `_ belonging to the version corresponding to the CUDA version: - - * To specify the path to CUB, specify using the ``CUDA_CUB_ROOT_DIR`` option: +5. [Optional] Install `CUB 1.9.8 `_ for ``CUDA < 11.0`` only; + for ``CUDA >= 11.0``, the CUB shipped with CUDA will be used for testing. + + * To specify the path to CUB, use the ``CUDA_CUB_ROOT_DIR`` option (only for ``CUDA < 11.0``): **Linux**: @@ -559,7 +562,7 @@ LLVM >= 10.0.0 .. code-block:: shell - -DCUDA_CUB_ROOT_DIR=D:/CUDA/CUB/cub-2.1.0 + -DCUDA_CUB_ROOT_DIR=D:/CUDA/CUB 6. Install `Python `_ version 2.7 or greater. @@ -571,13 +574,13 @@ LLVM >= 10.0.0 .. code-block:: bash - python /usr/llvm/18.1.6/llvm-project/llvm/utils/lit/setup.py install + python /usr/llvm/18.1.8/llvm-project/llvm/utils/lit/setup.py install **Windows**: .. code-block:: shell - python D:/LLVM/18.1.6/llvm-project/llvm/utils/lit/setup.py install + python D:/LLVM/18.1.8/llvm-project/llvm/utils/lit/setup.py install In case of errors similar to ``ModuleNotFoundError: No module named 'setuptools'``, upgrade the ``setuptools`` package: @@ -591,23 +594,23 @@ LLVM >= 10.0.0 .. code-block:: bash - -DLLVM_EXTERNAL_LIT=/usr/llvm/18.1.6/build/bin/llvm-lit + -DLLVM_EXTERNAL_LIT=/usr/llvm/18.1.8/build/bin/llvm-lit **Windows**: .. code-block:: shell - -DLLVM_EXTERNAL_LIT=D:/LLVM/18.1.6/build/Release/bin/llvm-lit.py + -DLLVM_EXTERNAL_LIT=D:/LLVM/18.1.8/build/Release/bin/llvm-lit.py * ``FileCheck``: **Linux**: - Copy from ``/usr/llvm/18.1.6/build/bin/`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. + Copy from ``/usr/llvm/18.1.8/build/bin/`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. **Windows**: - Copy from ``D:/LLVM/18.1.6/build/Release/bin`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. + Copy from ``D:/LLVM/18.1.8/build/Release/bin`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. Alternatively, specify the path to ``FileCheck`` in the ``CMAKE_INSTALL_PREFIX`` option. 
@@ -634,8 +637,8 @@ On Linux, the following configurations are tested: * Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 * Ubuntu 16-19: LLVM 8.0.0 - 14.0.6, CUDA 7.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -* Ubuntu 20-21: LLVM 9.0.0 - 18.1.6, CUDA 7.0 - 12.3.2, cuDNN 5.1.10 - 9.1.1 -* Ubuntu 22-23: LLVM 13.0.0 - 18.1.6, CUDA 7.0 - 12.3.2, cuDNN 8.0.5 - 9.1.1 +* Ubuntu 20-21: LLVM 9.0.0 - 18.1.8, CUDA 7.0 - 12.3.2, cuDNN 5.1.10 - 9.2.1 +* Ubuntu 22-23: LLVM 13.0.0 - 18.1.8, CUDA 7.0 - 12.3.2, cuDNN 8.0.5 - 9.2.1 Minimum build system requirements for the above configurations: @@ -643,7 +646,7 @@ Minimum build system requirements for the above configurations: Recommended build system requirements: -* CMake 3.28.3, GNU C/C++ 13.2, Python 3.12.3. +* CMake 3.30.0, GNU C/C++ 13.2, Python 3.12.4. Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01``: @@ -653,11 +656,10 @@ Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01`` -DHIPIFY_CLANG_TESTS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/18.1.6/dist \ + -DCMAKE_PREFIX_PATH=/usr/llvm/18.1.8/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.3.2 \ - -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.1.1 \ - -DCUDA_CUB_ROOT_DIR=/usr/local/cub-2.1.0 \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/18.1.6/build/bin/llvm-lit \ + -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.2.1 \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/18.1.8/build/bin/llvm-lit \ ../hipify The corresponding successful output is: @@ -681,21 +683,21 @@ The corresponding successful output is: -- - Test hipify-clang : ON -- - Is part of HIP SDK : OFF -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.13") - -- Found LLVM 18.1.6: - -- - CMake module path : /usr/llvm/18.1.6/dist/lib/cmake/llvm - -- - Clang include path : /usr/llvm/18.1.6/dist/include - -- - LLVM Include path : /usr/llvm/18.1.6/dist/include - -- - Binary path : /usr/llvm/18.1.6/dist/bin + -- Found LLVM 
18.1.8: + -- - CMake module path : /usr/llvm/18.1.8/dist/lib/cmake/llvm + -- - Clang include path : /usr/llvm/18.1.8/dist/include + -- - LLVM Include path : /usr/llvm/18.1.8/dist/include + -- - Binary path : /usr/llvm/18.1.8/dist/bin -- Linker detection: GNU ld -- ---- The below configuring for hipify-clang testing only ---- - -- Found Python: /usr/bin/python3.12 (found version "3.12.3") found components: Interpreter + -- Found Python: /usr/bin/python3.12 (found version "3.12.4") found components: Interpreter -- Found lit: /usr/local/bin/lit -- Found FileCheck: /GIT/LLVM/trunk/dist/FileCheck -- Initial CUDA to configure: -- - CUDA Toolkit path : /usr/local/cuda-12.3.2 - -- - CUDA Samples path : OFF - -- - cuDNN path : /usr/local/cudnn-9.1.1 - -- - CUB path : /usr/local/cub-2.1.0 + -- - CUDA Samples path : + -- - cuDNN path : /usr/local/cudnn-9.2.1 + -- - CUB path : -- Found CUDAToolkit: /usr/local/cuda-12.3.2/targets/x86_64-linux/include (found version "12.3.107") -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success @@ -703,8 +705,8 @@ The corresponding successful output is: -- Found CUDA config: -- - CUDA Toolkit path : /usr/local/cuda-12.3.2 -- - CUDA Samples path : OFF - -- - cuDNN path : /usr/local/cudnn-9.1.1 - -- - CUB path : /usr/local/cub-2.1.0 + -- - cuDNN path : /usr/local/cudnn-9.2.1 + -- - CUB path : /usr/local/cuda-12.3.2/include/cub -- Configuring done (0.5s) -- Generating done (0.0s) -- Build files have been written to: /usr/hipify/build @@ -720,11 +722,11 @@ The corresponding successful output is: Running HIPify regression tests =============================================================== CUDA 12.3.107 - will be used for testing - LLVM 18.1.6 - will be used for testing + LLVM 18.1.8 - will be used for testing x86_64 - Platform architecture Linux 6.5.0-15-generic - Platform OS 64 - hipify-clang binary bitness - 64 - python 3.12.3 binary bitness + 64 - python 3.12.4 binary bitness 
=============================================================== -- Testing: 106 tests, 12 threads -- Testing Time: 6.91s @@ -814,18 +816,18 @@ Tested configurations: - ``2019.16.11.29, 2022.17.7.1`` - ``3.27.3`` - ``3.11.4`` - * - ``17.0.1`` :sup:`6` - ``18.1.6`` :sup:`7` + * - ``17.0.1`` :sup:`6` - ``18.1.8`` :sup:`7` - ``7.0 - 12.3.2`` - - ``8.0.5 - 9.1.1`` - - ``2019.16.11.35, 2022.17.9.6`` - - ``3.29.3`` - - ``3.12.3`` + - ``8.0.5 - 9.2.1`` + - ``2019.16.11.37, 2022.17.10.4`` + - ``3.30.0`` + - ``3.12.4`` * - ``19.0.0git`` - - ``7.0 - 12.4.1`` - - ``8.0.5 - 9.1.1`` - - ``2019.16.11.35, 2022.17.9.6`` - - ``3.29.3`` - - ``3.12.3`` + - ``7.0 - 12.5.1`` + - ``8.0.5 - 9.2.1`` + - ``2019.16.11.37, 2022.17.10.4`` + - ``3.30.0`` + - ``3.12.4`` :sup:`5` LLVM 14.x.x is the latest major release supporting Visual Studio 2017. @@ -850,12 +852,11 @@ Building with testing support using ``Visual Studio 17 2022`` on ``Windows 11``: -DHIPIFY_CLANG_TESTS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=D:/LLVM/18.1.6/dist \ + -DCMAKE_PREFIX_PATH=D:/LLVM/18.1.8/dist \ -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3" \ -DCUDA_SDK_ROOT_DIR="C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.3" \ - -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.1.1 \ - -DCUDA_CUB_ROOT_DIR=D:/CUDA/CUB/cub-2.1.0 \ - -DLLVM_EXTERNAL_LIT=D:/LLVM/18.1.6/build/Release/bin/llvm-lit.py \ + -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.2.1 \ + -DLLVM_EXTERNAL_LIT=D:/LLVM/18.1.8/build/Release/bin/llvm-lit.py \ ../hipify The corresponding successful output is: @@ -879,26 +880,26 @@ The corresponding successful output is: -- - Build hipify-clang : ON -- - Test hipify-clang : ON -- - Is part of HIP SDK : OFF - -- Found LLVM 18.1.6: - -- - CMake module path : D:/LLVM/18.1.6/dist/lib/cmake/llvm - -- - Clang include path : D:/LLVM/18.1.6/dist/include - -- - LLVM Include path : D:/LLVM/18.1.6/dist/include - -- - Binary path : D:/LLVM/18.1.6/dist/bin + -- Found LLVM 
18.1.8: + -- - CMake module path : D:/LLVM/18.1.8/dist/lib/cmake/llvm + -- - Clang include path : D:/LLVM/18.1.8/dist/include + -- - LLVM Include path : D:/LLVM/18.1.8/dist/include + -- - Binary path : D:/LLVM/18.1.8/dist/bin -- ---- The below configuring for hipify-clang testing only ---- - -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python312/python.exe (found version "3.12.3") found components: Interpreter + -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python312/python.exe (found version "3.12.4") found components: Interpreter -- Found lit: C:/Users/TT/AppData/Local/Programs/Python/Python312/Scripts/lit.exe - -- Found FileCheck: D:/LLVM/18.1.6/dist/bin/FileCheck.exe + -- Found FileCheck: D:/LLVM/18.1.8/dist/bin/FileCheck.exe -- Initial CUDA to configure: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.3 - -- - cuDNN path : D:/CUDA/cuDNN/9.1.1 - -- - CUB path : D:/CUDA/CUB/cub-2.1.0 + -- - cuDNN path : D:/CUDA/cuDNN/9.2.1 + -- - CUB path : -- Found CUDAToolkit: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3/include (found version "12.3.107") -- Found CUDA config: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.3 - -- - cuDNN path : D:/CUDA/cuDNN/9.1.1 - -- - CUB path : D:/CUDA/CUB/cub-2.1.0 + -- - cuDNN path : D:/CUDA/cuDNN/9.2.1 + -- - CUB path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3/include/cub -- Configuring done (1.4s) -- Generating done (0.1s) -- Build files have been written to: D:/HIPIFY/build diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index 189f0449..4cda4737 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core==1.4.0 +rocm-docs-core==1.6.1 diff --git a/docs/sphinx/requirements.txt 
b/docs/sphinx/requirements.txt index 6a951b0a..ae44135c 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -16,7 +16,7 @@ beautifulsoup4==4.12.3 # via pydata-sphinx-theme breathe==4.35.0 # via rocm-docs-core -certifi==2024.2.2 +certifi==2024.7.4 # via requests cffi==1.16.0 # via @@ -92,7 +92,7 @@ requests==2.32.2 # via # pygithub # sphinx -rocm-docs-core==1.4.0 +rocm-docs-core==1.6.1 # via -r requirements.in smmap==5.0.1 # via gitdb @@ -139,7 +139,7 @@ typing-extensions==4.11.0 # via # pydata-sphinx-theme # pygithub -urllib3==2.2.1 +urllib3==2.2.2 # via # pygithub # requests diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 2c44d4a8..e4ead7db 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -630,7 +630,7 @@ |`cublasIzamin_v2`| | | | |`hipblasIzamin_v2`|6.0.0| | | | | |`cublasIzamin_v2_64`|12.0| | | |`hipblasIzamin_v2_64`|6.1.0| | | | | |`cublasNrm2Ex`|8.0| | | |`hipblasNrm2Ex_v2`|6.0.0| | | | | -|`cublasNrm2Ex_64`|12.0| | | | | | | | | | +|`cublasNrm2Ex_64`|12.0| | | |`hipblasNrm2Ex_v2_64`|6.2.0| | | |6.2.0| |`cublasSasum`| | | | |`hipblasSasum`|1.8.2| | | | | |`cublasSasum_64`|12.0| | | |`hipblasSasum_64`|6.1.0| | | | | |`cublasSasum_v2`| | | | |`hipblasSasum`|1.8.2| | | | | @@ -723,293 +723,293 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cublasCgbmv`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | | -|`cublasCgbmv_64`|12.0| | | |`hipblasCgbmv_64`|6.2.0| | | |6.2.0| +|`cublasCgbmv_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCgbmv_v2`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | | -|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_64`|6.2.0| | | |6.2.0| +|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCgemv`| | | | |`hipblasCgemv_v2`|6.0.0| | | | | -|`cublasCgemv_64`|12.0| | | | | | | | | | 
+|`cublasCgemv_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | |6.2.0| |`cublasCgemv_v2`| | | | |`hipblasCgemv_v2`|6.0.0| | | | | -|`cublasCgemv_v2_64`|12.0| | | | | | | | | | +|`cublasCgemv_v2_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | |6.2.0| |`cublasCgerc`| | | | |`hipblasCgerc_v2`|6.0.0| | | | | -|`cublasCgerc_64`|12.0| | | | | | | | | | +|`cublasCgerc_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | |6.2.0| |`cublasCgerc_v2`| | | | |`hipblasCgerc_v2`|6.0.0| | | | | -|`cublasCgerc_v2_64`|12.0| | | | | | | | | | +|`cublasCgerc_v2_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | |6.2.0| |`cublasCgeru`| | | | |`hipblasCgeru_v2`|6.0.0| | | | | -|`cublasCgeru_64`|12.0| | | | | | | | | | +|`cublasCgeru_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | |6.2.0| |`cublasCgeru_v2`| | | | |`hipblasCgeru_v2`|6.0.0| | | | | -|`cublasCgeru_v2_64`|12.0| | | | | | | | | | +|`cublasCgeru_v2_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | |6.2.0| |`cublasChbmv`| | | | |`hipblasChbmv_v2`|6.0.0| | | | | -|`cublasChbmv_64`|12.0| | | | | | | | | | +|`cublasChbmv_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasChbmv_v2`| | | | |`hipblasChbmv_v2`|6.0.0| | | | | -|`cublasChbmv_v2_64`|12.0| | | | | | | | | | +|`cublasChbmv_v2_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasChemv`| | | | |`hipblasChemv_v2`|6.0.0| | | | | -|`cublasChemv_64`|12.0| | | | | | | | | | +|`cublasChemv_64`|12.0| | | |`hipblasChemv_v2_64`|6.2.0| | | |6.2.0| |`cublasChemv_v2`| | | | |`hipblasChemv_v2`|6.0.0| | | | | -|`cublasChemv_v2_64`|12.0| | | | | | | | | | +|`cublasChemv_v2_64`|12.0| | | |`hipblasChemv_v2_64`|6.2.0| | | |6.2.0| |`cublasCher`| | | | |`hipblasCher_v2`|6.0.0| | | | | |`cublasCher2`| | | | |`hipblasCher2_v2`|6.0.0| | | | | -|`cublasCher2_64`|12.0| | | | | | | | | | +|`cublasCher2_64`|12.0| | | |`hipblasCher2_v2_64`|6.2.0| | | |6.2.0| |`cublasCher2_v2`| | | | |`hipblasCher2_v2`|6.0.0| | | | | -|`cublasCher2_v2_64`|12.0| | | | | | | | | | -|`cublasCher_64`|12.0| | | | | | | 
| | | +|`cublasCher2_v2_64`|12.0| | | |`hipblasCher2_v2_64`|6.2.0| | | |6.2.0| +|`cublasCher_64`|12.0| | | |`hipblasCher_v2_64`|6.2.0| | | |6.2.0| |`cublasCher_v2`| | | | |`hipblasCher_v2`|6.0.0| | | | | -|`cublasCher_v2_64`|12.0| | | | | | | | | | +|`cublasCher_v2_64`|12.0| | | |`hipblasCher_v2_64`|6.2.0| | | |6.2.0| |`cublasChpmv`| | | | |`hipblasChpmv_v2`|6.0.0| | | | | -|`cublasChpmv_64`|12.0| | | | | | | | | | +|`cublasChpmv_64`|12.0| | | |`hipblasChpmv_v2_64`|6.2.0| | | |6.2.0| |`cublasChpmv_v2`| | | | |`hipblasChpmv_v2`|6.0.0| | | | | -|`cublasChpmv_v2_64`|12.0| | | | | | | | | | +|`cublasChpmv_v2_64`|12.0| | | |`hipblasChpmv_v2_64`|6.2.0| | | |6.2.0| |`cublasChpr`| | | | |`hipblasChpr_v2`|6.0.0| | | | | |`cublasChpr2`| | | | |`hipblasChpr2_v2`|6.0.0| | | | | -|`cublasChpr2_64`|12.0| | | | | | | | | | +|`cublasChpr2_64`|12.0| | | |`hipblasChpr2_v2_64`|6.2.0| | | |6.2.0| |`cublasChpr2_v2`| | | | |`hipblasChpr2_v2`|6.0.0| | | | | -|`cublasChpr2_v2_64`|12.0| | | | | | | | | | -|`cublasChpr_64`|12.0| | | | | | | | | | +|`cublasChpr2_v2_64`|12.0| | | |`hipblasChpr2_v2_64`|6.2.0| | | |6.2.0| +|`cublasChpr_64`|12.0| | | |`hipblasChpr_v2_64`|6.2.0| | | |6.2.0| |`cublasChpr_v2`| | | | |`hipblasChpr_v2`|6.0.0| | | | | -|`cublasChpr_v2_64`|12.0| | | | | | | | | | +|`cublasChpr_v2_64`|12.0| | | |`hipblasChpr_v2_64`|6.2.0| | | |6.2.0| |`cublasCsymv`| | | | |`hipblasCsymv_v2`|6.0.0| | | | | -|`cublasCsymv_64`|12.0| | | | | | | | | | +|`cublasCsymv_64`|12.0| | | |`hipblasCsymv_v2_64`|6.2.0| | | |6.2.0| |`cublasCsymv_v2`| | | | |`hipblasCsymv_v2`|6.0.0| | | | | -|`cublasCsymv_v2_64`|12.0| | | | | | | | | | +|`cublasCsymv_v2_64`|12.0| | | |`hipblasCsymv_v2_64`|6.2.0| | | |6.2.0| |`cublasCsyr`| | | | |`hipblasCsyr_v2`|6.0.0| | | | | |`cublasCsyr2`| | | | |`hipblasCsyr2_v2`|6.0.0| | | | | -|`cublasCsyr2_64`|12.0| | | | | | | | | | +|`cublasCsyr2_64`|12.0| | | |`hipblasCsyr2_v2_64`|6.2.0| | | |6.2.0| |`cublasCsyr2_v2`| | | | |`hipblasCsyr2_v2`|6.0.0| | | | | 
-|`cublasCsyr2_v2_64`|12.0| | | | | | | | | | -|`cublasCsyr_64`|12.0| | | | | | | | | | +|`cublasCsyr2_v2_64`|12.0| | | |`hipblasCsyr2_v2_64`|6.2.0| | | |6.2.0| +|`cublasCsyr_64`|12.0| | | |`hipblasCsyr_v2_64`|6.2.0| | | |6.2.0| |`cublasCsyr_v2`| | | | |`hipblasCsyr_v2`|6.0.0| | | | | -|`cublasCsyr_v2_64`|12.0| | | | | | | | | | +|`cublasCsyr_v2_64`|12.0| | | |`hipblasCsyr_v2_64`|6.2.0| | | |6.2.0| |`cublasCtbmv`| | | | |`hipblasCtbmv_v2`|6.0.0| | | | | -|`cublasCtbmv_64`|12.0| | | | | | | | | | +|`cublasCtbmv_64`|12.0| | | |`hipblasCtbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtbmv_v2`| | | | |`hipblasCtbmv_v2`|6.0.0| | | | | -|`cublasCtbmv_v2_64`|12.0| | | | | | | | | | +|`cublasCtbmv_v2_64`|12.0| | | |`hipblasCtbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtbsv`| | | | |`hipblasCtbsv_v2`|6.0.0| | | | | -|`cublasCtbsv_64`|12.0| | | | | | | | | | +|`cublasCtbsv_64`|12.0| | | |`hipblasCtbsv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtbsv_v2`| | | | |`hipblasCtbsv_v2`|6.0.0| | | | | -|`cublasCtbsv_v2_64`|12.0| | | | | | | | | | +|`cublasCtbsv_v2_64`|12.0| | | |`hipblasCtbsv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtpmv`| | | | |`hipblasCtpmv_v2`|6.0.0| | | | | -|`cublasCtpmv_64`|12.0| | | | | | | | | | +|`cublasCtpmv_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtpmv_v2`| | | | |`hipblasCtpmv_v2`|6.0.0| | | | | -|`cublasCtpmv_v2_64`|12.0| | | | | | | | | | +|`cublasCtpmv_v2_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtpsv`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | | -|`cublasCtpsv_64`|12.0| | | | | | | | | | +|`cublasCtpsv_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtpsv_v2`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | | -|`cublasCtpsv_v2_64`|12.0| | | | | | | | | | +|`cublasCtpsv_v2_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtrmv`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | | -|`cublasCtrmv_64`|12.0| | | | | | | | | | +|`cublasCtrmv_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtrmv_v2`| | | | 
|`hipblasCtrmv_v2`|6.0.0| | | | | -|`cublasCtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasCtrmv_v2_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtrsv`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | | -|`cublasCtrsv_64`|12.0| | | | | | | | | | +|`cublasCtrsv_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | |6.2.0| |`cublasCtrsv_v2`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | | -|`cublasCtrsv_v2_64`|12.0| | | | | | | | | | +|`cublasCtrsv_v2_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | |6.2.0| |`cublasDgbmv`| | | | |`hipblasDgbmv`|3.5.0| | | | | |`cublasDgbmv_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | |6.2.0| |`cublasDgbmv_v2`| | | | |`hipblasDgbmv`|3.5.0| | | | | |`cublasDgbmv_v2_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | |6.2.0| |`cublasDgemv`| | | | |`hipblasDgemv`|1.8.2| | | | | -|`cublasDgemv_64`|12.0| | | | | | | | | | +|`cublasDgemv_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | |6.2.0| |`cublasDgemv_v2`| | | | |`hipblasDgemv`|1.8.2| | | | | -|`cublasDgemv_v2_64`|12.0| | | | | | | | | | +|`cublasDgemv_v2_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | |6.2.0| |`cublasDger`| | | | |`hipblasDger`|1.8.2| | | | | -|`cublasDger_64`|12.0| | | | | | | | | | +|`cublasDger_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | |6.2.0| |`cublasDger_v2`| | | | |`hipblasDger`|1.8.2| | | | | -|`cublasDger_v2_64`|12.0| | | | | | | | | | +|`cublasDger_v2_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | |6.2.0| |`cublasDsbmv`| | | | |`hipblasDsbmv`|3.5.0| | | | | -|`cublasDsbmv_64`|12.0| | | | | | | | | | +|`cublasDsbmv_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | |6.2.0| |`cublasDsbmv_v2`| | | | |`hipblasDsbmv`|3.5.0| | | | | -|`cublasDsbmv_v2_64`|12.0| | | | | | | | | | +|`cublasDsbmv_v2_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | |6.2.0| |`cublasDspmv`| | | | |`hipblasDspmv`|3.5.0| | | | | -|`cublasDspmv_64`|12.0| | | | | | | | | | +|`cublasDspmv_64`|12.0| | | |`hipblasDspmv_64`|6.2.0| | | |6.2.0| |`cublasDspmv_v2`| | | | |`hipblasDspmv`|3.5.0| | | | | -|`cublasDspmv_v2_64`|12.0| | | | | | | | 
| | +|`cublasDspmv_v2_64`|12.0| | | |`hipblasDspmv_64`|6.2.0| | | |6.2.0| |`cublasDspr`| | | | |`hipblasDspr`|3.5.0| | | | | |`cublasDspr2`| | | | |`hipblasDspr2`|3.5.0| | | | | -|`cublasDspr2_64`|12.0| | | | | | | | | | +|`cublasDspr2_64`|12.0| | | |`hipblasDspr2_64`|6.2.0| | | |6.2.0| |`cublasDspr2_v2`| | | | |`hipblasDspr2`|3.5.0| | | | | -|`cublasDspr2_v2_64`|12.0| | | | | | | | | | -|`cublasDspr_64`|12.0| | | | | | | | | | +|`cublasDspr2_v2_64`|12.0| | | |`hipblasDspr2_64`|6.2.0| | | |6.2.0| +|`cublasDspr_64`|12.0| | | |`hipblasDspr_64`|6.2.0| | | |6.2.0| |`cublasDspr_v2`| | | | |`hipblasDspr`|3.5.0| | | | | -|`cublasDspr_v2_64`|12.0| | | | | | | | | | +|`cublasDspr_v2_64`|12.0| | | |`hipblasDspr_64`|6.2.0| | | |6.2.0| |`cublasDsymv`| | | | |`hipblasDsymv`|3.5.0| | | | | -|`cublasDsymv_64`|12.0| | | | | | | | | | +|`cublasDsymv_64`|12.0| | | |`hipblasDsymv_64`|6.2.0| | | |6.2.0| |`cublasDsymv_v2`| | | | |`hipblasDsymv`|3.5.0| | | | | -|`cublasDsymv_v2_64`|12.0| | | | | | | | | | +|`cublasDsymv_v2_64`|12.0| | | |`hipblasDsymv_64`|6.2.0| | | |6.2.0| |`cublasDsyr`| | | | |`hipblasDsyr`|3.0.0| | | | | |`cublasDsyr2`| | | | |`hipblasDsyr2`|3.5.0| | | | | -|`cublasDsyr2_64`|12.0| | | | | | | | | | +|`cublasDsyr2_64`|12.0| | | |`hipblasDsyr2_64`|6.2.0| | | |6.2.0| |`cublasDsyr2_v2`| | | | |`hipblasDsyr2`|3.5.0| | | | | -|`cublasDsyr2_v2_64`|12.0| | | | | | | | | | -|`cublasDsyr_64`|12.0| | | | | | | | | | +|`cublasDsyr2_v2_64`|12.0| | | |`hipblasDsyr2_64`|6.2.0| | | |6.2.0| +|`cublasDsyr_64`|12.0| | | |`hipblasDsyr_64`|6.2.0| | | |6.2.0| |`cublasDsyr_v2`| | | | |`hipblasDsyr`|3.0.0| | | | | -|`cublasDsyr_v2_64`|12.0| | | | | | | | | | +|`cublasDsyr_v2_64`|12.0| | | |`hipblasDsyr_64`|6.2.0| | | |6.2.0| |`cublasDtbmv`| | | | |`hipblasDtbmv`|3.5.0| | | | | -|`cublasDtbmv_64`|12.0| | | | | | | | | | +|`cublasDtbmv_64`|12.0| | | |`hipblasDtbmv_64`|6.2.0| | | |6.2.0| |`cublasDtbmv_v2`| | | | |`hipblasDtbmv`|3.5.0| | | | | -|`cublasDtbmv_v2_64`|12.0| | | | | | | | | | 
+|`cublasDtbmv_v2_64`|12.0| | | |`hipblasDtbmv_64`|6.2.0| | | |6.2.0| |`cublasDtbsv`| | | | |`hipblasDtbsv`|3.6.0| | | | | -|`cublasDtbsv_64`|12.0| | | | | | | | | | +|`cublasDtbsv_64`|12.0| | | |`hipblasDtbsv_64`|6.2.0| | | |6.2.0| |`cublasDtbsv_v2`| | | | |`hipblasDtbsv`|3.6.0| | | | | -|`cublasDtbsv_v2_64`|12.0| | | | | | | | | | +|`cublasDtbsv_v2_64`|12.0| | | |`hipblasDtbsv_64`|6.2.0| | | |6.2.0| |`cublasDtpmv`| | | | |`hipblasDtpmv`|3.5.0| | | | | -|`cublasDtpmv_64`|12.0| | | | | | | | | | +|`cublasDtpmv_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | |6.2.0| |`cublasDtpmv_v2`| | | | |`hipblasDtpmv`|3.5.0| | | | | -|`cublasDtpmv_v2_64`|12.0| | | | | | | | | | +|`cublasDtpmv_v2_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | |6.2.0| |`cublasDtpsv`| | | | |`hipblasDtpsv`|3.5.0| | | | | -|`cublasDtpsv_64`|12.0| | | | | | | | | | +|`cublasDtpsv_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | |6.2.0| |`cublasDtpsv_v2`| | | | |`hipblasDtpsv`|3.5.0| | | | | -|`cublasDtpsv_v2_64`|12.0| | | | | | | | | | +|`cublasDtpsv_v2_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | |6.2.0| |`cublasDtrmv`| | | | |`hipblasDtrmv`|3.5.0| | | | | -|`cublasDtrmv_64`|12.0| | | | | | | | | | +|`cublasDtrmv_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | |6.2.0| |`cublasDtrmv_v2`| | | | |`hipblasDtrmv`|3.5.0| | | | | -|`cublasDtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasDtrmv_v2_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | |6.2.0| |`cublasDtrsv`| | | | |`hipblasDtrsv`|3.0.0| | | | | -|`cublasDtrsv_64`|12.0| | | | | | | | | | +|`cublasDtrsv_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | |6.2.0| |`cublasDtrsv_v2`| | | | |`hipblasDtrsv`|3.0.0| | | | | -|`cublasDtrsv_v2_64`|12.0| | | | | | | | | | +|`cublasDtrsv_v2_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | |6.2.0| |`cublasSgbmv`| | | | |`hipblasSgbmv`|3.5.0| | | | | |`cublasSgbmv_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | |6.2.0| |`cublasSgbmv_v2`| | | | |`hipblasSgbmv`|3.5.0| | | | | |`cublasSgbmv_v2_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | |6.2.0| 
|`cublasSgemv`| | | | |`hipblasSgemv`|1.8.2| | | | | -|`cublasSgemv_64`|12.0| | | | | | | | | | +|`cublasSgemv_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | |6.2.0| |`cublasSgemv_v2`| | | | |`hipblasSgemv`|1.8.2| | | | | -|`cublasSgemv_v2_64`|12.0| | | | | | | | | | +|`cublasSgemv_v2_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | |6.2.0| |`cublasSger`| | | | |`hipblasSger`|1.8.2| | | | | -|`cublasSger_64`|12.0| | | | | | | | | | +|`cublasSger_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | |6.2.0| |`cublasSger_v2`| | | | |`hipblasSger`|1.8.2| | | | | -|`cublasSger_v2_64`|12.0| | | | | | | | | | +|`cublasSger_v2_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | |6.2.0| |`cublasSsbmv`| | | | |`hipblasSsbmv`|3.5.0| | | | | -|`cublasSsbmv_64`|12.0| | | | | | | | | | +|`cublasSsbmv_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | |6.2.0| |`cublasSsbmv_v2`| | | | |`hipblasSsbmv`|3.5.0| | | | | -|`cublasSsbmv_v2_64`|12.0| | | | | | | | | | +|`cublasSsbmv_v2_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | |6.2.0| |`cublasSspmv`| | | | |`hipblasSspmv`|3.5.0| | | | | -|`cublasSspmv_64`|12.0| | | | | | | | | | +|`cublasSspmv_64`|12.0| | | |`hipblasSspmv_64`|6.2.0| | | |6.2.0| |`cublasSspmv_v2`| | | | |`hipblasSspmv`|3.5.0| | | | | -|`cublasSspmv_v2_64`|12.0| | | | | | | | | | +|`cublasSspmv_v2_64`|12.0| | | |`hipblasSspmv_64`|6.2.0| | | |6.2.0| |`cublasSspr`| | | | |`hipblasSspr`|3.5.0| | | | | |`cublasSspr2`| | | | |`hipblasSspr2`|3.5.0| | | | | -|`cublasSspr2_64`|12.0| | | | | | | | | | +|`cublasSspr2_64`|12.0| | | |`hipblasSspr2_64`|6.2.0| | | |6.2.0| |`cublasSspr2_v2`| | | | |`hipblasSspr2`|3.5.0| | | | | -|`cublasSspr2_v2_64`|12.0| | | | | | | | | | -|`cublasSspr_64`|12.0| | | | | | | | | | +|`cublasSspr2_v2_64`|12.0| | | |`hipblasSspr2_64`|6.2.0| | | |6.2.0| +|`cublasSspr_64`|12.0| | | |`hipblasSspr_64`|6.2.0| | | |6.2.0| |`cublasSspr_v2`| | | | |`hipblasSspr`|3.5.0| | | | | -|`cublasSspr_v2_64`|12.0| | | | | | | | | | +|`cublasSspr_v2_64`|12.0| | | |`hipblasSspr_64`|6.2.0| | | |6.2.0| 
|`cublasSsymv`| | | | |`hipblasSsymv`|3.5.0| | | | | -|`cublasSsymv_64`|12.0| | | | | | | | | | +|`cublasSsymv_64`|12.0| | | |`hipblasSsymv_64`|6.2.0| | | |6.2.0| |`cublasSsymv_v2`| | | | |`hipblasSsymv`|3.5.0| | | | | -|`cublasSsymv_v2_64`|12.0| | | | | | | | | | +|`cublasSsymv_v2_64`|12.0| | | |`hipblasSsymv_64`|6.2.0| | | |6.2.0| |`cublasSsyr`| | | | |`hipblasSsyr`|3.0.0| | | | | |`cublasSsyr2`| | | | |`hipblasSsyr2`|3.5.0| | | | | -|`cublasSsyr2_64`|12.0| | | | | | | | | | +|`cublasSsyr2_64`|12.0| | | |`hipblasSsyr2_64`|6.2.0| | | |6.2.0| |`cublasSsyr2_v2`| | | | |`hipblasSsyr2`|3.5.0| | | | | -|`cublasSsyr2_v2_64`|12.0| | | | | | | | | | -|`cublasSsyr_64`|12.0| | | | | | | | | | +|`cublasSsyr2_v2_64`|12.0| | | |`hipblasSsyr2_64`|6.2.0| | | |6.2.0| +|`cublasSsyr_64`|12.0| | | |`hipblasSsyr_64`|6.2.0| | | |6.2.0| |`cublasSsyr_v2`| | | | |`hipblasSsyr`|3.0.0| | | | | -|`cublasSsyr_v2_64`|12.0| | | | | | | | | | +|`cublasSsyr_v2_64`|12.0| | | |`hipblasSsyr_64`|6.2.0| | | |6.2.0| |`cublasStbmv`| | | | |`hipblasStbmv`|3.5.0| | | | | -|`cublasStbmv_64`|12.0| | | | | | | | | | +|`cublasStbmv_64`|12.0| | | |`hipblasStbmv_64`|6.2.0| | | |6.2.0| |`cublasStbmv_v2`| | | | |`hipblasStbmv`|3.5.0| | | | | -|`cublasStbmv_v2_64`|12.0| | | | | | | | | | +|`cublasStbmv_v2_64`|12.0| | | |`hipblasStbmv_64`|6.2.0| | | |6.2.0| |`cublasStbsv`| | | | |`hipblasStbsv`|3.6.0| | | | | -|`cublasStbsv_64`|12.0| | | | | | | | | | +|`cublasStbsv_64`|12.0| | | |`hipblasStbsv_64`|6.2.0| | | |6.2.0| |`cublasStbsv_v2`| | | | |`hipblasStbsv`|3.6.0| | | | | -|`cublasStbsv_v2_64`|12.0| | | | | | | | | | +|`cublasStbsv_v2_64`|12.0| | | |`hipblasStbsv_64`|6.2.0| | | |6.2.0| |`cublasStpmv`| | | | |`hipblasStpmv`|3.5.0| | | | | -|`cublasStpmv_64`|12.0| | | | | | | | | | +|`cublasStpmv_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | |6.2.0| |`cublasStpmv_v2`| | | | |`hipblasStpmv`|3.5.0| | | | | -|`cublasStpmv_v2_64`|12.0| | | | | | | | | | +|`cublasStpmv_v2_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | |6.2.0| 
|`cublasStpsv`| | | | |`hipblasStpsv`|3.5.0| | | | | -|`cublasStpsv_64`|12.0| | | | | | | | | | +|`cublasStpsv_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | |6.2.0| |`cublasStpsv_v2`| | | | |`hipblasStpsv`|3.5.0| | | | | -|`cublasStpsv_v2_64`|12.0| | | | | | | | | | +|`cublasStpsv_v2_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | |6.2.0| |`cublasStrmv`| | | | |`hipblasStrmv`|3.5.0| | | | | -|`cublasStrmv_64`|12.0| | | | | | | | | | +|`cublasStrmv_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | |6.2.0| |`cublasStrmv_v2`| | | | |`hipblasStrmv`|3.5.0| | | | | -|`cublasStrmv_v2_64`|12.0| | | | | | | | | | +|`cublasStrmv_v2_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | |6.2.0| |`cublasStrsv`| | | | |`hipblasStrsv`|3.0.0| | | | | -|`cublasStrsv_64`|12.0| | | | | | | | | | +|`cublasStrsv_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | |6.2.0| |`cublasStrsv_v2`| | | | |`hipblasStrsv`|3.0.0| | | | | -|`cublasStrsv_v2_64`|12.0| | | | | | | | | | +|`cublasStrsv_v2_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | |6.2.0| |`cublasZgbmv`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | | -|`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_64`|6.2.0| | | |6.2.0| +|`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZgbmv_v2`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | | -|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_64`|6.2.0| | | |6.2.0| +|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZgemv`| | | | |`hipblasZgemv_v2`|6.0.0| | | | | -|`cublasZgemv_64`|12.0| | | | | | | | | | +|`cublasZgemv_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | |6.2.0| |`cublasZgemv_v2`| | | | |`hipblasZgemv_v2`|6.0.0| | | | | -|`cublasZgemv_v2_64`|12.0| | | | | | | | | | +|`cublasZgemv_v2_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | |6.2.0| |`cublasZgerc`| | | | |`hipblasZgerc_v2`|6.0.0| | | | | -|`cublasZgerc_64`|12.0| | | | | | | | | | +|`cublasZgerc_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | |6.2.0| |`cublasZgerc_v2`| | | | |`hipblasZgerc_v2`|6.0.0| | | | | 
-|`cublasZgerc_v2_64`|12.0| | | | | | | | | | +|`cublasZgerc_v2_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | |6.2.0| |`cublasZgeru`| | | | |`hipblasZgeru_v2`|6.0.0| | | | | -|`cublasZgeru_64`|12.0| | | | | | | | | | +|`cublasZgeru_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | |6.2.0| |`cublasZgeru_v2`| | | | |`hipblasZgeru_v2`|6.0.0| | | | | -|`cublasZgeru_v2_64`|12.0| | | | | | | | | | +|`cublasZgeru_v2_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | |6.2.0| |`cublasZhbmv`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | | -|`cublasZhbmv_64`|12.0| | | | | | | | | | +|`cublasZhbmv_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZhbmv_v2`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | | -|`cublasZhbmv_v2_64`|12.0| | | | | | | | | | +|`cublasZhbmv_v2_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZhemv`| | | | |`hipblasZhemv_v2`|6.0.0| | | | | -|`cublasZhemv_64`|12.0| | | | | | | | | | +|`cublasZhemv_64`|12.0| | | |`hipblasZhemv_v2_64`|6.2.0| | | |6.2.0| |`cublasZhemv_v2`| | | | |`hipblasZhemv_v2`|6.0.0| | | | | -|`cublasZhemv_v2_64`|12.0| | | | | | | | | | +|`cublasZhemv_v2_64`|12.0| | | |`hipblasZhemv_v2_64`|6.2.0| | | |6.2.0| |`cublasZher`| | | | |`hipblasZher_v2`|6.0.0| | | | | |`cublasZher2`| | | | |`hipblasZher2_v2`|6.0.0| | | | | -|`cublasZher2_64`|12.0| | | | | | | | | | +|`cublasZher2_64`|12.0| | | |`hipblasZher2_v2_64`|6.2.0| | | |6.2.0| |`cublasZher2_v2`| | | | |`hipblasZher2_v2`|6.0.0| | | | | -|`cublasZher2_v2_64`|12.0| | | | | | | | | | -|`cublasZher_64`|12.0| | | | | | | | | | +|`cublasZher2_v2_64`|12.0| | | |`hipblasZher2_v2_64`|6.2.0| | | |6.2.0| +|`cublasZher_64`|12.0| | | |`hipblasZher_v2_64`|6.2.0| | | |6.2.0| |`cublasZher_v2`| | | | |`hipblasZher_v2`|6.0.0| | | | | -|`cublasZher_v2_64`|12.0| | | | | | | | | | +|`cublasZher_v2_64`|12.0| | | |`hipblasZher_v2_64`|6.2.0| | | |6.2.0| |`cublasZhpmv`| | | | |`hipblasZhpmv_v2`|6.0.0| | | | | -|`cublasZhpmv_64`|12.0| | | | | | | | | | +|`cublasZhpmv_64`|12.0| | | 
|`hipblasZhpmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZhpmv_v2`| | | | |`hipblasZhpmv_v2`|6.0.0| | | | | -|`cublasZhpmv_v2_64`|12.0| | | | | | | | | | +|`cublasZhpmv_v2_64`|12.0| | | |`hipblasZhpmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZhpr`| | | | |`hipblasZhpr_v2`|6.0.0| | | | | |`cublasZhpr2`| | | | |`hipblasZhpr2_v2`|6.0.0| | | | | -|`cublasZhpr2_64`|12.0| | | | | | | | | | +|`cublasZhpr2_64`|12.0| | | |`hipblasZhpr2_v2_64`|6.2.0| | | |6.2.0| |`cublasZhpr2_v2`| | | | |`hipblasZhpr2_v2`|6.0.0| | | | | -|`cublasZhpr2_v2_64`|12.0| | | | | | | | | | -|`cublasZhpr_64`|12.0| | | | | | | | | | +|`cublasZhpr2_v2_64`|12.0| | | |`hipblasZhpr2_v2_64`|6.2.0| | | |6.2.0| +|`cublasZhpr_64`|12.0| | | |`hipblasZhpr_v2_64`|6.2.0| | | |6.2.0| |`cublasZhpr_v2`| | | | |`hipblasZhpr_v2`|6.0.0| | | | | -|`cublasZhpr_v2_64`|12.0| | | | | | | | | | +|`cublasZhpr_v2_64`|12.0| | | |`hipblasZhpr_v2_64`|6.2.0| | | |6.2.0| |`cublasZsymv`| | | | |`hipblasZsymv_v2`|6.0.0| | | | | -|`cublasZsymv_64`|12.0| | | | | | | | | | +|`cublasZsymv_64`|12.0| | | |`hipblasZsymv_v2_64`|6.2.0| | | |6.2.0| |`cublasZsymv_v2`| | | | |`hipblasZsymv_v2`|6.0.0| | | | | -|`cublasZsymv_v2_64`|12.0| | | | | | | | | | +|`cublasZsymv_v2_64`|12.0| | | |`hipblasZsymv_v2_64`|6.2.0| | | |6.2.0| |`cublasZsyr`| | | | |`hipblasZsyr_v2`|6.0.0| | | | | |`cublasZsyr2`| | | | |`hipblasZsyr2_v2`|6.0.0| | | | | -|`cublasZsyr2_64`|12.0| | | | | | | | | | +|`cublasZsyr2_64`|12.0| | | |`hipblasZsyr2_v2_64`|6.2.0| | | |6.2.0| |`cublasZsyr2_v2`| | | | |`hipblasZsyr2_v2`|6.0.0| | | | | -|`cublasZsyr2_v2_64`|12.0| | | | | | | | | | -|`cublasZsyr_64`|12.0| | | | | | | | | | +|`cublasZsyr2_v2_64`|12.0| | | |`hipblasZsyr2_v2_64`|6.2.0| | | |6.2.0| +|`cublasZsyr_64`|12.0| | | |`hipblasZsyr_v2_64`|6.2.0| | | |6.2.0| |`cublasZsyr_v2`| | | | |`hipblasZsyr_v2`|6.0.0| | | | | -|`cublasZsyr_v2_64`|12.0| | | | | | | | | | +|`cublasZsyr_v2_64`|12.0| | | |`hipblasZsyr_v2_64`|6.2.0| | | |6.2.0| |`cublasZtbmv`| | | | |`hipblasZtbmv_v2`|6.0.0| | | | | 
-|`cublasZtbmv_64`|12.0| | | | | | | | | | +|`cublasZtbmv_64`|12.0| | | |`hipblasZtbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtbmv_v2`| | | | |`hipblasZtbmv_v2`|6.0.0| | | | | -|`cublasZtbmv_v2_64`|12.0| | | | | | | | | | +|`cublasZtbmv_v2_64`|12.0| | | |`hipblasZtbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtbsv`| | | | |`hipblasZtbsv_v2`|6.0.0| | | | | -|`cublasZtbsv_64`|12.0| | | | | | | | | | +|`cublasZtbsv_64`|12.0| | | |`hipblasZtbsv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtbsv_v2`| | | | |`hipblasZtbsv_v2`|6.0.0| | | | | -|`cublasZtbsv_v2_64`|12.0| | | | | | | | | | +|`cublasZtbsv_v2_64`|12.0| | | |`hipblasZtbsv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtpmv`| | | | |`hipblasZtpmv_v2`|6.0.0| | | | | -|`cublasZtpmv_64`|12.0| | | | | | | | | | +|`cublasZtpmv_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtpmv_v2`| | | | |`hipblasZtpmv_v2`|6.0.0| | | | | -|`cublasZtpmv_v2_64`|12.0| | | | | | | | | | +|`cublasZtpmv_v2_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtpsv`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | | -|`cublasZtpsv_64`|12.0| | | | | | | | | | +|`cublasZtpsv_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtpsv_v2`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | | -|`cublasZtpsv_v2_64`|12.0| | | | | | | | | | +|`cublasZtpsv_v2_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtrmv`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | | -|`cublasZtrmv_64`|12.0| | | | | | | | | | +|`cublasZtrmv_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtrmv_v2`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | | -|`cublasZtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasZtrmv_v2_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtrsv`| | | | |`hipblasZtrsv_v2`|6.0.0| | | | | -|`cublasZtrsv_64`|12.0| | | | | | | | | | +|`cublasZtrsv_64`|12.0| | | |`hipblasZtrsv_v2_64`|6.2.0| | | |6.2.0| |`cublasZtrsv_v2`| | | | |`hipblasZtrsv_v2`|6.0.0| | | | | -|`cublasZtrsv_v2_64`|12.0| | | | | | | | | | +|`cublasZtrsv_v2_64`|12.0| | | 
|`hipblasZtrsv_v2_64`|6.2.0| | | |6.2.0| ## **7. CUBLAS Level-3 Function Reference** @@ -1032,9 +1032,9 @@ |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | | |`cublasCgemm_v2_64`|12.0| | | | | | | | | | |`cublasCgemvBatched`|11.6| | | |`hipblasCgemvBatched_v2`|6.0.0| | | | | -|`cublasCgemvBatched_64`|12.0| | | | | | | | | | +|`cublasCgemvBatched_64`|12.0| | | |`hipblasCgemvBatched_v2_64`|6.2.0| | | |6.2.0| |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | | -|`cublasCgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasCgemvStridedBatched_64`|12.0| | | |`hipblasCgemvStridedBatched_v2_64`|6.2.0| | | |6.2.0| |`cublasChemm`| | | | |`hipblasChemm_v2`|6.0.0| | | | | |`cublasChemm_64`|12.0| | | | | | | | | | |`cublasChemm_v2`| | | | |`hipblasChemm_v2`|6.0.0| | | | | @@ -1081,10 +1081,10 @@ |`cublasDgemm_64`|12.0| | | | | | | | | | |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | | |`cublasDgemm_v2_64`|12.0| | | | | | | | | | -|`cublasDgemvBatched`|11.6| | | | | | | | | | -|`cublasDgemvBatched_64`|12.0| | | | | | | | | | -|`cublasDgemvStridedBatched`|11.6| | | | | | | | | | -|`cublasDgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasDgemvBatched`|11.6| | | |`hipblasDgemvBatched`|3.0.0| | | | | +|`cublasDgemvBatched_64`|12.0| | | |`hipblasDgemvBatched_64`|6.2.0| | | |6.2.0| +|`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | | +|`cublasDgemvStridedBatched_64`|12.0| | | |`hipblasDgemvStridedBatched_64`|6.2.0| | | |6.2.0| |`cublasDsymm`| | | | |`hipblasDsymm`|3.6.0| | | | | |`cublasDsymm_64`|12.0| | | | | | | | | | |`cublasDsymm_v2`| | | | |`hipblasDsymm`|3.6.0| | | | | @@ -1107,6 +1107,8 @@ |`cublasDtrsm_64`|12.0| | | | | | | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | | |`cublasDtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | +|`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | |`cublasHSHgemvBatched`|11.6| | | | | | | | | 
| |`cublasHSHgemvBatched_64`|12.0| | | | | | | | | | |`cublasHSHgemvStridedBatched`|11.6| | | | | | | | | | @@ -1131,10 +1133,10 @@ |`cublasSgemm_64`|12.0| | | | | | | | | | |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | | |`cublasSgemm_v2_64`|12.0| | | | | | | | | | -|`cublasSgemvBatched`|11.6| | | | | | | | | | -|`cublasSgemvBatched_64`|12.0| | | | | | | | | | -|`cublasSgemvStridedBatched`|11.6| | | | | | | | | | -|`cublasSgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasSgemvBatched`|11.6| | | |`hipblasSgemvBatched`|1.6.0| | | | | +|`cublasSgemvBatched_64`|12.0| | | |`hipblasSgemvBatched_64`|6.2.0| | | |6.2.0| +|`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | | +|`cublasSgemvStridedBatched_64`|12.0| | | |`hipblasSgemvStridedBatched_64`|6.2.0| | | |6.2.0| |`cublasSsymm`| | | | |`hipblasSsymm`|3.6.0| | | | | |`cublasSsymm_64`|12.0| | | | | | | | | | |`cublasSsymm_v2`| | | | |`hipblasSsymm`|3.6.0| | | | | @@ -1176,9 +1178,9 @@ |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | | |`cublasZgemm_v2_64`|12.0| | | | | | | | | | |`cublasZgemvBatched`|11.6| | | |`hipblasZgemvBatched_v2`|6.0.0| | | | | -|`cublasZgemvBatched_64`|12.0| | | | | | | | | | +|`cublasZgemvBatched_64`|12.0| | | |`hipblasZgemvBatched_v2_64`|6.2.0| | | |6.2.0| |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | | -|`cublasZgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasZgemvStridedBatched_64`|12.0| | | |`hipblasZgemvStridedBatched_v2_64`|6.2.0| | | |6.2.0| |`cublasZhemm`| | | | |`hipblasZhemm_v2`|6.0.0| | | | | |`cublasZhemm_64`|12.0| | | | | | | | | | |`cublasZhemm_v2`| | | | |`hipblasZhemm_v2`|6.0.0| | | | | @@ -1223,7 +1225,7 @@ |`cublasAsumEx`|10.1| | | | | | | | | | |`cublasAsumEx_64`|12.0| | | | | | | | | | |`cublasAxpyEx`|8.0| | | |`hipblasAxpyEx_v2`|6.0.0| | | | | -|`cublasAxpyEx_64`|12.0| | | | | | | | | | +|`cublasAxpyEx_64`|12.0| | | |`hipblasAxpyEx_v2_64`|6.2.0| | | |6.2.0| |`cublasCdgmm`| | | 
| |`hipblasCdgmm_v2`|6.0.0| | | | | |`cublasCdgmm_64`|12.0| | | | | | | | | | |`cublasCgeam`| | | | |`hipblasCgeam_v2`|6.0.0| | | | | @@ -1261,9 +1263,9 @@ |`cublasDgetrsBatched`| | | | |`hipblasDgetrsBatched`|3.5.0| | | | | |`cublasDmatinvBatched`| | | | | | | | | | | |`cublasDotEx`|8.0| | | |`hipblasDotEx_v2`|6.0.0| | | | | -|`cublasDotEx_64`|12.0| | | | | | | | | | +|`cublasDotEx_64`|12.0| | | |`hipblasDotEx_v2_64`|6.2.0| | | |6.2.0| |`cublasDotcEx`|8.0| | | |`hipblasDotcEx_v2`|6.0.0| | | | | -|`cublasDotcEx_64`|12.0| | | | | | | | | | +|`cublasDotcEx_64`|12.0| | | |`hipblasDotcEx_v2_64`|6.2.0| | | |6.2.0| |`cublasDtpttr`| | | | | | | | | | | |`cublasDtrsmBatched`| | | | |`hipblasDtrsmBatched`|3.2.0| | | | | |`cublasDtrsmBatched_64`|12.0| | | | | | | | | | @@ -1279,13 +1281,13 @@ |`cublasIaminEx`|10.1| | | | | | | | | | |`cublasIaminEx_64`|12.0| | | | | | | | | | |`cublasRotEx`|10.1| | | |`hipblasRotEx_v2`|6.0.0| | | | | -|`cublasRotEx_64`|12.0| | | | | | | | | | +|`cublasRotEx_64`|12.0| | | |`hipblasRotEx_v2_64`|6.2.0| | | |6.2.0| |`cublasRotgEx`|10.1| | | | | | | | | | |`cublasRotmEx`|10.1| | | | | | | | | | |`cublasRotmEx_64`|12.0| | | | | | | | | | |`cublasRotmgEx`|10.1| | | | | | | | | | |`cublasScalEx`|8.0| | | |`hipblasScalEx_v2`|6.0.0| | | | | -|`cublasScalEx_64`|12.0| | | | | | | | | | +|`cublasScalEx_64`|12.0| | | |`hipblasScalEx_v2_64`|6.2.0| | | |6.2.0| |`cublasSdgmm`| | | | |`hipblasSdgmm`|3.6.0| | | | | |`cublasSdgmm_64`|12.0| | | | | | | | | | |`cublasSgeam`| | | | |`hipblasSgeam`|1.8.2| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index baefeb22..cd6d0e59 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -630,7 +630,7 @@ |`cublasIzamin_v2`| | | | |`hipblasIzamin_v2`|6.0.0| | | | |`rocblas_izamin`|3.5.0| | | | | |`cublasIzamin_v2_64`|12.0| | | |`hipblasIzamin_v2_64`|6.1.0| | | | 
|`rocblas_izamin_64`|6.1.0| | | | | |`cublasNrm2Ex`|8.0| | | |`hipblasNrm2Ex_v2`|6.0.0| | | | |`rocblas_nrm2_ex`|4.1.0| | | | | -|`cublasNrm2Ex_64`|12.0| | | | | | | | | |`rocblas_nrm2_ex_64`|6.1.0| | | | | +|`cublasNrm2Ex_64`|12.0| | | |`hipblasNrm2Ex_v2_64`|6.2.0| | | |6.2.0|`rocblas_nrm2_ex_64`|6.1.0| | | | | |`cublasSasum`| | | | |`hipblasSasum`|1.8.2| | | | |`rocblas_sasum`|1.5.0| | | | | |`cublasSasum_64`|12.0| | | |`hipblasSasum_64`|6.1.0| | | | |`rocblas_sasum_64`|6.1.0| | | | | |`cublasSasum_v2`| | | | |`hipblasSasum`|1.8.2| | | | |`rocblas_sasum`|1.5.0| | | | | @@ -723,293 +723,293 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|**ROC**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cublasCgbmv`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | |`rocblas_cgbmv`|3.5.0| | | | | -|`cublasCgbmv_64`|12.0| | | |`hipblasCgbmv_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasCgbmv_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgbmv_v2`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | |`rocblas_cgbmv`|3.5.0| | | | | -|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgemv`| | | | |`hipblasCgemv_v2`|6.0.0| | | | |`rocblas_cgemv`|1.5.0| | | | | -|`cublasCgemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemv_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgemv_v2`| | | | |`hipblasCgemv_v2`|6.0.0| | | | |`rocblas_cgemv`|1.5.0| | | | | -|`cublasCgemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemv_v2_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgerc`| | | | |`hipblasCgerc_v2`|6.0.0| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgerc_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgerc_v2`| | | | 
|`hipblasCgerc_v2`|6.0.0| | | | |`rocblas_cgerc`|3.5.0| | | | | -|`cublasCgerc_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgerc_v2_64`|12.0| | | |`hipblasCgerc_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgeru`| | | | |`hipblasCgeru_v2`|6.0.0| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgeru_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgeru_v2`| | | | |`hipblasCgeru_v2`|6.0.0| | | | |`rocblas_cgeru`|3.5.0| | | | | -|`cublasCgeru_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgeru_v2_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChbmv`| | | | |`hipblasChbmv_v2`|6.0.0| | | | |`rocblas_chbmv`|3.5.0| | | | | -|`cublasChbmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChbmv_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChbmv_v2`| | | | |`hipblasChbmv_v2`|6.0.0| | | | |`rocblas_chbmv`|3.5.0| | | | | -|`cublasChbmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChbmv_v2_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChemv`| | | | |`hipblasChemv_v2`|6.0.0| | | | |`rocblas_chemv`|1.5.0| | | | | -|`cublasChemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChemv_64`|12.0| | | |`hipblasChemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChemv_v2`| | | | |`hipblasChemv_v2`|6.0.0| | | | |`rocblas_chemv`|1.5.0| | | | | -|`cublasChemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChemv_v2_64`|12.0| | | |`hipblasChemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCher`| | | | |`hipblasCher_v2`|6.0.0| | | | |`rocblas_cher`|3.5.0| | | | | |`cublasCher2`| | | | |`hipblasCher2_v2`|6.0.0| | | | |`rocblas_cher2`|3.5.0| | | | | -|`cublasCher2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCher2_64`|12.0| | | |`hipblasCher2_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCher2_v2`| | | | |`hipblasCher2_v2`|6.0.0| | | | |`rocblas_cher2`|3.5.0| | | | | -|`cublasCher2_v2_64`|12.0| | | 
| | | | | | | | | | | | | -|`cublasCher_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCher2_v2_64`|12.0| | | |`hipblasCher2_v2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasCher_64`|12.0| | | |`hipblasCher_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCher_v2`| | | | |`hipblasCher_v2`|6.0.0| | | | |`rocblas_cher`|3.5.0| | | | | -|`cublasCher_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCher_v2_64`|12.0| | | |`hipblasCher_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChpmv`| | | | |`hipblasChpmv_v2`|6.0.0| | | | |`rocblas_chpmv`|3.5.0| | | | | -|`cublasChpmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChpmv_64`|12.0| | | |`hipblasChpmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChpmv_v2`| | | | |`hipblasChpmv_v2`|6.0.0| | | | |`rocblas_chpmv`|3.5.0| | | | | -|`cublasChpmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChpmv_v2_64`|12.0| | | |`hipblasChpmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChpr`| | | | |`hipblasChpr_v2`|6.0.0| | | | |`rocblas_chpr`|3.5.0| | | | | |`cublasChpr2`| | | | |`hipblasChpr2_v2`|6.0.0| | | | |`rocblas_chpr2`|3.5.0| | | | | -|`cublasChpr2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChpr2_64`|12.0| | | |`hipblasChpr2_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChpr2_v2`| | | | |`hipblasChpr2_v2`|6.0.0| | | | |`rocblas_chpr2`|3.5.0| | | | | -|`cublasChpr2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasChpr_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChpr2_v2_64`|12.0| | | |`hipblasChpr2_v2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasChpr_64`|12.0| | | |`hipblasChpr_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChpr_v2`| | | | |`hipblasChpr_v2`|6.0.0| | | | |`rocblas_chpr`|3.5.0| | | | | -|`cublasChpr_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasChpr_v2_64`|12.0| | | |`hipblasChpr_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCsymv`| | | | |`hipblasCsymv_v2`|6.0.0| | | | |`rocblas_csymv`|3.5.0| | | | | -|`cublasCsymv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsymv_64`|12.0| | | 
|`hipblasCsymv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCsymv_v2`| | | | |`hipblasCsymv_v2`|6.0.0| | | | |`rocblas_csymv`|3.5.0| | | | | -|`cublasCsymv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsymv_v2_64`|12.0| | | |`hipblasCsymv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCsyr`| | | | |`hipblasCsyr_v2`|6.0.0| | | | |`rocblas_csyr`|1.7.1| | | | | |`cublasCsyr2`| | | | |`hipblasCsyr2_v2`|6.0.0| | | | |`rocblas_csyr2`|3.5.0| | | | | -|`cublasCsyr2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsyr2_64`|12.0| | | |`hipblasCsyr2_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCsyr2_v2`| | | | |`hipblasCsyr2_v2`|6.0.0| | | | |`rocblas_csyr2`|3.5.0| | | | | -|`cublasCsyr2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasCsyr_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsyr2_v2_64`|12.0| | | |`hipblasCsyr2_v2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasCsyr_64`|12.0| | | |`hipblasCsyr_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCsyr_v2`| | | | |`hipblasCsyr_v2`|6.0.0| | | | |`rocblas_csyr`|1.7.1| | | | | -|`cublasCsyr_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCsyr_v2_64`|12.0| | | |`hipblasCsyr_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtbmv`| | | | |`hipblasCtbmv_v2`|6.0.0| | | | |`rocblas_ctbmv`|3.5.0| | | | | -|`cublasCtbmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtbmv_64`|12.0| | | |`hipblasCtbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtbmv_v2`| | | | |`hipblasCtbmv_v2`|6.0.0| | | | |`rocblas_ctbmv`|3.5.0| | | | | -|`cublasCtbmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtbmv_v2_64`|12.0| | | |`hipblasCtbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtbsv`| | | | |`hipblasCtbsv_v2`|6.0.0| | | | |`rocblas_ctbsv`|3.5.0| | | | | -|`cublasCtbsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtbsv_64`|12.0| | | |`hipblasCtbsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtbsv_v2`| | | | |`hipblasCtbsv_v2`|6.0.0| | | | |`rocblas_ctbsv`|3.5.0| | | | | -|`cublasCtbsv_v2_64`|12.0| | | | | | | | | | | | 
| | | | +|`cublasCtbsv_v2_64`|12.0| | | |`hipblasCtbsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtpmv`| | | | |`hipblasCtpmv_v2`|6.0.0| | | | |`rocblas_ctpmv`|3.5.0| | | | | -|`cublasCtpmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtpmv_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtpmv_v2`| | | | |`hipblasCtpmv_v2`|6.0.0| | | | |`rocblas_ctpmv`|3.5.0| | | | | -|`cublasCtpmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtpmv_v2_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtpsv`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | |`rocblas_ctpsv`|3.5.0| | | | | -|`cublasCtpsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtpsv_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtpsv_v2`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | |`rocblas_ctpsv`|3.5.0| | | | | -|`cublasCtpsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtpsv_v2_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtrmv`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrmv_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtrmv_v2`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrmv_v2_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtrsv`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | |`rocblas_ctrsv`|3.5.0| | | | | -|`cublasCtrsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrsv_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCtrsv_v2`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | |`rocblas_ctrsv`|3.5.0| | | | | -|`cublasCtrsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCtrsv_v2_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDgbmv`| | | | |`hipblasDgbmv`|3.5.0| | | | |`rocblas_dgbmv`|3.5.0| | | | | 
|`cublasDgbmv_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDgbmv_v2`| | | | |`hipblasDgbmv`|3.5.0| | | | |`rocblas_dgbmv`|3.5.0| | | | | |`cublasDgbmv_v2_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDgemv`| | | | |`hipblasDgemv`|1.8.2| | | | |`rocblas_dgemv`|1.5.0| | | | | -|`cublasDgemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemv_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDgemv_v2`| | | | |`hipblasDgemv`|1.8.2| | | | |`rocblas_dgemv`|1.5.0| | | | | -|`cublasDgemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemv_v2_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDger`| | | | |`hipblasDger`|1.8.2| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDger_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDger_v2`| | | | |`hipblasDger`|1.8.2| | | | |`rocblas_dger`|1.5.0| | | | | -|`cublasDger_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDger_v2_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDsbmv`| | | | |`hipblasDsbmv`|3.5.0| | | | |`rocblas_dsbmv`|3.5.0| | | | | -|`cublasDsbmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsbmv_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDsbmv_v2`| | | | |`hipblasDsbmv`|3.5.0| | | | |`rocblas_dsbmv`|3.5.0| | | | | -|`cublasDsbmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsbmv_v2_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDspmv`| | | | |`hipblasDspmv`|3.5.0| | | | |`rocblas_dspmv`|3.5.0| | | | | -|`cublasDspmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDspmv_64`|12.0| | | |`hipblasDspmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDspmv_v2`| | | | |`hipblasDspmv`|3.5.0| | | | |`rocblas_dspmv`|3.5.0| | | | | -|`cublasDspmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDspmv_v2_64`|12.0| | | |`hipblasDspmv_64`|6.2.0| | | |6.2.0| | | | 
| | | |`cublasDspr`| | | | |`hipblasDspr`|3.5.0| | | | |`rocblas_dspr`|3.5.0| | | | | |`cublasDspr2`| | | | |`hipblasDspr2`|3.5.0| | | | |`rocblas_dspr2`|3.5.0| | | | | -|`cublasDspr2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDspr2_64`|12.0| | | |`hipblasDspr2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDspr2_v2`| | | | |`hipblasDspr2`|3.5.0| | | | |`rocblas_dspr2`|3.5.0| | | | | -|`cublasDspr2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasDspr_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDspr2_v2_64`|12.0| | | |`hipblasDspr2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasDspr_64`|12.0| | | |`hipblasDspr_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDspr_v2`| | | | |`hipblasDspr`|3.5.0| | | | |`rocblas_dspr`|3.5.0| | | | | -|`cublasDspr_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDspr_v2_64`|12.0| | | |`hipblasDspr_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDsymv`| | | | |`hipblasDsymv`|3.5.0| | | | |`rocblas_dsymv`|1.5.0| | | | | -|`cublasDsymv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsymv_64`|12.0| | | |`hipblasDsymv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDsymv_v2`| | | | |`hipblasDsymv`|3.5.0| | | | |`rocblas_dsymv`|1.5.0| | | | | -|`cublasDsymv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsymv_v2_64`|12.0| | | |`hipblasDsymv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDsyr`| | | | |`hipblasDsyr`|3.0.0| | | | |`rocblas_dsyr`|1.7.1| | | | | |`cublasDsyr2`| | | | |`hipblasDsyr2`|3.5.0| | | | |`rocblas_dsyr2`|3.5.0| | | | | -|`cublasDsyr2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsyr2_64`|12.0| | | |`hipblasDsyr2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDsyr2_v2`| | | | |`hipblasDsyr2`|3.5.0| | | | |`rocblas_dsyr2`|3.5.0| | | | | -|`cublasDsyr2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasDsyr_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsyr2_v2_64`|12.0| | | |`hipblasDsyr2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasDsyr_64`|12.0| | | |`hipblasDsyr_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDsyr_v2`| | | 
| |`hipblasDsyr`|3.0.0| | | | |`rocblas_dsyr`|1.7.1| | | | | -|`cublasDsyr_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDsyr_v2_64`|12.0| | | |`hipblasDsyr_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtbmv`| | | | |`hipblasDtbmv`|3.5.0| | | | |`rocblas_dtbmv`|3.5.0| | | | | -|`cublasDtbmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtbmv_64`|12.0| | | |`hipblasDtbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtbmv_v2`| | | | |`hipblasDtbmv`|3.5.0| | | | |`rocblas_dtbmv`|3.5.0| | | | | -|`cublasDtbmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtbmv_v2_64`|12.0| | | |`hipblasDtbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtbsv`| | | | |`hipblasDtbsv`|3.6.0| | | | |`rocblas_dtbsv`|3.5.0| | | | | -|`cublasDtbsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtbsv_64`|12.0| | | |`hipblasDtbsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtbsv_v2`| | | | |`hipblasDtbsv`|3.6.0| | | | |`rocblas_dtbsv`|3.5.0| | | | | -|`cublasDtbsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtbsv_v2_64`|12.0| | | |`hipblasDtbsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtpmv`| | | | |`hipblasDtpmv`|3.5.0| | | | |`rocblas_dtpmv`|3.5.0| | | | | -|`cublasDtpmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtpmv_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtpmv_v2`| | | | |`hipblasDtpmv`|3.5.0| | | | |`rocblas_dtpmv`|3.5.0| | | | | -|`cublasDtpmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtpmv_v2_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtpsv`| | | | |`hipblasDtpsv`|3.5.0| | | | |`rocblas_dtpsv`|3.5.0| | | | | -|`cublasDtpsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtpsv_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtpsv_v2`| | | | |`hipblasDtpsv`|3.5.0| | | | |`rocblas_dtpsv`|3.5.0| | | | | -|`cublasDtpsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtpsv_v2_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtrmv`| | | | 
|`hipblasDtrmv`|3.5.0| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrmv_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtrmv_v2`| | | | |`hipblasDtrmv`|3.5.0| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrmv_v2_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtrsv`| | | | |`hipblasDtrsv`|3.0.0| | | | |`rocblas_dtrsv`|3.5.0| | | | | -|`cublasDtrsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrsv_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDtrsv_v2`| | | | |`hipblasDtrsv`|3.0.0| | | | |`rocblas_dtrsv`|3.5.0| | | | | -|`cublasDtrsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDtrsv_v2_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSgbmv`| | | | |`hipblasSgbmv`|3.5.0| | | | |`rocblas_sgbmv`|3.5.0| | | | | |`cublasSgbmv_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSgbmv_v2`| | | | |`hipblasSgbmv`|3.5.0| | | | |`rocblas_sgbmv`|3.5.0| | | | | |`cublasSgbmv_v2_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSgemv`| | | | |`hipblasSgemv`|1.8.2| | | | |`rocblas_sgemv`|1.5.0| | | | | -|`cublasSgemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemv_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSgemv_v2`| | | | |`hipblasSgemv`|1.8.2| | | | |`rocblas_sgemv`|1.5.0| | | | | -|`cublasSgemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemv_v2_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSger`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSger_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSger_v2`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | -|`cublasSger_v2_64`|12.0| | | | | | | | | | | | | | | | 
+|`cublasSger_v2_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSsbmv`| | | | |`hipblasSsbmv`|3.5.0| | | | |`rocblas_ssbmv`|3.5.0| | | | | -|`cublasSsbmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsbmv_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSsbmv_v2`| | | | |`hipblasSsbmv`|3.5.0| | | | |`rocblas_ssbmv`|3.5.0| | | | | -|`cublasSsbmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsbmv_v2_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSspmv`| | | | |`hipblasSspmv`|3.5.0| | | | |`rocblas_sspmv`|3.5.0| | | | | -|`cublasSspmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSspmv_64`|12.0| | | |`hipblasSspmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSspmv_v2`| | | | |`hipblasSspmv`|3.5.0| | | | |`rocblas_sspmv`|3.5.0| | | | | -|`cublasSspmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSspmv_v2_64`|12.0| | | |`hipblasSspmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSspr`| | | | |`hipblasSspr`|3.5.0| | | | |`rocblas_sspr`|3.5.0| | | | | |`cublasSspr2`| | | | |`hipblasSspr2`|3.5.0| | | | |`rocblas_sspr2`|3.5.0| | | | | -|`cublasSspr2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSspr2_64`|12.0| | | |`hipblasSspr2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSspr2_v2`| | | | |`hipblasSspr2`|3.5.0| | | | |`rocblas_sspr2`|3.5.0| | | | | -|`cublasSspr2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasSspr_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSspr2_v2_64`|12.0| | | |`hipblasSspr2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasSspr_64`|12.0| | | |`hipblasSspr_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSspr_v2`| | | | |`hipblasSspr`|3.5.0| | | | |`rocblas_sspr`|3.5.0| | | | | -|`cublasSspr_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSspr_v2_64`|12.0| | | |`hipblasSspr_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSsymv`| | | | |`hipblasSsymv`|3.5.0| | | | |`rocblas_ssymv`|1.5.0| | | | | -|`cublasSsymv_64`|12.0| | | | | | | | | | | | | | | | 
+|`cublasSsymv_64`|12.0| | | |`hipblasSsymv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSsymv_v2`| | | | |`hipblasSsymv`|3.5.0| | | | |`rocblas_ssymv`|1.5.0| | | | | -|`cublasSsymv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsymv_v2_64`|12.0| | | |`hipblasSsymv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSsyr`| | | | |`hipblasSsyr`|3.0.0| | | | |`rocblas_ssyr`|1.7.1| | | | | |`cublasSsyr2`| | | | |`hipblasSsyr2`|3.5.0| | | | |`rocblas_ssyr2`|3.5.0| | | | | -|`cublasSsyr2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsyr2_64`|12.0| | | |`hipblasSsyr2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSsyr2_v2`| | | | |`hipblasSsyr2`|3.5.0| | | | |`rocblas_ssyr2`|3.5.0| | | | | -|`cublasSsyr2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasSsyr_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsyr2_v2_64`|12.0| | | |`hipblasSsyr2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasSsyr_64`|12.0| | | |`hipblasSsyr_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSsyr_v2`| | | | |`hipblasSsyr`|3.0.0| | | | |`rocblas_ssyr`|1.7.1| | | | | -|`cublasSsyr_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSsyr_v2_64`|12.0| | | |`hipblasSsyr_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStbmv`| | | | |`hipblasStbmv`|3.5.0| | | | |`rocblas_stbmv`|3.5.0| | | | | -|`cublasStbmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStbmv_64`|12.0| | | |`hipblasStbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStbmv_v2`| | | | |`hipblasStbmv`|3.5.0| | | | |`rocblas_stbmv`|3.5.0| | | | | -|`cublasStbmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStbmv_v2_64`|12.0| | | |`hipblasStbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStbsv`| | | | |`hipblasStbsv`|3.6.0| | | | |`rocblas_stbsv`|3.5.0| | | | | -|`cublasStbsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStbsv_64`|12.0| | | |`hipblasStbsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStbsv_v2`| | | | |`hipblasStbsv`|3.6.0| | | | |`rocblas_stbsv`|3.5.0| | | | | -|`cublasStbsv_v2_64`|12.0| | | | | | | | | | | | | | | | 
+|`cublasStbsv_v2_64`|12.0| | | |`hipblasStbsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStpmv`| | | | |`hipblasStpmv`|3.5.0| | | | |`rocblas_stpmv`|3.5.0| | | | | -|`cublasStpmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStpmv_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStpmv_v2`| | | | |`hipblasStpmv`|3.5.0| | | | |`rocblas_stpmv`|3.5.0| | | | | -|`cublasStpmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStpmv_v2_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStpsv`| | | | |`hipblasStpsv`|3.5.0| | | | |`rocblas_stpsv`|3.5.0| | | | | -|`cublasStpsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStpsv_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStpsv_v2`| | | | |`hipblasStpsv`|3.5.0| | | | |`rocblas_stpsv`|3.5.0| | | | | -|`cublasStpsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStpsv_v2_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStrmv`| | | | |`hipblasStrmv`|3.5.0| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrmv_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStrmv_v2`| | | | |`hipblasStrmv`|3.5.0| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrmv_v2_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStrsv`| | | | |`hipblasStrsv`|3.0.0| | | | |`rocblas_strsv`|3.5.0| | | | | -|`cublasStrsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrsv_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasStrsv_v2`| | | | |`hipblasStrsv`|3.0.0| | | | |`rocblas_strsv`|3.5.0| | | | | -|`cublasStrsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasStrsv_v2_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgbmv`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | |`rocblas_zgbmv`|3.5.0| | | | | -|`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_64`|6.2.0| | | 
|6.2.0| | | | | | | +|`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgbmv_v2`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | |`rocblas_zgbmv`|3.5.0| | | | | -|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgemv`| | | | |`hipblasZgemv_v2`|6.0.0| | | | |`rocblas_zgemv`|1.5.0| | | | | -|`cublasZgemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemv_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgemv_v2`| | | | |`hipblasZgemv_v2`|6.0.0| | | | |`rocblas_zgemv`|1.5.0| | | | | -|`cublasZgemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemv_v2_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgerc`| | | | |`hipblasZgerc_v2`|6.0.0| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgerc_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgerc_v2`| | | | |`hipblasZgerc_v2`|6.0.0| | | | |`rocblas_zgerc`|3.5.0| | | | | -|`cublasZgerc_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgerc_v2_64`|12.0| | | |`hipblasZgerc_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgeru`| | | | |`hipblasZgeru_v2`|6.0.0| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgeru_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgeru_v2`| | | | |`hipblasZgeru_v2`|6.0.0| | | | |`rocblas_zgeru`|3.5.0| | | | | -|`cublasZgeru_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgeru_v2_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhbmv`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | |`rocblas_zhbmv`|3.5.0| | | | | -|`cublasZhbmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhbmv_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhbmv_v2`| | | | |`hipblasZhbmv_v2`|6.0.0| | 
| | |`rocblas_zhbmv`|3.5.0| | | | | -|`cublasZhbmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhbmv_v2_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhemv`| | | | |`hipblasZhemv_v2`|6.0.0| | | | |`rocblas_zhemv`|1.5.0| | | | | -|`cublasZhemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhemv_64`|12.0| | | |`hipblasZhemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhemv_v2`| | | | |`hipblasZhemv_v2`|6.0.0| | | | |`rocblas_zhemv`|1.5.0| | | | | -|`cublasZhemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhemv_v2_64`|12.0| | | |`hipblasZhemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZher`| | | | |`hipblasZher_v2`|6.0.0| | | | |`rocblas_zher`|3.5.0| | | | | |`cublasZher2`| | | | |`hipblasZher2_v2`|6.0.0| | | | |`rocblas_zher2`|3.5.0| | | | | -|`cublasZher2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZher2_64`|12.0| | | |`hipblasZher2_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZher2_v2`| | | | |`hipblasZher2_v2`|6.0.0| | | | |`rocblas_zher2`|3.5.0| | | | | -|`cublasZher2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasZher_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZher2_v2_64`|12.0| | | |`hipblasZher2_v2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasZher_64`|12.0| | | |`hipblasZher_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZher_v2`| | | | |`hipblasZher_v2`|6.0.0| | | | |`rocblas_zher`|3.5.0| | | | | -|`cublasZher_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZher_v2_64`|12.0| | | |`hipblasZher_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhpmv`| | | | |`hipblasZhpmv_v2`|6.0.0| | | | |`rocblas_zhpmv`|3.5.0| | | | | -|`cublasZhpmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhpmv_64`|12.0| | | |`hipblasZhpmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhpmv_v2`| | | | |`hipblasZhpmv_v2`|6.0.0| | | | |`rocblas_zhpmv`|3.5.0| | | | | -|`cublasZhpmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhpmv_v2_64`|12.0| | | |`hipblasZhpmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | 
|`cublasZhpr`| | | | |`hipblasZhpr_v2`|6.0.0| | | | |`rocblas_zhpr`|3.5.0| | | | | |`cublasZhpr2`| | | | |`hipblasZhpr2_v2`|6.0.0| | | | |`rocblas_zhpr2`|3.5.0| | | | | -|`cublasZhpr2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhpr2_64`|12.0| | | |`hipblasZhpr2_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhpr2_v2`| | | | |`hipblasZhpr2_v2`|6.0.0| | | | |`rocblas_zhpr2`|3.5.0| | | | | -|`cublasZhpr2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasZhpr_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhpr2_v2_64`|12.0| | | |`hipblasZhpr2_v2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasZhpr_64`|12.0| | | |`hipblasZhpr_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhpr_v2`| | | | |`hipblasZhpr_v2`|6.0.0| | | | |`rocblas_zhpr`|3.5.0| | | | | -|`cublasZhpr_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZhpr_v2_64`|12.0| | | |`hipblasZhpr_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZsymv`| | | | |`hipblasZsymv_v2`|6.0.0| | | | |`rocblas_zsymv`|3.5.0| | | | | -|`cublasZsymv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsymv_64`|12.0| | | |`hipblasZsymv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZsymv_v2`| | | | |`hipblasZsymv_v2`|6.0.0| | | | |`rocblas_zsymv`|3.5.0| | | | | -|`cublasZsymv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsymv_v2_64`|12.0| | | |`hipblasZsymv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZsyr`| | | | |`hipblasZsyr_v2`|6.0.0| | | | |`rocblas_zsyr`|1.7.1| | | | | |`cublasZsyr2`| | | | |`hipblasZsyr2_v2`|6.0.0| | | | |`rocblas_zsyr2`|3.5.0| | | | | -|`cublasZsyr2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsyr2_64`|12.0| | | |`hipblasZsyr2_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZsyr2_v2`| | | | |`hipblasZsyr2_v2`|6.0.0| | | | |`rocblas_zsyr2`|3.5.0| | | | | -|`cublasZsyr2_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasZsyr_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsyr2_v2_64`|12.0| | | |`hipblasZsyr2_v2_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasZsyr_64`|12.0| | | 
|`hipblasZsyr_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZsyr_v2`| | | | |`hipblasZsyr_v2`|6.0.0| | | | |`rocblas_zsyr`|1.7.1| | | | | -|`cublasZsyr_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZsyr_v2_64`|12.0| | | |`hipblasZsyr_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtbmv`| | | | |`hipblasZtbmv_v2`|6.0.0| | | | |`rocblas_ztbmv`|3.5.0| | | | | -|`cublasZtbmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtbmv_64`|12.0| | | |`hipblasZtbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtbmv_v2`| | | | |`hipblasZtbmv_v2`|6.0.0| | | | |`rocblas_ztbmv`|3.5.0| | | | | -|`cublasZtbmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtbmv_v2_64`|12.0| | | |`hipblasZtbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtbsv`| | | | |`hipblasZtbsv_v2`|6.0.0| | | | |`rocblas_ztbsv`|3.5.0| | | | | -|`cublasZtbsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtbsv_64`|12.0| | | |`hipblasZtbsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtbsv_v2`| | | | |`hipblasZtbsv_v2`|6.0.0| | | | |`rocblas_ztbsv`|3.5.0| | | | | -|`cublasZtbsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtbsv_v2_64`|12.0| | | |`hipblasZtbsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtpmv`| | | | |`hipblasZtpmv_v2`|6.0.0| | | | |`rocblas_ztpmv`|3.5.0| | | | | -|`cublasZtpmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtpmv_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtpmv_v2`| | | | |`hipblasZtpmv_v2`|6.0.0| | | | |`rocblas_ztpmv`|3.5.0| | | | | -|`cublasZtpmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtpmv_v2_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtpsv`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | |`rocblas_ztpsv`|3.5.0| | | | | -|`cublasZtpsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtpsv_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtpsv_v2`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | |`rocblas_ztpsv`|3.5.0| | | | | -|`cublasZtpsv_v2_64`|12.0| | | | | | | | | 
| | | | | | | +|`cublasZtpsv_v2_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtrmv`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrmv_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtrmv_v2`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrmv_v2_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtrsv`| | | | |`hipblasZtrsv_v2`|6.0.0| | | | |`rocblas_ztrsv`|3.5.0| | | | | -|`cublasZtrsv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrsv_64`|12.0| | | |`hipblasZtrsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZtrsv_v2`| | | | |`hipblasZtrsv_v2`|6.0.0| | | | |`rocblas_ztrsv`|3.5.0| | | | | -|`cublasZtrsv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZtrsv_v2_64`|12.0| | | |`hipblasZtrsv_v2_64`|6.2.0| | | |6.2.0| | | | | | | ## **7. 
CUBLAS Level-3 Function Reference** @@ -1032,9 +1032,9 @@ |`cublasCgemm_v2`| | | | |`hipblasCgemm_v2`|6.0.0| | | | |`rocblas_cgemm`|1.5.0| | | | | |`cublasCgemm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgemvBatched`|11.6| | | |`hipblasCgemvBatched_v2`|6.0.0| | | | |`rocblas_cgemv_batched`|3.5.0| | | | | -|`cublasCgemvBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemvBatched_64`|12.0| | | |`hipblasCgemvBatched_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | | -|`cublasCgemvStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemvStridedBatched_64`|12.0| | | |`hipblasCgemvStridedBatched_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasChemm`| | | | |`hipblasChemm_v2`|6.0.0| | | | |`rocblas_chemm`|3.5.0| | | | | |`cublasChemm_64`|12.0| | | | | | | | | | | | | | | | |`cublasChemm_v2`| | | | |`hipblasChemm_v2`|6.0.0| | | | |`rocblas_chemm`|3.5.0| | | | | @@ -1081,10 +1081,10 @@ |`cublasDgemm_64`|12.0| | | | | | | | | | | | | | | | |`cublasDgemm_v2`| | | | |`hipblasDgemm`|1.8.2| | | | |`rocblas_dgemm`|1.5.0| | | | | |`cublasDgemm_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasDgemvBatched`|11.6| | | | | | | | | | | | | | | | -|`cublasDgemvBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasDgemvStridedBatched`|11.6| | | | | | | | | | | | | | | | -|`cublasDgemvStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemvBatched`|11.6| | | |`hipblasDgemvBatched`|3.0.0| | | | | | | | | | | +|`cublasDgemvBatched_64`|12.0| | | |`hipblasDgemvBatched_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | | | | | | | | +|`cublasDgemvStridedBatched_64`|12.0| | | |`hipblasDgemvStridedBatched_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDsymm`| | | | |`hipblasDsymm`|3.6.0| | | | |`rocblas_dsymm`|3.5.0| | | | | |`cublasDsymm_64`|12.0| | | | | | | | | | | | 
| | | | |`cublasDsymm_v2`| | | | |`hipblasDsymm`|3.6.0| | | | |`rocblas_dsymm`|3.5.0| | | | | @@ -1107,6 +1107,8 @@ |`cublasDtrsm_64`|12.0| | | | | | | | | | | | | | | | |`cublasDtrsm_v2`| | | | |`hipblasDtrsm`|1.8.2| | | | |`rocblas_dtrsm`|1.5.0| | | | | |`cublasDtrsm_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | | | | | | | +|`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | | | | | | | |`cublasHSHgemvBatched`|11.6| | | | | | | | | |`rocblas_hshgemv_batched`|6.0.0| | | | | |`cublasHSHgemvBatched_64`|12.0| | | | | | | | | | | | | | | | |`cublasHSHgemvStridedBatched`|11.6| | | | | | | | | |`rocblas_hshgemv_strided_batched`|6.0.0| | | | | @@ -1131,10 +1133,10 @@ |`cublasSgemm_64`|12.0| | | | | | | | | | | | | | | | |`cublasSgemm_v2`| | | | |`hipblasSgemm`|1.8.2| | | | |`rocblas_sgemm`|1.5.0| | | | | |`cublasSgemm_v2_64`|12.0| | | | | | | | | | | | | | | | -|`cublasSgemvBatched`|11.6| | | | | | | | | | | | | | | | -|`cublasSgemvBatched_64`|12.0| | | | | | | | | | | | | | | | -|`cublasSgemvStridedBatched`|11.6| | | | | | | | | | | | | | | | -|`cublasSgemvStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemvBatched`|11.6| | | |`hipblasSgemvBatched`|1.6.0| | | | | | | | | | | +|`cublasSgemvBatched_64`|12.0| | | |`hipblasSgemvBatched_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | | | | | | | | +|`cublasSgemvStridedBatched_64`|12.0| | | |`hipblasSgemvStridedBatched_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSsymm`| | | | |`hipblasSsymm`|3.6.0| | | | |`rocblas_ssymm`|3.5.0| | | | | |`cublasSsymm_64`|12.0| | | | | | | | | | | | | | | | |`cublasSsymm_v2`| | | | |`hipblasSsymm`|3.6.0| | | | |`rocblas_ssymm`|3.5.0| | | | | @@ -1176,9 +1178,9 @@ |`cublasZgemm_v2`| | | | |`hipblasZgemm_v2`|6.0.0| | | | |`rocblas_zgemm`|1.5.0| | | | | |`cublasZgemm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasZgemvBatched`|11.6| | | 
|`hipblasZgemvBatched_v2`|6.0.0| | | | |`rocblas_zgemv_batched`|3.5.0| | | | | -|`cublasZgemvBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemvBatched_64`|12.0| | | |`hipblasZgemvBatched_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | -|`cublasZgemvStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemvStridedBatched_64`|12.0| | | |`hipblasZgemvStridedBatched_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZhemm`| | | | |`hipblasZhemm_v2`|6.0.0| | | | |`rocblas_zhemm`|3.5.0| | | | | |`cublasZhemm_64`|12.0| | | | | | | | | | | | | | | | |`cublasZhemm_v2`| | | | |`hipblasZhemm_v2`|6.0.0| | | | |`rocblas_zhemm`|3.5.0| | | | | @@ -1223,7 +1225,7 @@ |`cublasAsumEx`|10.1| | | | | | | | | | | | | | | | |`cublasAsumEx_64`|12.0| | | | | | | | | | | | | | | | |`cublasAxpyEx`|8.0| | | |`hipblasAxpyEx_v2`|6.0.0| | | | |`rocblas_axpy_ex`|3.9.0| | | | | -|`cublasAxpyEx_64`|12.0| | | | | | | | | |`rocblas_axpy_ex_64`|6.1.0| | | | | +|`cublasAxpyEx_64`|12.0| | | |`hipblasAxpyEx_v2_64`|6.2.0| | | |6.2.0|`rocblas_axpy_ex_64`|6.1.0| | | | | |`cublasCdgmm`| | | | |`hipblasCdgmm_v2`|6.0.0| | | | |`rocblas_cdgmm`|3.5.0| | | | | |`cublasCdgmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgeam`| | | | |`hipblasCgeam_v2`|6.0.0| | | | |`rocblas_cgeam`|3.5.0| | | | | @@ -1261,9 +1263,9 @@ |`cublasDgetrsBatched`| | | | |`hipblasDgetrsBatched`|3.5.0| | | | | | | | | | | |`cublasDmatinvBatched`| | | | | | | | | | | | | | | | | |`cublasDotEx`|8.0| | | |`hipblasDotEx_v2`|6.0.0| | | | |`rocblas_dot_ex`|4.1.0| | | | | -|`cublasDotEx_64`|12.0| | | | | | | | | |`rocblas_dot_ex_64`|6.1.0| | | | | +|`cublasDotEx_64`|12.0| | | |`hipblasDotEx_v2_64`|6.2.0| | | |6.2.0|`rocblas_dot_ex_64`|6.1.0| | | | | |`cublasDotcEx`|8.0| | | |`hipblasDotcEx_v2`|6.0.0| | | | |`rocblas_dotc_ex`|4.1.0| | | | | -|`cublasDotcEx_64`|12.0| | | | | | | | | 
|`rocblas_dotc_ex_64`|6.1.0| | | | | +|`cublasDotcEx_64`|12.0| | | |`hipblasDotcEx_v2_64`|6.2.0| | | |6.2.0|`rocblas_dotc_ex_64`|6.1.0| | | | | |`cublasDtpttr`| | | | | | | | | | | | | | | | | |`cublasDtrsmBatched`| | | | |`hipblasDtrsmBatched`|3.2.0| | | | |`rocblas_dtrsm_batched`|3.5.0| | | | | |`cublasDtrsmBatched_64`|12.0| | | | | | | | | | | | | | | | @@ -1279,13 +1281,13 @@ |`cublasIaminEx`|10.1| | | | | | | | | | | | | | | | |`cublasIaminEx_64`|12.0| | | | | | | | | | | | | | | | |`cublasRotEx`|10.1| | | |`hipblasRotEx_v2`|6.0.0| | | | |`rocblas_rot_ex`|4.1.0| | | | | -|`cublasRotEx_64`|12.0| | | | | | | | | |`rocblas_rot_ex_64`|6.1.0| | | | | +|`cublasRotEx_64`|12.0| | | |`hipblasRotEx_v2_64`|6.2.0| | | |6.2.0|`rocblas_rot_ex_64`|6.1.0| | | | | |`cublasRotgEx`|10.1| | | | | | | | | | | | | | | | |`cublasRotmEx`|10.1| | | | | | | | | | | | | | | | |`cublasRotmEx_64`|12.0| | | | | | | | | | | | | | | | |`cublasRotmgEx`|10.1| | | | | | | | | | | | | | | | |`cublasScalEx`|8.0| | | |`hipblasScalEx_v2`|6.0.0| | | | |`rocblas_scal_ex`|4.0.0| | | | | -|`cublasScalEx_64`|12.0| | | | | | | | | |`rocblas_scal_ex_64`|6.1.0| | | | | +|`cublasScalEx_64`|12.0| | | |`hipblasScalEx_v2_64`|6.2.0| | | |6.2.0|`rocblas_scal_ex_64`|6.1.0| | | | | |`cublasSdgmm`| | | | |`hipblasSdgmm`|3.6.0| | | | |`rocblas_sdgmm`|3.5.0| | | | | |`cublasSdgmm_64`|12.0| | | | | | | | | | | | | | | | |`cublasSgeam`| | | | |`hipblasSgeam`|1.8.2| | | | |`rocblas_sgeam`|1.6.4| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 891856d4..7f509495 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -1107,6 +1107,8 @@ |`cublasDtrsm_64`|12.0| | | | | | | | | | |`cublasDtrsm_v2`| | | | |`rocblas_dtrsm`|1.5.0| | | | | |`cublasDtrsm_v2_64`|12.0| | | | | | | | | | +|`cublasGemmGroupedBatchedEx`|12.5| | | | | | | | | | +|`cublasGemmGroupedBatchedEx_64`|12.5| | | | | | | | | | 
|`cublasHSHgemvBatched`|11.6| | | |`rocblas_hshgemv_batched`|6.0.0| | | | | |`cublasHSHgemvBatched_64`|12.0| | | | | | | | | | |`cublasHSHgemvStridedBatched`|11.6| | | |`rocblas_hshgemv_strided_batched`|6.0.0| | | | | diff --git a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md index be15b713..f32ad22f 100644 --- a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md @@ -4,6 +4,8 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| +|`CIG_DATA_TYPE_D3D12_COMMAND_QUEUE`|12.5| | | | | | | | | | +|`CUCoredumpGenerationFlags`|12.5| | | | | | | | | | |`CUDA_ARRAY3D_2DARRAY`| |5.0| | | | | | | | | |`CUDA_ARRAY3D_COLOR_ATTACHMENT`|10.0| | | | | | | | | | |`CUDA_ARRAY3D_CUBEMAP`| | | | |`hipArrayCubemap`|1.7.0| | | | | @@ -16,6 +18,7 @@ |`CUDA_ARRAY3D_SPARSE`|11.1| | | | | | | | | | |`CUDA_ARRAY3D_SURFACE_LDST`| | | | |`hipArraySurfaceLoadStore`|1.7.0| | | | | |`CUDA_ARRAY3D_TEXTURE_GATHER`| | | | |`hipArrayTextureGather`|1.7.0| | | | | +|`CUDA_ARRAY3D_VIDEO_ENCODE_DECODE`|12.5| | | | | | | | | | |`CUDA_ARRAY_DESCRIPTOR`| | | | |`HIP_ARRAY_DESCRIPTOR`|1.7.0| | | | | |`CUDA_ARRAY_DESCRIPTOR_st`| | | | |`HIP_ARRAY_DESCRIPTOR`|1.7.0| | | | | |`CUDA_ARRAY_DESCRIPTOR_v1`| | | | |`HIP_ARRAY_DESCRIPTOR`|1.7.0| | | | | @@ -250,6 +253,7 @@ |`CU_ACCESS_PROPERTY_NORMAL`|11.0| | | |`hipAccessPropertyNormal`|5.2.0| | | | | |`CU_ACCESS_PROPERTY_PERSISTING`|11.0| | | |`hipAccessPropertyPersisting`|5.2.0| | | | | |`CU_ACCESS_PROPERTY_STREAMING`|11.0| | | |`hipAccessPropertyStreaming`|5.2.0| | | | | +|`CU_AD_FORMAT_AYUV`|12.5| | | | | | | | | | |`CU_AD_FORMAT_BC1_UNORM`|11.5| | | | | | | | | | |`CU_AD_FORMAT_BC1_UNORM_SRGB`|11.5| | | | | | | | | | |`CU_AD_FORMAT_BC2_UNORM`|11.5| | | | | | | | | | @@ -266,7 +270,13 @@ |`CU_AD_FORMAT_BC7_UNORM_SRGB`|11.5| | | | | | | | | | 
|`CU_AD_FORMAT_FLOAT`| | | | |`HIP_AD_FORMAT_FLOAT`|1.7.0| | | | | |`CU_AD_FORMAT_HALF`| | | | |`HIP_AD_FORMAT_HALF`|1.7.0| | | | | +|`CU_AD_FORMAT_MAX`|12.5| | | | | | | | | | |`CU_AD_FORMAT_NV12`|11.2| | | | | | | | | | +|`CU_AD_FORMAT_NV16`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_P010`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_P016`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_P210`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_P216`|12.5| | | | | | | | | | |`CU_AD_FORMAT_SIGNED_INT16`| | | | |`HIP_AD_FORMAT_SIGNED_INT16`|1.7.0| | | | | |`CU_AD_FORMAT_SIGNED_INT32`| | | | |`HIP_AD_FORMAT_SIGNED_INT32`|1.7.0| | | | | |`CU_AD_FORMAT_SIGNED_INT8`| | | | |`HIP_AD_FORMAT_SIGNED_INT8`|1.7.0| | | | | @@ -285,6 +295,13 @@ |`CU_AD_FORMAT_UNSIGNED_INT16`| | | | |`HIP_AD_FORMAT_UNSIGNED_INT16`|1.7.0| | | | | |`CU_AD_FORMAT_UNSIGNED_INT32`| | | | |`HIP_AD_FORMAT_UNSIGNED_INT32`|1.7.0| | | | | |`CU_AD_FORMAT_UNSIGNED_INT8`| | | | |`HIP_AD_FORMAT_UNSIGNED_INT8`|1.7.0| | | | | +|`CU_AD_FORMAT_Y210`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_Y216`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_Y410`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_Y416`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_Y444_PLANAR10`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_Y444_PLANAR8`|12.5| | | | | | | | | | +|`CU_AD_FORMAT_YUY2`|12.5| | | | | | | | | | |`CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL`|11.1| | | | | | | | | | |`CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL`|11.1| | | |`hipArraySparseSubresourceTypeMiptail`|5.2.0| | | | | |`CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL`|11.1| | | |`hipArraySparseSubresourceTypeSparseLevel`|5.2.0| | | | | @@ -297,12 +314,20 @@ |`CU_COMPUTEMODE_EXCLUSIVE_PROCESS`| | | | |`hipComputeModeExclusiveProcess`|2.0.0| | | | | |`CU_COMPUTEMODE_PROHIBITED`| | | | |`hipComputeModeProhibited`|1.9.0| | | | | |`CU_COMPUTE_ACCELERATED_TARGET_BASE`|12.0| | | | | | | | | | +|`CU_COREDUMP_DEFAULT_FLAGS`|12.5| | | | | | | | | | |`CU_COREDUMP_ENABLE_ON_EXCEPTION`|12.1| | | | | | | | | | 
|`CU_COREDUMP_ENABLE_USER_TRIGGER`|12.1| | | | | | | | | | |`CU_COREDUMP_FILE`|12.1| | | | | | | | | | +|`CU_COREDUMP_GENERATION_FLAGS`|12.5| | | | | | | | | | |`CU_COREDUMP_LIGHTWEIGHT`|12.1| | | | | | | | | | +|`CU_COREDUMP_LIGHTWEIGHT_FLAGS`|12.5| | | | | | | | | | |`CU_COREDUMP_MAX`|12.1| | | | | | | | | | |`CU_COREDUMP_PIPE`|12.1| | | | | | | | | | +|`CU_COREDUMP_SKIP_ABORT`|12.5| | | | | | | | | | +|`CU_COREDUMP_SKIP_GLOBAL_MEMORY`|12.5| | | | | | | | | | +|`CU_COREDUMP_SKIP_LOCAL_MEMORY`|12.5| | | | | | | | | | +|`CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES`|12.5| | | | | | | | | | +|`CU_COREDUMP_SKIP_SHARED_MEMORY`|12.5| | | | | | | | | | |`CU_COREDUMP_TRIGGER_HOST`|12.1| | | | | | | | | | |`CU_CTX_BLOCKING_SYNC`| |4.0| | |`hipDeviceScheduleBlockingSync`|1.6.0| | | | | |`CU_CTX_COREDUMP_ENABLE`|12.1| | | | | | | | | | @@ -364,6 +389,7 @@ |`CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`|8.0| | | |`hipDeviceAttributeConcurrentManagedAccess`|3.10.0| | | | | |`CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH`|9.0| | | |`hipDeviceAttributeCooperativeLaunch`|2.6.0| | | | | |`CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH`|9.0| | | |`hipDeviceAttributeCooperativeMultiDeviceLaunch`|2.6.0| | | | | +|`CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED`|12.5| | | | | | | | | | |`CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED`|11.6| | | | | | | | | | |`CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST`|9.2| | | |`hipDeviceAttributeDirectManagedMemAccessFromHost`|3.10.0| | | | | |`CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED`|11.7| | | | | | | | | | @@ -498,6 +524,8 @@ |`CU_DEV_RESOURCE_TYPE_INVALID`|12.4| | | | | | | | | | |`CU_DEV_RESOURCE_TYPE_MAX`|12.4| | | | | | | | | | |`CU_DEV_RESOURCE_TYPE_SM`|12.4| | | | | | | | | | +|`CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING`|12.5| | | | | | | | | | +|`CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE`|12.5| | | | | | | | | | |`CU_EGL_COLOR_FORMAT_A`|9.1| | | | | | | | | | |`CU_EGL_COLOR_FORMAT_ABGR`|9.1| | | | | | | | | | 
|`CU_EGL_COLOR_FORMAT_ARGB`|9.0| | | | | | | | | | @@ -767,6 +795,7 @@ |`CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE`|12.4| | | | | | | | | | |`CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN`|12.0| | | | | | | | | | |`CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP`|12.0| | | | | | | | | | +|`CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`|12.5| | | | | | | | | | |`CU_KERNEL_NODE_ATTRIBUTE_PRIORITY`|11.7| | | |`hipKernelNodeAttributePriority`|6.2.0| | | |6.2.0| |`CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.8| | | |`hipLaunchAttributeAccessPolicyWindow`|6.2.0| | | |6.2.0| |`CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION`|11.8| | | | | | | | | | @@ -778,6 +807,7 @@ |`CU_LAUNCH_ATTRIBUTE_MAX`|12.1| | | | | | | | | | |`CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN`|12.0| | | | | | | | | | |`CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP`|12.0| | | | | | | | | | +|`CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`|12.5| | | | | | | | | | |`CU_LAUNCH_ATTRIBUTE_PRIORITY`|11.8| | | |`hipLaunchAttributePriority`|6.2.0| | | |6.2.0| |`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT`|11.8| | | | | | | | | | |`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION`|11.8| | | | | | | | | | @@ -793,6 +823,8 @@ |`CU_LIBRARY_BINARY_IS_PRESERVED`|12.0| | | | | | | | | | |`CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE`|12.0| | | | | | | | | | |`CU_LIBRARY_NUM_OPTIONS`|12.0| | | | | | | | | | +|`CU_LIMIT_CIG_ENABLED`|12.5| | | | | | | | | | +|`CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED`|12.5| | | | | | | | | | |`CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT`| | | | | | | | | | | |`CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH`| | | | | | | | | | | |`CU_LIMIT_MALLOC_HEAP_SIZE`| | | | |`hipLimitMallocHeapSize`|1.6.0| | | | | @@ -800,6 +832,7 @@ |`CU_LIMIT_MAX_L2_FETCH_GRANULARITY`|10.0| | | | | | | | | | |`CU_LIMIT_PERSISTING_L2_CACHE_SIZE`|11.0| | | | | | | | | | |`CU_LIMIT_PRINTF_FIFO_SIZE`| | | | |`hipLimitPrintfFifoSize`|4.5.0| | | | | +|`CU_LIMIT_SHMEM_SIZE`|12.5| | | | | | | | | | |`CU_LIMIT_STACK_SIZE`| | | | 
|`hipLimitStackSize`|5.3.0| | | | | |`CU_MEMHOSTALLOC_DEVICEMAP`| | | | |`hipHostMallocMapped`|1.6.0| | | | | |`CU_MEMHOSTALLOC_PORTABLE`| | | | |`hipHostMallocPortable`|1.6.0| | | | | @@ -1067,6 +1100,8 @@ |`CUasyncNotificationInfo_st`|12.4| | | | | | | | | | |`CUasyncNotificationType`|12.4| | | | | | | | | | |`CUasyncNotificationType_enum`|12.4| | | | | | | | | | +|`CUcigDataType`|12.5| | | | | | | | | | +|`CUcigDataType_enum`|12.5| | | | | | | | | | |`CUclusterSchedulingPolicy`|11.8| | | | | | | | | | |`CUclusterSchedulingPolicy_enum`|11.8| | | | | | | | | | |`CUcomputemode`| | | | |`hipComputeMode`|1.9.0| | | | | @@ -1074,6 +1109,10 @@ |`CUcontext`| | | | |`hipCtx_t`|1.6.0| | | | | |`CUcoredumpSettings`|12.1| | | | | | | | | | |`CUcoredumpSettings_enum`|12.1| | | | | | | | | | +|`CUctxCigParam`|12.5| | | | | | | | | | +|`CUctxCigParam_st`|12.5| | | | | | | | | | +|`CUctxCreateParams`|12.5| | | | | | | | | | +|`CUctxCreateParams_st`|12.5| | | | | | | | | | |`CUctx_flags`| | | | | | | | | | | |`CUctx_flags_enum`| | | | | | | | | | | |`CUctx_st`| | | | |`ihipCtx_t`|1.6.0| | | | | @@ -1097,6 +1136,7 @@ |`CUdevResourceType`|12.4| | | | | | | | | | |`CUdevResource_st`|12.4| | | | | | | | | | |`CUdevSmResource`|12.4| | | | | | | | | | +|`CUdevSmResourceSplit_flags`|12.5| | | | | | | | | | |`CUdevSmResource_st`|12.4| | | | | | | | | | |`CUdevice`| | | | |`hipDevice_t`|1.6.0| | | | | |`CUdeviceNumaConfig`|12.2| | | | | | | | | | @@ -1471,6 +1511,7 @@ |`cuCtxCreate`| | | | |`hipCtxCreate`|1.6.0|1.9.0| | | | |`cuCtxCreate_v2`| | | | |`hipCtxCreate`|1.6.0|1.9.0| | | | |`cuCtxCreate_v3`|11.4| | | | | | | | | | +|`cuCtxCreate_v4`|12.5| | | | | | | | | | |`cuCtxDestroy`| | | | |`hipCtxDestroy`|1.6.0|1.9.0| | | | |`cuCtxDestroy_v2`| | | | |`hipCtxDestroy`|1.6.0|1.9.0| | | | |`cuCtxGetApiVersion`| | | | |`hipCtxGetApiVersion`|1.9.0|1.9.0| | | | @@ -1494,6 +1535,7 @@ |`cuCtxSetLimit`| | | | |`hipDeviceSetLimit`|5.3.0| | | | | |`cuCtxSetSharedMemConfig`| | | 
|12.4|`hipCtxSetSharedMemConfig`|1.9.0|1.9.0| | | | |`cuCtxSynchronize`| | | | |`hipCtxSynchronize`|1.9.0|1.9.0| | | | +|`cuCtxWaitEvent`|12.5| | | | | | | | | | ## **9. Context Management [DEPRECATED]** @@ -1539,6 +1581,7 @@ |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cuKernelGetAttribute`|12.0| | | | | | | | | | |`cuKernelGetFunction`|12.0| | | | | | | | | | +|`cuKernelGetLibrary`|12.5| | | | | | | | | | |`cuKernelGetName`|12.3| | | | | | | | | | |`cuKernelGetParamInfo`|12.4| | | | | | | | | | |`cuKernelSetAttribute`|12.0| | | | | | | | | | @@ -1750,6 +1793,7 @@ |`cuStreamGetCaptureInfo_v2`|11.3| | | |`hipStreamGetCaptureInfo_v2`|5.0.0| | | | | |`cuStreamGetCaptureInfo_v3`|12.3| | | | | | | | | | |`cuStreamGetCtx`|9.2| | | | | | | | | | +|`cuStreamGetCtx_v2`|12.5| | | | | | | | | | |`cuStreamGetFlags`| | | | |`hipStreamGetFlags`|1.6.0| | | | | |`cuStreamGetId`|12.0| | | | | | | | | | |`cuStreamGetPriority`| | | | |`hipStreamGetPriority`|2.0.0| | | | | @@ -2063,6 +2107,7 @@ |`cuGreenCtxDestroy`|12.4| | | | | | | | | | |`cuGreenCtxGetDevResource`|12.4| | | | | | | | | | |`cuGreenCtxRecordEvent`|12.4| | | | | | | | | | +|`cuGreenCtxStreamCreate`|12.5| | | | | | | | | | |`cuGreenCtxWaitEvent`|12.4| | | | | | | | | | |`cuStreamGetGreenCtx`|12.4| | | | | | | | | | diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md index a1048e03..83f32650 100644 --- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -15,14 +15,12 @@ |`cudaDeviceGetNvSciSyncAttributes`|10.2| | | | | | | | | | |`cudaDeviceGetP2PAttribute`|8.0| | | |`hipDeviceGetP2PAttribute`|3.8.0| | | | | |`cudaDeviceGetPCIBusId`| | | | |`hipDeviceGetPCIBusId`|1.6.0| | | | | -|`cudaDeviceGetSharedMemConfig`| |12.4| | |`hipDeviceGetSharedMemConfig`|1.6.0| | | | | |`cudaDeviceGetStreamPriorityRange`| | | | |`hipDeviceGetStreamPriorityRange`|2.0.0| | | | | 
|`cudaDeviceGetTexture1DLinearMaxWidth`|11.1| | | | | | | | | | |`cudaDeviceReset`| | | | |`hipDeviceReset`|1.6.0| | | | | |`cudaDeviceSetCacheConfig`| | | | |`hipDeviceSetCacheConfig`|1.6.0| | | | | |`cudaDeviceSetLimit`| | | | |`hipDeviceSetLimit`|5.3.0| | | | | |`cudaDeviceSetMemPool`|11.2| | | |`hipDeviceSetMemPool`|5.2.0| | | | | -|`cudaDeviceSetSharedMemConfig`| |12.4| | |`hipDeviceSetSharedMemConfig`|1.6.0| | | | | |`cudaDeviceSynchronize`| | | | |`hipDeviceSynchronize`|1.6.0| | | | | |`cudaGetDevice`| | | | |`hipGetDevice`|1.6.0| | | | | |`cudaGetDeviceCount`| | | | |`hipGetDeviceCount`|1.6.0| | | | | @@ -38,7 +36,14 @@ |`cudaSetDeviceFlags`| | | | |`hipSetDeviceFlags`|1.6.0| | | | | |`cudaSetValidDevices`| | | | | | | | | | | -## **2. Thread Management [DEPRECATED]** +## **2. Device Management [DEPRECATED]** + +|**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| +|:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| +|`cudaDeviceGetSharedMemConfig`| |12.4| | |`hipDeviceGetSharedMemConfig`|1.6.0| | | | | +|`cudaDeviceSetSharedMemConfig`| |12.4| | |`hipDeviceSetSharedMemConfig`|1.6.0| | | | | + +## **3. Thread Management [DEPRECATED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -49,7 +54,7 @@ |`cudaThreadSetLimit`| |10.0| | | | | | | | | |`cudaThreadSynchronize`| |10.0| | |`hipDeviceSynchronize`|1.6.0| | | | | -## **3. Error Handling** +## **4. Error Handling** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -58,7 +63,7 @@ |`cudaGetLastError`| | | | |`hipGetLastError`|1.6.0| | | | | |`cudaPeekAtLastError`| | | | |`hipPeekAtLastError`|1.6.0| | | | | -## **4. Stream Management** +## **5. 
Stream Management** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -88,7 +93,7 @@ |`cudaStreamWaitEvent`| | | | |`hipStreamWaitEvent`|1.6.0| | | | | |`cudaThreadExchangeStreamCaptureMode`|10.1| | | |`hipThreadExchangeStreamCaptureMode`|5.2.0| | | | | -## **5. Event Management** +## **6. Event Management** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -101,7 +106,7 @@ |`cudaEventRecordWithFlags`|11.1| | | | | | | | | | |`cudaEventSynchronize`| | | | |`hipEventSynchronize`|1.6.0| | | | | -## **6. External Resource Interoperability** +## **7. External Resource Interoperability** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -114,7 +119,7 @@ |`cudaSignalExternalSemaphoresAsync`|10.0| | | |`hipSignalExternalSemaphoresAsync`|4.4.0| | | | | |`cudaWaitExternalSemaphoresAsync`|10.0| | | |`hipWaitExternalSemaphoresAsync`|4.4.0| | | | | -## **7. Execution Control** +## **8. Execution Control** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -123,7 +128,6 @@ |`cudaFuncGetParamInfo`|12.4| | | | | | | | | | |`cudaFuncSetAttribute`|9.0| | | |`hipFuncSetAttribute`|3.9.0| | | | | |`cudaFuncSetCacheConfig`| | | | |`hipFuncSetCacheConfig`|1.6.0| | | | | -|`cudaFuncSetSharedMemConfig`| |12.4| | |`hipFuncSetSharedMemConfig`|3.9.0| | | | | |`cudaGetParameterBuffer`| | | | | | | | | | | |`cudaGetParameterBufferV2`| | | | | | | | | | | |`cudaLaunchCooperativeKernel`|9.0| | | |`hipLaunchCooperativeKernel`|2.6.0| | | | | @@ -134,7 +138,13 @@ |`cudaSetDoubleForDevice`| |10.0| | | | | | | | | |`cudaSetDoubleForHost`| |10.0| | | | | | | | | -## **8. Occupancy** +## **9. 
Execution Control [DEPRECATED]** + +|**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| +|:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| +|`cudaFuncSetSharedMemConfig`| |12.4| | |`hipFuncSetSharedMemConfig`|3.9.0| | | | | + +## **10. Occupancy** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -148,7 +158,7 @@ |`cudaOccupancyMaxPotentialBlockSizeWithFlags`| | | | |`hipOccupancyMaxPotentialBlockSizeWithFlags`|3.5.0| | | | | |`cudaOccupancyMaxPotentialClusterSize`|11.8| | | | | | | | | | -## **9. Memory Management** +## **11. Memory Management** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -214,7 +224,7 @@ |`make_cudaPitchedPtr`| | | | |`make_hipPitchedPtr`|1.7.0| | | | | |`make_cudaPos`| | | | |`make_hipPos`|1.7.0| | | | | -## **10. Memory Management [DEPRECATED]** +## **12. Memory Management [DEPRECATED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -224,7 +234,7 @@ |`cudaMemcpyToArray`| |10.1| | |`hipMemcpyToArray`|1.6.0|3.8.0| | | | |`cudaMemcpyToArrayAsync`| |10.1| | | | | | | | | -## **11. Stream Ordered Memory Allocator** +## **13. Stream Ordered Memory Allocator** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -243,13 +253,13 @@ |`cudaMemPoolSetAttribute`|11.2| | | |`hipMemPoolSetAttribute`|5.2.0| | | | | |`cudaMemPoolTrimTo`|11.2| | | |`hipMemPoolTrimTo`|5.2.0| | | | | -## **12. Unified Addressing** +## **14. Unified Addressing** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cudaPointerGetAttributes`| | | | |`hipPointerGetAttributes`|1.6.0| | | | | -## **13. Peer Device Memory Access** +## **15. 
Peer Device Memory Access** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -257,7 +267,7 @@ |`cudaDeviceDisablePeerAccess`| | | | |`hipDeviceDisablePeerAccess`|1.9.0| | | | | |`cudaDeviceEnablePeerAccess`| | | | |`hipDeviceEnablePeerAccess`|1.9.0| | | | | -## **14. OpenGL Interoperability** +## **16. OpenGL Interoperability** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -266,7 +276,7 @@ |`cudaGraphicsGLRegisterImage`| | | | |`hipGraphicsGLRegisterImage`|5.1.0| | | | | |`cudaWGLGetDevice`| | | | | | | | | | | -## **15. OpenGL Interoperability [DEPRECATED]** +## **17. OpenGL Interoperability [DEPRECATED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -279,7 +289,7 @@ |`cudaGLUnmapBufferObjectAsync`| |10.0| | | | | | | | | |`cudaGLUnregisterBufferObject`| |10.0| | | | | | | | | -## **16. Direct3D 9 Interoperability** +## **18. Direct3D 9 Interoperability** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -289,7 +299,7 @@ |`cudaD3D9SetDirect3DDevice`| | | | | | | | | | | |`cudaGraphicsD3D9RegisterResource`| | | | | | | | | | | -## **17. Direct3D 9 Interoperability [DEPRECATED]** +## **19. Direct3D 9 Interoperability [DEPRECATED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -304,7 +314,7 @@ |`cudaD3D9UnmapResources`| |10.0| | | | | | | | | |`cudaD3D9UnregisterResource`| |10.0| | | | | | | | | -## **18. Direct3D 10 Interoperability** +## **20. 
Direct3D 10 Interoperability** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -312,7 +322,7 @@ |`cudaD3D10GetDevices`| | | | | | | | | | | |`cudaGraphicsD3D10RegisterResource`| | | | | | | | | | | -## **19. Direct3D 10 Interoperability [DEPRECATED]** +## **21. Direct3D 10 Interoperability [DEPRECATED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -329,7 +339,7 @@ |`cudaD3D10UnmapResources`| |10.0| | | | | | | | | |`cudaD3D10UnregisterResource`| |10.0| | | | | | | | | -## **20. Direct3D 11 Interoperability** +## **22. Direct3D 11 Interoperability** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -337,14 +347,14 @@ |`cudaD3D11GetDevices`| | | | | | | | | | | |`cudaGraphicsD3D11RegisterResource`| | | | | | | | | | | -## **21. Direct3D 11 Interoperability [DEPRECATED]** +## **23. Direct3D 11 Interoperability [DEPRECATED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cudaD3D11GetDirect3DDevice`| |10.0| | | | | | | | | |`cudaD3D11SetDirect3DDevice`| |10.0| | | | | | | | | -## **22. VDPAU Interoperability** +## **24. VDPAU Interoperability** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -353,7 +363,7 @@ |`cudaVDPAUGetDevice`| | | | | | | | | | | |`cudaVDPAUSetVDPAUDevice`| | | | | | | | | | | -## **23. EGL Interoperability** +## **25. EGL Interoperability** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -370,7 +380,7 @@ |`cudaGraphicsEGLRegisterImage`|9.1| | | | | | | | | | |`cudaGraphicsResourceGetMappedEglFrame`|9.1| | | | | | | | | | -## **24. Graphics Interoperability** +## **26. 
Graphics Interoperability** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -382,7 +392,7 @@ |`cudaGraphicsUnmapResources`| | | | |`hipGraphicsUnmapResources`|4.5.0| | | | | |`cudaGraphicsUnregisterResource`| | | | |`hipGraphicsUnregisterResource`|4.5.0| | | | | -## **25. Texture Object Management** +## **27. Texture Object Management** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -396,7 +406,7 @@ |`cudaGetTextureObjectTextureDesc`| | | | |`hipGetTextureObjectTextureDesc`|1.7.0| | | | | |`cudaGetTextureObjectTextureDesc_v2`|11.8| | |12.0| | | | | | | -## **26. Surface Object Management** +## **28. Surface Object Management** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -404,14 +414,14 @@ |`cudaDestroySurfaceObject`| | | | |`hipDestroySurfaceObject`|1.9.0| | | | | |`cudaGetSurfaceObjectResourceDesc`| | | | | | | | | | | -## **27. Version Management** +## **29. Version Management** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cudaDriverGetVersion`| | | | |`hipDriverGetVersion`|1.6.0| | | | | |`cudaRuntimeGetVersion`| | | | |`hipRuntimeGetVersion`|1.6.0| | | | | -## **28. Graph Management** +## **30. Graph Management** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -510,35 +520,131 @@ |`cudaUserObjectRelease`|11.3| | | |`hipUserObjectRelease`|5.3.0| | | | | |`cudaUserObjectRetain`|11.3| | | |`hipUserObjectRetain`|5.3.0| | | | | -## **29. Driver Entry Point Access** +## **31. 
Driver Entry Point Access** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cudaGetDriverEntryPoint`|11.3| |12.0| |`hipGetProcAddress`|6.2.0| | | |6.2.0| +|`cudaGetDriverEntryPointByVersion`|12.5| | | | | | | | | | -## **30. C++ API Routines** +## **32. C++ API Routines** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cudaGetKernel`|12.1| | | | | | | | | | -## **31. Interactions with the CUDA Driver API** +## **33. Interactions with the CUDA Driver API** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cudaGetFuncBySymbol`|11.0| | | |`hipGetFuncBySymbol`|6.2.0| | | |6.2.0| -## **32. Profiler Control** +## **34. Profiler Control** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cudaProfilerStart`| | | | |`hipProfilerStart`|1.6.0|3.0.0| | | | |`cudaProfilerStop`| | | | |`hipProfilerStop`|1.6.0|3.0.0| | | | -## **33. Data types used by CUDA Runtime** +## **35. 
Data types used by CUDA Runtime** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| +|`CUDART_2_OVER_PI`| | | | |`HIP_2_OVER_PI`|5.7.0| | | | | +|`CUDART_2_OVER_PI_F`| | | | |`HIP_2_OVER_PI_F`|5.3.0| | | | | +|`CUDART_3PIO4`| | | | |`HIP_3PIO4`|5.7.0| | | | | +|`CUDART_3PIO4_F`| | | | |`HIP_3PIO4_F`|5.3.0| | | | | +|`CUDART_DBL2INT_CVT`| | | | |`HIP_DBL2INT_CVT`|5.7.0| | | | | +|`CUDART_INF`| | | | |`HIP_INF`|5.7.0| | | | | +|`CUDART_INF_F`| | | | |`HIP_INF_F`|5.3.0| | | | | +|`CUDART_L2E`| | | | |`HIP_L2E`|5.7.0| | | | | +|`CUDART_L2E_F`| | | | |`HIP_L2E_F`|5.3.0| | | | | +|`CUDART_L2E_HI`| | | | |`HIP_L2E_HI`|5.7.0| | | | | +|`CUDART_L2E_LO`| | | | |`HIP_L2E_LO`|5.7.0| | | | | +|`CUDART_L2T`| | | | |`HIP_L2T`|5.7.0| | | | | +|`CUDART_L2T_F`| | | | |`HIP_L2T_F`|5.3.0| | | | | +|`CUDART_LG2`| | | | |`HIP_LG2`|5.7.0| | | | | +|`CUDART_LG2_F`| | | | |`HIP_LG2_F`|5.3.0| | | | | +|`CUDART_LG2_HI`| | | | |`HIP_LG2_HI`|5.7.0| | | | | +|`CUDART_LG2_LO`| | | | |`HIP_LG2_LO`|5.7.0| | | | | +|`CUDART_LG2_X_1024`| | | | |`HIP_LG2_X_1024`|5.7.0| | | | | +|`CUDART_LG2_X_1075`| | | | |`HIP_LG2_X_1075`|5.7.0| | | | | +|`CUDART_LGE`| | | | |`HIP_LGE`|5.7.0| | | | | +|`CUDART_LGE_F`| | | | |`HIP_LGE_F`|5.3.0| | | | | +|`CUDART_LGE_HI`| | | | |`HIP_LGE_HI`|5.7.0| | | | | +|`CUDART_LGE_LO`| | | | |`HIP_LGE_LO`|5.7.0| | | | | +|`CUDART_LN2`| | | | |`HIP_LN2`|5.7.0| | | | | +|`CUDART_LN2_F`| | | | |`HIP_LN2_F`|5.3.0| | | | | +|`CUDART_LN2_HI`| | | | |`HIP_LN2_HI`|5.7.0| | | | | +|`CUDART_LN2_LO`| | | | |`HIP_LN2_LO`|5.7.0| | | | | +|`CUDART_LN2_X_1024`| | | | |`HIP_LN2_X_1024`|5.7.0| | | | | +|`CUDART_LN2_X_1025`| | | | |`HIP_LN2_X_1025`|5.7.0| | | | | +|`CUDART_LN2_X_1075`| | | | |`HIP_LN2_X_1075`|5.7.0| | | | | +|`CUDART_LNPI`| | | | |`HIP_LNPI`|5.7.0| | | | | +|`CUDART_LNPI_F`| | | | |`HIP_LNPI_F`|5.3.0| | | | | +|`CUDART_LNT`| | | | |`HIP_LNT`|5.7.0| | | | | +|`CUDART_LNT_F`| | | | |`HIP_LNT_F`|5.3.0| | | | | 
+|`CUDART_LNT_HI`| | | | |`HIP_LNT_HI`|5.7.0| | | | | +|`CUDART_LNT_LO`| | | | |`HIP_LNT_LO`|5.7.0| | | | | +|`CUDART_MAX_NORMAL_F`| | | | |`HIP_MAX_NORMAL_F`|5.3.0| | | | | +|`CUDART_MIN_DENORM`| | | | |`HIP_MIN_DENORM`|5.7.0| | | | | +|`CUDART_MIN_DENORM_F`| | | | |`HIP_MIN_DENORM_F`|5.3.0| | | | | +|`CUDART_NAN`| | | | |`HIP_NAN`|5.7.0| | | | | +|`CUDART_NAN_F`| | | | |`HIP_NAN_F`|5.3.0| | | | | +|`CUDART_NEG_ZERO`| | | | |`HIP_NEG_ZERO`|5.7.0| | | | | +|`CUDART_NEG_ZERO_F`| | | | |`HIP_NEG_ZERO_F`|5.3.0| | | | | +|`CUDART_NORM_HUGE_F`| | | | |`HIP_NORM_HUGE_F`|5.3.0| | | | | +|`CUDART_ONE`| | | | |`HIP_ONE`|5.7.0| | | | | +|`CUDART_ONE_F`| | | | |`HIP_ONE_F`|5.3.0| | | | | +|`CUDART_PI`| | | | |`HIP_PI`|5.7.0| | | | | +|`CUDART_PIO2`| | | | |`HIP_PIO2`|5.7.0| | | | | +|`CUDART_PIO2_F`| | | | |`HIP_PIO2_F`|5.3.0| | | | | +|`CUDART_PIO2_HI`| | | | |`HIP_PIO2_HI`|5.7.0| | | | | +|`CUDART_PIO2_LO`| | | | |`HIP_PIO2_LO`|5.7.0| | | | | +|`CUDART_PIO4`| | | | |`HIP_PIO4`|5.7.0| | | | | +|`CUDART_PIO4_F`| | | | |`HIP_PIO4_F`|5.3.0| | | | | +|`CUDART_PIO4_HI`| | | | |`HIP_PIO4_HI`|5.7.0| | | | | +|`CUDART_PIO4_LO`| | | | |`HIP_PIO4_LO`|5.7.0| | | | | +|`CUDART_PI_F`| | | | |`HIP_PI_F`|5.3.0| | | | | +|`CUDART_PI_HI`| | | | |`HIP_PI_HI`|5.7.0| | | | | +|`CUDART_PI_LO`| | | | |`HIP_PI_LO`|5.7.0| | | | | +|`CUDART_REMQUO_BITS_F`| | | | |`HIP_REMQUO_BITS_F`|5.3.0| | | | | +|`CUDART_REMQUO_MASK_F`| | | | |`HIP_REMQUO_MASK_F`|5.3.0| | | | | +|`CUDART_SQRT_2OPI`| | | | |`HIP_SQRT_2OPI`|5.7.0| | | | | +|`CUDART_SQRT_2PI`| | | | |`HIP_SQRT_2PI`|5.7.0| | | | | +|`CUDART_SQRT_2PI_HI`| | | | |`HIP_SQRT_2PI_HI`|5.7.0| | | | | +|`CUDART_SQRT_2PI_LO`| | | | |`HIP_SQRT_2PI_LO`|5.7.0| | | | | +|`CUDART_SQRT_2_OVER_PI_F`| | | | |`HIP_SQRT_2_OVER_PI_F`|5.3.0| | | | | +|`CUDART_SQRT_HALF`| | | | |`HIP_SQRT_HALF`|5.7.0| | | | | +|`CUDART_SQRT_HALF_F`| | | | |`HIP_SQRT_HALF_F`|5.3.0| | | | | +|`CUDART_SQRT_HALF_HI`| | | | |`HIP_SQRT_HALF_HI`|5.7.0| | | | | +|`CUDART_SQRT_HALF_HI_F`| | | | 
|`HIP_SQRT_HALF_HI_F`|5.3.0| | | | | +|`CUDART_SQRT_HALF_LO`| | | | |`HIP_SQRT_HALF_LO`|5.7.0| | | | | +|`CUDART_SQRT_HALF_LO_F`| | | | |`HIP_SQRT_HALF_LO_F`|5.3.0| | | | | +|`CUDART_SQRT_PIO2`| | | | |`HIP_SQRT_PIO2`|5.7.0| | | | | +|`CUDART_SQRT_PIO2_HI`| | | | |`HIP_SQRT_PIO2_HI`|5.7.0| | | | | +|`CUDART_SQRT_PIO2_LO`| | | | |`HIP_SQRT_PIO2_LO`|5.7.0| | | | | +|`CUDART_SQRT_TWO`| | | | |`HIP_SQRT_TWO`|5.7.0| | | | | +|`CUDART_SQRT_TWO_F`| | | | |`HIP_SQRT_TWO_F`|5.3.0| | | | | +|`CUDART_THIRD`| | | | |`HIP_THIRD`|5.7.0| | | | | +|`CUDART_THIRD_F`| | | | |`HIP_THIRD_F`|5.3.0| | | | | +|`CUDART_TRIG_PLOSS`| | | | |`HIP_TRIG_PLOSS`|5.7.0| | | | | +|`CUDART_TRIG_PLOSS_F`| | | | |`HIP_TRIG_PLOSS_F`|5.3.0| | | | | +|`CUDART_TWOTHIRD`| | | | |`HIP_TWOTHIRD`|5.7.0| | | | | +|`CUDART_TWO_TO_126_F`| | | | |`HIP_TWO_TO_126_F`|5.3.0| | | | | +|`CUDART_TWO_TO_23`| | | | |`HIP_TWO_TO_23`|5.7.0| | | | | +|`CUDART_TWO_TO_23_F`| | | | |`HIP_TWO_TO_23_F`|5.3.0| | | | | +|`CUDART_TWO_TO_24_F`| | | | |`HIP_TWO_TO_24_F`|5.3.0| | | | | +|`CUDART_TWO_TO_31_F`| | | | |`HIP_TWO_TO_31_F`|5.3.0| | | | | +|`CUDART_TWO_TO_32_F`| | | | |`HIP_TWO_TO_32_F`|5.3.0| | | | | +|`CUDART_TWO_TO_52`| | | | |`HIP_TWO_TO_52`|5.7.0| | | | | +|`CUDART_TWO_TO_53`| | | | |`HIP_TWO_TO_53`|5.7.0| | | | | +|`CUDART_TWO_TO_54`| | | | |`HIP_TWO_TO_54`|5.7.0| | | | | +|`CUDART_TWO_TO_M1022`| | | | |`HIP_TWO_TO_M1022`|5.7.0| | | | | +|`CUDART_TWO_TO_M126_F`| | | | |`HIP_TWO_TO_M126_F`|5.3.0| | | | | +|`CUDART_TWO_TO_M54`| | | | |`HIP_TWO_TO_M54`|5.7.0| | | | | +|`CUDART_ZERO`| | | | |`HIP_ZERO`|5.7.0| | | | | +|`CUDART_ZERO_F`| | | | |`HIP_ZERO_F`|5.3.0| | | | | |`CUDA_EGL_MAX_PLANES`|9.1| | | | | | | | | | |`CUDA_IPC_HANDLE_SIZE`| | | | |`HIP_IPC_HANDLE_SIZE`|1.6.0| | | | | |`CUeglStreamConnection_st`|9.1| | | | | | | | | | @@ -674,6 +780,7 @@ |`cudaDevAttrConcurrentManagedAccess`|8.0| | | |`hipDeviceAttributeConcurrentManagedAccess`|3.10.0| | | | | |`cudaDevAttrCooperativeLaunch`|9.0| | | 
|`hipDeviceAttributeCooperativeLaunch`|2.6.0| | | | | |`cudaDevAttrCooperativeMultiDeviceLaunch`|9.0| | | |`hipDeviceAttributeCooperativeMultiDeviceLaunch`|2.6.0| | | | | +|`cudaDevAttrD3D12CigSupported`|12.5| | | | | | | | | | |`cudaDevAttrDeferredMappingCudaArraySupported`|11.6| | | | | | | | | | |`cudaDevAttrDirectManagedMemAccessFromHost`|9.2| | | |`hipDeviceAttributeDirectManagedMemAccessFromHost`|3.10.0| | | | | |`cudaDevAttrEccEnabled`| | | | |`hipDeviceAttributeEccEnabled`|2.10.0| | | | | @@ -1253,6 +1360,7 @@ |`cudaKernelNodeAttributeDeviceUpdatableKernelNode`|12.4| | | | | | | | | | |`cudaKernelNodeAttributeMemSyncDomain`|12.0| | | | | | | | | | |`cudaKernelNodeAttributeMemSyncDomainMap`|12.0| | | | | | | | | | +|`cudaKernelNodeAttributePreferredSharedMemoryCarveout`|12.5| | | | | | | | | | |`cudaKernelNodeAttributePriority`|11.7| | | |`hipKernelNodeAttributePriority`|6.2.0| | | |6.2.0| |`cudaKernelNodeParams`|10.0| | | |`hipKernelNodeParams`|4.3.0| | | | | |`cudaKernelNodeParamsV2`|12.2| | | | | | | | | | @@ -1269,6 +1377,7 @@ |`cudaLaunchAttributeLaunchCompletionEvent`|12.3| | | | | | | | | | |`cudaLaunchAttributeMemSyncDomain`|12.0| | | | | | | | | | |`cudaLaunchAttributeMemSyncDomainMap`|12.0| | | | | | | | | | +|`cudaLaunchAttributePreferredSharedMemoryCarveout`|12.5| | | | | | | | | | |`cudaLaunchAttributePriority`|11.8| | | |`hipLaunchAttributePriority`|6.2.0| | | |6.2.0| |`cudaLaunchAttributeProgrammaticEvent`|11.8| | | | | | | | | | |`cudaLaunchAttributeProgrammaticStreamSerialization`|11.8| | | | | | | | | | @@ -1486,7 +1595,7 @@ |`texture`| | | |12.0|`texture`| | | | | | |`textureReference`| | | | |`textureReference`|1.6.0| | | | | -## **34. Execution Control [REMOVED]** +## **36. 
Execution Control [REMOVED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -1494,7 +1603,7 @@ |`cudaLaunch`| | | |10.1|`hipLaunchByPtr`|1.9.0| | | | | |`cudaSetupArgument`| | | |10.1|`hipSetupArgument`|1.9.0| | | | | -## **35. Texture Reference Management [REMOVED]** +## **37. Texture Reference Management [REMOVED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| @@ -1506,14 +1615,14 @@ |`cudaGetTextureReference`| |11.0| |12.0|`hipGetTextureReference`|1.7.0|5.3.0| | | | |`cudaUnbindTexture`| |11.0| |12.0|`hipUnbindTexture`|1.6.0|3.8.0| | | | -## **36. Surface Reference Management [REMOVED]** +## **38. Surface Reference Management [REMOVED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`cudaBindSurfaceToArray`| |11.0| |12.0| | | | | | | |`cudaGetSurfaceReference`| |11.0| |12.0| | | | | | | -## **37. Profiler Control [REMOVED]** +## **39. 
Profiler Control [REMOVED]** |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| diff --git a/docs/tables/CUDNN_API_supported_by_HIP.md b/docs/tables/CUDNN_API_supported_by_HIP.md index 01dfbcd8..0f86fb2d 100644 --- a/docs/tables/CUDNN_API_supported_by_HIP.md +++ b/docs/tables/CUDNN_API_supported_by_HIP.md @@ -30,6 +30,8 @@ |`CUDNN_ATTR_ENGINECFG_ENGINE`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_ENGINECFG_KNOB_CHOICES`|8.0.1| | | | | | | | | | +|`CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED`|9.2.0| | | | | | | | | | +|`CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE`|9.2.0| | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_MODE`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_RESULTS`|8.0.1| | | | | | | | | | @@ -687,6 +689,7 @@ |`CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING`|6.0.0|9.0.0| | |`HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING`| | | | | | |`CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_SPECIFIC_ERROR`|9.0.0| | | | | | | | | | +|`CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED`|9.2.0| | | | | | | | | | |`CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_SUCCESS`|1.0.0| | | |`HIPDNN_STATUS_SUCCESS`| | | | | | |`CUDNN_STATUS_VERSION_MISMATCH`|8.0.1|9.0.0| | | | | | | | | diff --git a/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md b/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md index eed8bf52..a803ea4c 100644 --- a/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md +++ b/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md @@ -30,6 +30,8 @@ |`CUDNN_ATTR_ENGINECFG_ENGINE`|8.0.1| | | | | | | | | | | | | | | | |`CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO`|8.0.1| | | | | | | | | | | | | | | | |`CUDNN_ATTR_ENGINECFG_KNOB_CHOICES`|8.0.1| | | | | | | | | | | | | | | | +|`CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED`|9.2.0| | | | | | | 
| | | | | | | | | +|`CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE`|9.2.0| | | | | | | | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_MODE`|8.0.1| | | | | | | | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH`|8.0.1| | | | | | | | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_RESULTS`|8.0.1| | | | | | | | | | | | | | | | @@ -687,6 +689,7 @@ |`CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING`|6.0.0|9.0.0| | |`HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING`| | | | | | | | | | | | |`CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH`|9.0.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_SPECIFIC_ERROR`|9.0.0| | | | | | | | | | | | | | | | +|`CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED`|9.2.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH`|9.0.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_SUCCESS`|1.0.0| | | |`HIPDNN_STATUS_SUCCESS`| | | | | |`miopenStatusSuccess`| | | | | | |`CUDNN_STATUS_VERSION_MISMATCH`|8.0.1|9.0.0| | | | | | | | | | | | | | | diff --git a/docs/tables/CUDNN_API_supported_by_MIOPEN.md b/docs/tables/CUDNN_API_supported_by_MIOPEN.md index a90fa541..26a01db0 100644 --- a/docs/tables/CUDNN_API_supported_by_MIOPEN.md +++ b/docs/tables/CUDNN_API_supported_by_MIOPEN.md @@ -30,6 +30,8 @@ |`CUDNN_ATTR_ENGINECFG_ENGINE`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_ENGINECFG_KNOB_CHOICES`|8.0.1| | | | | | | | | | +|`CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED`|9.2.0| | | | | | | | | | +|`CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE`|9.2.0| | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_MODE`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_ENGINEHEUR_RESULTS`|8.0.1| | | | | | | | | | @@ -687,6 +689,7 @@ |`CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING`|6.0.0|9.0.0| | | | | | | | | |`CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_SPECIFIC_ERROR`|9.0.0| | | | | | | | | | +|`CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED`|9.2.0| | | | | | | | | | 
|`CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_SUCCESS`|1.0.0| | | |`miopenStatusSuccess`| | | | | | |`CUDNN_STATUS_VERSION_MISMATCH`|8.0.1|9.0.0| | | | | | | | | diff --git a/docs/tables/CUFFT_API_supported_by_HIP.md b/docs/tables/CUFFT_API_supported_by_HIP.md index cc2ea49e..2a9167e5 100644 --- a/docs/tables/CUFFT_API_supported_by_HIP.md +++ b/docs/tables/CUFFT_API_supported_by_HIP.md @@ -58,6 +58,7 @@ |`CUFFT_Z2D`| | | | |`HIPFFT_Z2D`|1.7.0| | | | | |`CUFFT_Z2Z`| | | | |`HIPFFT_Z2Z`|1.7.0| | | | | |`MAX_CUFFT_ERROR`| | | | | | | | | | | +|`NVFFT_PLAN_PROPERTY_INT64_MAX_NUM_HOST_THREADS`|12.5| | | | | | | | | | |`NVFFT_PLAN_PROPERTY_INT64_PATIENT_JIT`|12.4| | | | | | | | | | |`cufftBox3d`|11.8| | | | | | | | | | |`cufftBox3d_t`|11.8| | | | | | | | | | diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP.md b/docs/tables/CUSPARSE_API_supported_by_HIP.md index 9664cbf0..fe116ea8 100644 --- a/docs/tables/CUSPARSE_API_supported_by_HIP.md +++ b/docs/tables/CUSPARSE_API_supported_by_HIP.md @@ -78,6 +78,7 @@ |`CUSPARSE_SPMMA_PREPROCESS`|11.1| | |11.2| | | | | | | |`CUSPARSE_SPMM_ALG_DEFAULT`|11.0| | | |`HIPSPARSE_SPMM_ALG_DEFAULT`|4.2.0| | | | | |`CUSPARSE_SPMM_BLOCKED_ELL_ALG1`|11.2| | | |`HIPSPARSE_SPMM_BLOCKED_ELL_ALG1`|4.5.0| | | | | +|`CUSPARSE_SPMM_BSR_ALG1`|12.5| | | | | | | | | | |`CUSPARSE_SPMM_COO_ALG1`|11.0| | | |`HIPSPARSE_SPMM_COO_ALG1`|4.2.0| | | | | |`CUSPARSE_SPMM_COO_ALG2`|11.0| | | |`HIPSPARSE_SPMM_COO_ALG2`|4.2.0| | | | | |`CUSPARSE_SPMM_COO_ALG3`|11.0| | | |`HIPSPARSE_SPMM_COO_ALG3`|4.2.0| | | | | diff --git a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md index 74f49cbf..74aa1585 100644 --- a/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_HIP_and_ROC.md @@ -78,6 +78,7 @@ |`CUSPARSE_SPMMA_PREPROCESS`|11.1| | |11.2| | | | | | | | | | | | | |`CUSPARSE_SPMM_ALG_DEFAULT`|11.0| | | 
|`HIPSPARSE_SPMM_ALG_DEFAULT`|4.2.0| | | | |`rocsparse_spmm_alg_default`|4.2.0| | | | | |`CUSPARSE_SPMM_BLOCKED_ELL_ALG1`|11.2| | | |`HIPSPARSE_SPMM_BLOCKED_ELL_ALG1`|4.5.0| | | | |`rocsparse_spmm_alg_bell`|4.5.0| | | | | +|`CUSPARSE_SPMM_BSR_ALG1`|12.5| | | | | | | | | | | | | | | | |`CUSPARSE_SPMM_COO_ALG1`|11.0| | | |`HIPSPARSE_SPMM_COO_ALG1`|4.2.0| | | | |`rocsparse_spmm_alg_coo_segmented`|4.2.0| | | | | |`CUSPARSE_SPMM_COO_ALG2`|11.0| | | |`HIPSPARSE_SPMM_COO_ALG2`|4.2.0| | | | |`rocsparse_spmm_alg_coo_atomic`|4.2.0| | | | | |`CUSPARSE_SPMM_COO_ALG3`|11.0| | | |`HIPSPARSE_SPMM_COO_ALG3`|4.2.0| | | | |`rocsparse_spmm_alg_coo_segmented_atomic`|4.5.0| | | | | diff --git a/docs/tables/CUSPARSE_API_supported_by_ROC.md b/docs/tables/CUSPARSE_API_supported_by_ROC.md index d98d336e..cdd05d7c 100644 --- a/docs/tables/CUSPARSE_API_supported_by_ROC.md +++ b/docs/tables/CUSPARSE_API_supported_by_ROC.md @@ -78,6 +78,7 @@ |`CUSPARSE_SPMMA_PREPROCESS`|11.1| | |11.2| | | | | | | |`CUSPARSE_SPMM_ALG_DEFAULT`|11.0| | | |`rocsparse_spmm_alg_default`|4.2.0| | | | | |`CUSPARSE_SPMM_BLOCKED_ELL_ALG1`|11.2| | | |`rocsparse_spmm_alg_bell`|4.5.0| | | | | +|`CUSPARSE_SPMM_BSR_ALG1`|12.5| | | | | | | | | | |`CUSPARSE_SPMM_COO_ALG1`|11.0| | | |`rocsparse_spmm_alg_coo_segmented`|4.2.0| | | | | |`CUSPARSE_SPMM_COO_ALG2`|11.0| | | |`rocsparse_spmm_alg_coo_atomic`|4.2.0| | | | | |`CUSPARSE_SPMM_COO_ALG3`|11.0| | | |`rocsparse_spmm_alg_coo_segmented_atomic`|4.5.0| | | | | diff --git a/src/CUDA2HIP.cpp b/src/CUDA2HIP.cpp index e5e641ad..3aae6ba3 100644 --- a/src/CUDA2HIP.cpp +++ b/src/CUDA2HIP.cpp @@ -38,6 +38,7 @@ const std::map CUDA_INCLUDE_MAP { {"cuda_profiler_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, {"library_types.h", {"hip/library_types.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, + {"math_constants.h", {"hip/hip_math_constants.h", "", CONV_INCLUDE, API_RUNTIME, 0}}, // 
cuComplex includes {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX, 0}}, // cuBLAS includes diff --git a/src/CUDA2HIP.h b/src/CUDA2HIP.h index 835e2c0f..b262f44b 100644 --- a/src/CUDA2HIP.h +++ b/src/CUDA2HIP.h @@ -233,42 +233,44 @@ namespace driver { namespace runtime { enum CUDA_RUNTIME_API_SECTIONS { DEVICE = 1, - THREAD_DEPRECATED = 2, - ERROR = 3, - STREAM = 4, - EVENT = 5, - EXTERNAL_RES = 6, - EXECUTION = 7, - OCCUPANCY = 8, - MEMORY = 9, - MEMORY_DEPRECATED = 10, - ORDERED_MEMORY = 11, - UNIFIED = 12, - PEER = 13, - OPENGL = 14, - OPENGL_DEPRECATED = 15, - D3D9 = 16, - D3D9_DEPRECATED = 17, - D3D10 = 18, - D3D10_DEPRECATED = 19, - D3D11 = 20, - D3D11_DEPRECATED = 21, - VDPAU = 22, - EGL = 23, - GRAPHICS = 24, - TEXTURE = 25, - SURFACE = 26, - VERSION = 27, - GRAPH = 28, - DRIVER_ENTRY_POINT = 29, - CPP = 30, - DRIVER_INTERACT = 31, - PROFILER = 32, - DATA_TYPES = 33, - EXECUTION_REMOVED = 34, - TEXTURE_REMOVED = 35, - SURFACE_REMOVED = 36, - PROFILER_REMOVED = 37, + DEVICE_DEPRECATED = 2, + THREAD_DEPRECATED = 3, + ERROR = 4, + STREAM = 5, + EVENT = 6, + EXTERNAL_RES = 7, + EXECUTION = 8, + EXECUTION_DEPRECATED = 9, + OCCUPANCY = 10, + MEMORY = 11, + MEMORY_DEPRECATED = 12, + ORDERED_MEMORY = 13, + UNIFIED = 14, + PEER = 15, + OPENGL = 16, + OPENGL_DEPRECATED = 17, + D3D9 = 18, + D3D9_DEPRECATED = 19, + D3D10 = 20, + D3D10_DEPRECATED = 21, + D3D11 = 22, + D3D11_DEPRECATED = 23, + VDPAU = 24, + EGL = 25, + GRAPHICS = 26, + TEXTURE = 27, + SURFACE = 28, + VERSION = 29, + GRAPH = 30, + DRIVER_ENTRY_POINT = 31, + CPP = 32, + DRIVER_INTERACT = 33, + PROFILER = 34, + DATA_TYPES = 35, + EXECUTION_REMOVED = 36, + TEXTURE_REMOVED = 37, + SURFACE_REMOVED = 38, + PROFILER_REMOVED = 39, }; } diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 806f0065..91efd755 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -98,7 +98,7 @@ const std::map 
CUDA_BLAS_FUNCTION_MAP { {"cublasDznrm2", {"hipblasDznrm2_v2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1, HIP_SUPPORTED_V2_ONLY}}, {"cublasDznrm2_64", {"hipblasDznrm2_v2_64", "rocblas_dznrm2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, {"cublasNrm2Ex", {"hipblasNrm2Ex_v2", "rocblas_nrm2_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, - {"cublasNrm2Ex_64", {"hipblasNrm2Ex_64", "rocblas_nrm2_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1, HIP_UNSUPPORTED}}, + {"cublasNrm2Ex_64", {"hipblasNrm2Ex_v2_64", "rocblas_nrm2_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1, HIP_EXPERIMENTAL}}, // DOT // DOT functions' signatures differ from _v2 ones, hipblas and rocblas DOT functions have mapping to DOT_v2 functions only @@ -222,179 +222,179 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GEMV {"cublasSgemv", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSgemv_64", {"hipblasSgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSgemv_64", {"hipblasSgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDgemv", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDgemv_64", {"hipblasDgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDgemv_64", {"hipblasDgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgemv", {"hipblasCgemv_v2", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgemv_64", {"hipblasCgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCgemv_64", {"hipblasCgemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgemv", {"hipblasZgemv_v2", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, 
HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgemv_64", {"hipblasZgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZgemv_64", {"hipblasZgemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // GBMV {"cublasSgbmv", {"hipblasSgbmv", "rocblas_sgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSgbmv_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasSgbmv_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDgbmv", {"hipblasDgbmv", "rocblas_dgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDgbmv_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasDgbmv_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgbmv", {"hipblasCgbmv_v2", "rocblas_cgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgbmv_64", {"hipblasCgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasCgbmv_64", {"hipblasCgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgbmv", {"hipblasZgbmv_v2", "rocblas_zgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgbmv_64", {"hipblasZgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasZgbmv_64", {"hipblasZgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TRMV {"cublasStrmv", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrmv_64", {"hipblasStrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStrmv_64", {"hipblasStrmv_64", "", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtrmv", {"hipblasDtrmv", "rocblas_dtrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrmv_64", {"hipblasDtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtrmv_64", {"hipblasDtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtrmv", {"hipblasCtrmv_v2", "rocblas_ctrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrmv_64", {"hipblasCtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtrmv_64", {"hipblasCtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtrmv", {"hipblasZtrmv_v2", "rocblas_ztrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrmv_64", {"hipblasZtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtrmv_64", {"hipblasZtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TBMV {"cublasStbmv", {"hipblasStbmv", "rocblas_stbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStbmv_64", {"hipblasStbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStbmv_64", {"hipblasStbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtbmv", {"hipblasDtbmv", "rocblas_dtbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtbmv_64", {"hipblasDtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtbmv_64", {"hipblasDtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtbmv", {"hipblasCtbmv_v2", "rocblas_ctbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtbmv_64", {"hipblasCtbmv_64", "", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtbmv_64", {"hipblasCtbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtbmv", {"hipblasZtbmv_v2", "rocblas_ztbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtbmv_64", {"hipblasZtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtbmv_64", {"hipblasZtbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TPMV {"cublasStpmv", {"hipblasStpmv", "rocblas_stpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStpmv_64", {"hipblasStpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStpmv_64", {"hipblasStpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtpmv", {"hipblasDtpmv", "rocblas_dtpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtpmv_64", {"hipblasDtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtpmv_64", {"hipblasDtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtpmv", {"hipblasCtpmv_v2", "rocblas_ctpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtpmv_64", {"hipblasCtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtpmv_64", {"hipblasCtpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtpmv", {"hipblasZtpmv_v2", "rocblas_ztpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtpmv_64", {"hipblasZtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtpmv_64", {"hipblasZtpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TRSV {"cublasStrsv", 
{"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrsv_64", {"hipblasStrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStrsv_64", {"hipblasStrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtrsv", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrsv_64", {"hipblasDtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtrsv_64", {"hipblasDtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtrsv", {"hipblasCtrsv_v2", "rocblas_ctrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrsv_64", {"hipblasCtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtrsv_64", {"hipblasCtrsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtrsv", {"hipblasZtrsv_v2", "rocblas_ztrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrsv_64", {"hipblasZtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtrsv_64", {"hipblasZtrsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TPSV {"cublasStpsv", {"hipblasStpsv", "rocblas_stpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStpsv_64", {"hipblasStpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStpsv_64", {"hipblasStpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtpsv", {"hipblasDtpsv", "rocblas_dtpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtpsv_64", {"hipblasDtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtpsv_64", 
{"hipblasDtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtpsv", {"hipblasCtpsv_v2", "rocblas_ctpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtpsv_64", {"hipblasCtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtpsv_64", {"hipblasCtpsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtpsv", {"hipblasZtpsv_v2", "rocblas_ztpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtpsv_64", {"hipblasZtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtpsv_64", {"hipblasZtpsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TBSV {"cublasStbsv", {"hipblasStbsv", "rocblas_stbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStbsv_64", {"hipblasStbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStbsv_64", {"hipblasStbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtbsv", {"hipblasDtbsv", "rocblas_dtbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtbsv_64", {"hipblasDtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtbsv_64", {"hipblasDtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtbsv", {"hipblasCtbsv_v2", "rocblas_ctbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtbsv_64", {"hipblasCtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtbsv_64", {"hipblasCtbsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtbsv", {"hipblasZtbsv_v2", "rocblas_ztbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, 
HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtbsv_64", {"hipblasZtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtbsv_64", {"hipblasZtbsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SYMV/HEMV {"cublasSsymv", {"hipblasSsymv", "rocblas_ssymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsymv_64", {"hipblasSsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSsymv_64", {"hipblasSsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDsymv", {"hipblasDsymv", "rocblas_dsymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsymv_64", {"hipblasDsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDsymv_64", {"hipblasDsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCsymv", {"hipblasCsymv_v2", "rocblas_csymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsymv_64", {"hipblasCsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCsymv_64", {"hipblasCsymv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZsymv", {"hipblasZsymv_v2", "rocblas_zsymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsymv_64", {"hipblasZsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZsymv_64", {"hipblasZsymv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChemv", {"hipblasChemv_v2", "rocblas_chemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChemv_64", {"hipblasChemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChemv_64", {"hipblasChemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhemv", {"hipblasZhemv_v2", "rocblas_zhemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhemv_64", {"hipblasZhemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhemv_64", {"hipblasZhemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SBMV/HBMV {"cublasSsbmv", {"hipblasSsbmv", "rocblas_ssbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsbmv_64", {"hipblasSsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSsbmv_64", {"hipblasSsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDsbmv", {"hipblasDsbmv", "rocblas_dsbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsbmv_64", {"hipblasDsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDsbmv_64", {"hipblasDsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChbmv", {"hipblasChbmv_v2", "rocblas_chbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChbmv_64", {"hipblasChbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChbmv_64", {"hipblasChbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhbmv", {"hipblasZhbmv_v2", "rocblas_zhbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhbmv_64", {"hipblasZhbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhbmv_64", {"hipblasZhbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SPMV/HPMV {"cublasSspmv", {"hipblasSspmv", "rocblas_sspmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSspmv_64", 
{"hipblasSspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSspmv_64", {"hipblasSspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDspmv", {"hipblasDspmv", "rocblas_dspmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDspmv_64", {"hipblasDspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDspmv_64", {"hipblasDspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChpmv", {"hipblasChpmv_v2", "rocblas_chpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChpmv_64", {"hipblasChpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChpmv_64", {"hipblasChpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhpmv", {"hipblasZhpmv_v2", "rocblas_zhpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhpmv_64", {"hipblasZhpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhpmv_64", {"hipblasZhpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // GER {"cublasSger", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSger_64", {"hipblasSger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSger_64", {"hipblasSger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDger", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDger_64", {"hipblasDger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDger_64", {"hipblasDger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgeru", 
{"hipblasCgeru_v2", "rocblas_cgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgeru_64", {"hipblasCgeru_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCgeru_64", {"hipblasCgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgerc", {"hipblasCgerc_v2", "rocblas_cgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgerc_64", {"hipblasCgerc_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCgerc_64", {"hipblasCgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgeru", {"hipblasZgeru_v2", "rocblas_zgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgeru_64", {"hipblasZgeru_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZgeru_64", {"hipblasZgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgerc", {"hipblasZgerc_v2", "rocblas_zgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgerc_64", {"hipblasZgerc_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZgerc_64", {"hipblasZgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SYR/HER {"cublasSsyr", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsyr_64", {"hipblasSsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSsyr_64", {"hipblasSsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDsyr", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsyr_64", {"hipblasDsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDsyr_64", 
{"hipblasDsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCsyr", {"hipblasCsyr_v2", "rocblas_csyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsyr_64", {"hipblasCsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCsyr_64", {"hipblasCsyr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZsyr", {"hipblasZsyr_v2", "rocblas_zsyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsyr_64", {"hipblasZsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZsyr_64", {"hipblasZsyr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCher", {"hipblasCher_v2", "rocblas_cher", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCher_64", {"hipblasCher_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCher_64", {"hipblasCher_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZher", {"hipblasZher_v2", "rocblas_zher", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZher_64", {"hipblasZher_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZher_64", {"hipblasZher_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SPR/HPR {"cublasSspr", {"hipblasSspr", "rocblas_sspr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSspr_64", {"hipblasSspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSspr_64", {"hipblasSspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDspr", {"hipblasDspr", "rocblas_dspr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDspr_64", 
{"hipblasDspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDspr_64", {"hipblasDspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChpr", {"hipblasChpr_v2", "rocblas_chpr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChpr_64", {"hipblasChpr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChpr_64", {"hipblasChpr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhpr", {"hipblasZhpr_v2", "rocblas_zhpr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhpr_64", {"hipblasZhpr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhpr_64", {"hipblasZhpr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SYR2/HER2 {"cublasSsyr2", {"hipblasSsyr2", "rocblas_ssyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsyr2_64", {"hipblasSsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSsyr2_64", {"hipblasSsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDsyr2", {"hipblasDsyr2", "rocblas_dsyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsyr2_64", {"hipblasDsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDsyr2_64", {"hipblasDsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCsyr2", {"hipblasCsyr2_v2", "rocblas_csyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsyr2_64", {"hipblasCsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCsyr2_64", {"hipblasCsyr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZsyr2", 
{"hipblasZsyr2_v2", "rocblas_zsyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsyr2_64", {"hipblasZsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZsyr2_64", {"hipblasZsyr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCher2", {"hipblasCher2_v2", "rocblas_cher2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCher2_64", {"hipblasCher2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCher2_64", {"hipblasCher2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZher2", {"hipblasZher2_v2", "rocblas_zher2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZher2_64", {"hipblasZher2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZher2_64", {"hipblasZher2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SPR2/HPR2 {"cublasSspr2", {"hipblasSspr2", "rocblas_sspr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSspr2_64", {"hipblasSspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSspr2_64", {"hipblasSspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDspr2", {"hipblasDspr2", "rocblas_dspr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDspr2_64", {"hipblasDspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDspr2_64", {"hipblasDspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChpr2", {"hipblasChpr2_v2", "rocblas_chpr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChpr2_64", {"hipblasChpr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + 
{"cublasChpr2_64", {"hipblasChpr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhpr2", {"hipblasZhpr2_v2", "rocblas_zhpr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhpr2_64", {"hipblasZhpr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhpr2_64", {"hipblasZhpr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // Blas3 (v1) Routines // GEMM @@ -438,16 +438,18 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasZgemmStridedBatched_64", {"hipblasZgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasHgemmStridedBatched", {"hipblasHgemmStridedBatched", "rocblas_hgemm_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasHgemmStridedBatched_64", {"hipblasHgemmStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasGemmGroupedBatchedEx", {"hipblasGemmGroupedBatchedEx", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasGemmGroupedBatchedEx_64", {"hipblasGemmGroupedBatchedEx_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, // BATCH GEMV - {"cublasSgemvBatched", {"hipblasSgemvBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, - {"cublasSgemvBatched_64", {"hipblasSgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, - {"cublasDgemvBatched", {"hipblasDgemvBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, - {"cublasDgemvBatched_64", {"hipblasDgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSgemvBatched", {"hipblasSgemvBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, + {"cublasSgemvBatched_64", {"hipblasSgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cublasDgemvBatched", 
{"hipblasDgemvBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, + {"cublasDgemvBatched_64", {"hipblasDgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgemvBatched", {"hipblasCgemvBatched_v2", "rocblas_cgemv_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCgemvBatched_64", {"hipblasCgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemvBatched_64", {"hipblasCgemvBatched_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgemvBatched", {"hipblasZgemvBatched_v2", "rocblas_zgemv_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZgemvBatched_64", {"hipblasZgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemvBatched_64", {"hipblasZgemvBatched_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasHSHgemvBatched", {"hipblasHSHgemvBatched", "rocblas_hshgemv_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasHSHgemvBatched_64", {"hipblasHSHgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasHSSgemvBatched", {"hipblasHSSgemvBatched", "rocblas_hssgemv_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, @@ -456,14 +458,14 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasTSTgemvBatched_64", {"hipblasTSTgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasTSSgemvBatched", {"hipblasTSSgemvBatched", "rocblas_tssgemv_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasTSSgemvBatched_64", {"hipblasTSSgemvBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, - {"cublasSgemvStridedBatched", {"hipblasSgemvStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, - 
{"cublasSgemvStridedBatched_64", {"hipblasSgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, - {"cublasDgemvStridedBatched", {"hipblasDgemvStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, - {"cublasDgemvStridedBatched_64", {"hipblasDgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasSgemvStridedBatched", {"hipblasSgemvStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, + {"cublasSgemvStridedBatched_64", {"hipblasSgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cublasDgemvStridedBatched", {"hipblasDgemvStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, + {"cublasDgemvStridedBatched_64", {"hipblasDgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgemvStridedBatched", {"hipblasCgemvStridedBatched_v2", "rocblas_cgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCgemvStridedBatched_64", {"hipblasCgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasCgemvStridedBatched_64", {"hipblasCgemvStridedBatched_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgemvStridedBatched", {"hipblasZgemvStridedBatched_v2", "rocblas_zgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZgemvStridedBatched_64", {"hipblasZgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasZgemvStridedBatched_64", {"hipblasZgemvStridedBatched_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasHSHgemvStridedBatched", {"hipblasHSHgemvStridedBatched", "rocblas_hshgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, 
{"cublasHSHgemvStridedBatched_64", {"hipblasHSHgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, {"cublasHSSgemvStridedBatched", {"hipblasHSSgemvStridedBatched", "rocblas_hssgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, @@ -638,179 +640,179 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GEMV {"cublasSgemv_v2", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSgemv_v2_64", {"hipblasSgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSgemv_v2_64", {"hipblasSgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDgemv_v2", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDgemv_v2_64", {"hipblasDgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDgemv_v2_64", {"hipblasDgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgemv_v2", {"hipblasCgemv_v2", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgemv_v2_64", {"hipblasCgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCgemv_v2_64", {"hipblasCgemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgemv_v2", {"hipblasZgemv_v2", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgemv_v2_64", {"hipblasZgemv_64", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZgemv_v2_64", {"hipblasZgemv_v2_64", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // GBMV {"cublasSgbmv_v2", {"hipblasSgbmv", "rocblas_sgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSgbmv_v2_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasSgbmv_v2_64", 
{"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDgbmv_v2", {"hipblasDgbmv", "rocblas_dgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDgbmv_v2_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasDgbmv_v2_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgbmv_v2", {"hipblasCgbmv_v2", "rocblas_cgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgbmv_v2_64", {"hipblasCgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasCgbmv_v2_64", {"hipblasCgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgbmv_v2", {"hipblasZgbmv_v2", "rocblas_zgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgbmv_v2_64", {"hipblasZgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasZgbmv_v2_64", {"hipblasZgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TRMV {"cublasStrmv_v2", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStrmv_v2_64", {"hipblasStrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStrmv_v2_64", {"hipblasStrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtrmv_v2", {"hipblasDtrmv", "rocblas_dtrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtrmv_v2_64", {"hipblasDtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtrmv_v2_64", {"hipblasDtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtrmv_v2", {"hipblasCtrmv_v2", "rocblas_ctrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtrmv_v2_64", {"hipblasCtrmv_64", "", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtrmv_v2_64", {"hipblasCtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtrmv_v2", {"hipblasZtrmv_v2", "rocblas_ztrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtrmv_v2_64", {"hipblasZtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtrmv_v2_64", {"hipblasZtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TBMV {"cublasStbmv_v2", {"hipblasStbmv", "rocblas_stbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStbmv_v2_64", {"hipblasStbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStbmv_v2_64", {"hipblasStbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtbmv_v2", {"hipblasDtbmv", "rocblas_dtbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtbmv_v2_64", {"hipblasDtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtbmv_v2_64", {"hipblasDtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtbmv_v2", {"hipblasCtbmv_v2", "rocblas_ctbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtbmv_v2_64", {"hipblasCtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtbmv_v2_64", {"hipblasCtbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtbmv_v2", {"hipblasZtbmv_v2", "rocblas_ztbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtbmv_v2_64", {"hipblasZtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtbmv_v2_64", {"hipblasZtbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TPMV {"cublasStpmv_v2", {"hipblasStpmv", "rocblas_stpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - 
{"cublasStpmv_v2_64", {"hipblasStpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStpmv_v2_64", {"hipblasStpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtpmv_v2", {"hipblasDtpmv", "rocblas_dtpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtpmv_v2_64", {"hipblasDtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtpmv_v2_64", {"hipblasDtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtpmv_v2", {"hipblasCtpmv_v2", "rocblas_ctpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtpmv_v2_64", {"hipblasCtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtpmv_v2_64", {"hipblasCtpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtpmv_v2", {"hipblasZtpmv_v2", "rocblas_ztpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtpmv_v2_64", {"hipblasZtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtpmv_v2_64", {"hipblasZtpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TRSV {"cublasStrsv_v2", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStrsv_v2_64", {"hipblasStrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStrsv_v2_64", {"hipblasStrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtrsv_v2", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtrsv_v2_64", {"hipblasDtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtrsv_v2_64", {"hipblasDtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtrsv_v2", {"hipblasCtrsv_v2", "rocblas_ctrsv", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtrsv_v2_64", {"hipblasCtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtrsv_v2_64", {"hipblasCtrsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtrsv_v2", {"hipblasZtrsv_v2", "rocblas_ztrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtrsv_v2_64", {"hipblasZtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtrsv_v2_64", {"hipblasZtrsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TPSV {"cublasStpsv_v2", {"hipblasStpsv", "rocblas_stpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStpsv_v2_64", {"hipblasStpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStpsv_v2_64", {"hipblasStpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtpsv_v2", {"hipblasDtpsv", "rocblas_dtpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtpsv_v2_64", {"hipblasDtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtpsv_v2_64", {"hipblasDtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtpsv_v2", {"hipblasCtpsv_v2", "rocblas_ctpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtpsv_v2_64", {"hipblasCtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtpsv_v2_64", {"hipblasCtpsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtpsv_v2", {"hipblasZtpsv_v2", "rocblas_ztpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtpsv_v2_64", {"hipblasZtpsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtpsv_v2_64", {"hipblasZtpsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TBSV 
{"cublasStbsv_v2", {"hipblasStbsv", "rocblas_stbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStbsv_v2_64", {"hipblasStbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasStbsv_v2_64", {"hipblasStbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDtbsv_v2", {"hipblasDtbsv", "rocblas_dtbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtbsv_v2_64", {"hipblasDtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDtbsv_v2_64", {"hipblasDtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCtbsv_v2", {"hipblasCtbsv_v2", "rocblas_ctbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtbsv_v2_64", {"hipblasCtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCtbsv_v2_64", {"hipblasCtbsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZtbsv_v2", {"hipblasZtbsv_v2", "rocblas_ztbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtbsv_v2_64", {"hipblasZtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZtbsv_v2_64", {"hipblasZtbsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SYMV/HEMV {"cublasSsymv_v2", {"hipblasSsymv", "rocblas_ssymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSsymv_v2_64", {"hipblasSsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSsymv_v2_64", {"hipblasSsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDsymv_v2", {"hipblasDsymv", "rocblas_dsymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDsymv_v2_64", {"hipblasDsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDsymv_v2_64", {"hipblasDsymv_64", "", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCsymv_v2", {"hipblasCsymv_v2", "rocblas_csymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCsymv_v2_64", {"hipblasCsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCsymv_v2_64", {"hipblasCsymv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZsymv_v2", {"hipblasZsymv_v2", "rocblas_zsymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZsymv_v2_64", {"hipblasZsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZsymv_v2_64", {"hipblasZsymv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChemv_v2", {"hipblasChemv_v2", "rocblas_chemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChemv_v2_64", {"hipblasChemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChemv_v2_64", {"hipblasChemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhemv_v2", {"hipblasZhemv_v2", "rocblas_zhemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhemv_v2_64", {"hipblasZhemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhemv_v2_64", {"hipblasZhemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SBMV/HBMV {"cublasSsbmv_v2", {"hipblasSsbmv", "rocblas_ssbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSsbmv_v2_64", {"hipblasSsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSsbmv_v2_64", {"hipblasSsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDsbmv_v2", {"hipblasDsbmv", "rocblas_dsbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDsbmv_v2_64", {"hipblasDsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + 
{"cublasDsbmv_v2_64", {"hipblasDsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChbmv_v2", {"hipblasChbmv_v2", "rocblas_chbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChbmv_v2_64", {"hipblasChbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChbmv_v2_64", {"hipblasChbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhbmv_v2", {"hipblasZhbmv_v2", "rocblas_zhbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhbmv_v2_64", {"hipblasZhbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhbmv_v2_64", {"hipblasZhbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SPMV/HPMV {"cublasSspmv_v2", {"hipblasSspmv", "rocblas_sspmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSspmv_v2_64", {"hipblasSspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSspmv_v2_64", {"hipblasSspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDspmv_v2", {"hipblasDspmv", "rocblas_dspmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDspmv_v2_64", {"hipblasDspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDspmv_v2_64", {"hipblasDspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChpmv_v2", {"hipblasChpmv_v2", "rocblas_chpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChpmv_v2_64", {"hipblasChpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChpmv_v2_64", {"hipblasChpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhpmv_v2", {"hipblasZhpmv_v2", "rocblas_zhpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhpmv_v2_64", {"hipblasZhpmv_64", "", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhpmv_v2_64", {"hipblasZhpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // GER {"cublasSger_v2", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSger_v2_64", {"hipblasSger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSger_v2_64", {"hipblasSger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDger_v2", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDger_v2_64", {"hipblasDger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDger_v2_64", {"hipblasDger_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgeru_v2", {"hipblasCgeru_v2", "rocblas_cgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgeru_v2_64", {"hipblasCgeru_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCgeru_v2_64", {"hipblasCgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgerc_v2", {"hipblasCgerc_v2", "rocblas_cgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgerc_v2_64", {"hipblasCgerc_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCgerc_v2_64", {"hipblasCgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgeru_v2", {"hipblasZgeru_v2", "rocblas_zgeru", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgeru_v2_64", {"hipblasZgeru_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZgeru_v2_64", {"hipblasZgeru_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgerc_v2", {"hipblasZgerc_v2", "rocblas_zgerc", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - 
{"cublasZgerc_v2_64", {"hipblasZgerc_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZgerc_v2_64", {"hipblasZgerc_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SYR/HER {"cublasSsyr_v2", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSsyr_v2_64", {"hipblasSsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSsyr_v2_64", {"hipblasSsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDsyr_v2", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDsyr_v2_64", {"hipblasDsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDsyr_v2_64", {"hipblasDsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCsyr_v2", {"hipblasCsyr_v2", "rocblas_csyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCsyr_v2_64", {"hipblasCsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCsyr_v2_64", {"hipblasCsyr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZsyr_v2", {"hipblasZsyr_v2", "rocblas_zsyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZsyr_v2_64", {"hipblasZsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZsyr_v2_64", {"hipblasZsyr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCher_v2", {"hipblasCher_v2", "rocblas_cher", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCher_v2_64", {"hipblasCher_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCher_v2_64", {"hipblasCher_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZher_v2", {"hipblasZher_v2", "rocblas_zher", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2}}, - {"cublasZher_v2_64", {"hipblasZher_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZher_v2_64", {"hipblasZher_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SPR/HPR {"cublasSspr_v2", {"hipblasSspr", "rocblas_sspr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSspr_v2_64", {"hipblasSspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSspr_v2_64", {"hipblasSspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDspr_v2", {"hipblasDspr", "rocblas_dspr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDspr_v2_64", {"hipblasDspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDspr_v2_64", {"hipblasDspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChpr_v2", {"hipblasChpr_v2", "rocblas_chpr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChpr_v2_64", {"hipblasChpr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChpr_v2_64", {"hipblasChpr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhpr_v2", {"hipblasZhpr_v2", "rocblas_zhpr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhpr_v2_64", {"hipblasZhpr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhpr_v2_64", {"hipblasZhpr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SYR2/HER2 {"cublasSsyr2_v2", {"hipblasSsyr2", "rocblas_ssyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSsyr2_v2_64", {"hipblasSsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSsyr2_v2_64", {"hipblasSsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDsyr2_v2", {"hipblasDsyr2", "rocblas_dsyr2", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDsyr2_v2_64", {"hipblasDsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDsyr2_v2_64", {"hipblasDsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCsyr2_v2", {"hipblasCsyr2_v2", "rocblas_csyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCsyr2_v2_64", {"hipblasCsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCsyr2_v2_64", {"hipblasCsyr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZsyr2_v2", {"hipblasZsyr2_v2", "rocblas_zsyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZsyr2_v2_64", {"hipblasZsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZsyr2_v2_64", {"hipblasZsyr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCher2_v2", {"hipblasCher2_v2", "rocblas_cher2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCher2_v2_64", {"hipblasCher2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCher2_v2_64", {"hipblasCher2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZher2_v2", {"hipblasZher2_v2", "rocblas_zher2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZher2_v2_64", {"hipblasZher2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZher2_v2_64", {"hipblasZher2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // SPR2/HPR2 {"cublasSspr2_v2", {"hipblasSspr2", "rocblas_sspr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSspr2_v2_64", {"hipblasSspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSspr2_v2_64", {"hipblasSspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, 
{"cublasDspr2_v2", {"hipblasDspr2", "rocblas_dspr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDspr2_v2_64", {"hipblasDspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDspr2_v2_64", {"hipblasDspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasChpr2_v2", {"hipblasChpr2_v2", "rocblas_chpr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChpr2_v2_64", {"hipblasChpr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasChpr2_v2_64", {"hipblasChpr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZhpr2_v2", {"hipblasZhpr2_v2", "rocblas_zhpr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhpr2_v2_64", {"hipblasZhpr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZhpr2_v2_64", {"hipblasZhpr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // Blas3 (v2) Routines // GEMM @@ -936,9 +938,9 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // DOT {"cublasDotEx", {"hipblasDotEx_v2", "rocblas_dot_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasDotEx_64", {"hipblasDotEx_64", "rocblas_dot_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasDotEx_64", {"hipblasDotEx_v2_64", "rocblas_dot_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_EXPERIMENTAL}}, {"cublasDotcEx", {"hipblasDotcEx_v2", "rocblas_dotc_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasDotcEx_64", {"hipblasDotcEx_64", "rocblas_dotc_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasDotcEx_64", {"hipblasDotcEx_v2_64", "rocblas_dotc_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_EXPERIMENTAL}}, {"cublasSdot_v2", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, {"cublasSdot_v2_64", {"hipblasSdot_64", "rocblas_sdot_64", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_1}}, @@ -956,7 +958,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SCAL {"cublasScalEx", {"hipblasScalEx_v2", "rocblas_scal_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasScalEx_64", {"hipblasScalEx_64", "rocblas_scal_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasScalEx_64", {"hipblasScalEx_v2_64", "rocblas_scal_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_EXPERIMENTAL}}, {"cublasSscal_v2", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, {"cublasSscal_v2_64", {"hipblasSscal_64", "rocblas_sscal_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, {"cublasDscal_v2", {"hipblasDscal", "rocblas_dscal", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, @@ -972,7 +974,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // AXPY {"cublasAxpyEx", {"hipblasAxpyEx_v2", "rocblas_axpy_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasAxpyEx_64", {"hipblasAxpyEx_64", "rocblas_axpy_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasAxpyEx_64", {"hipblasAxpyEx_v2_64", "rocblas_axpy_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_EXPERIMENTAL}}, {"cublasSaxpy_v2", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, {"cublasSaxpy_v2_64", {"hipblasSaxpy_64", "rocblas_saxpy_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, {"cublasDaxpy_v2", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, @@ -1044,7 +1046,7 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // ROT {"cublasRotEx", {"hipblasRotEx_v2", "rocblas_rot_ex", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT}}, - {"cublasRotEx_64", {"hipblasRotEx_64", "rocblas_rot_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_UNSUPPORTED}}, + {"cublasRotEx_64", {"hipblasRotEx_v2_64", "rocblas_rot_ex_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_EXT, HIP_EXPERIMENTAL}}, {"cublasSrot_v2", {"hipblasSrot", "rocblas_srot", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, {"cublasSrot_v2_64", 
{"hipblasSrot_64", "rocblas_srot_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, {"cublasDrot_v2", {"hipblasDrot", "rocblas_drot", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_1}}, @@ -1455,7 +1457,7 @@ const std::map CUDA_BLAS_FUNCTION_VER_MAP { {"cublasTSTgemvBatched_64", {CUDA_120, CUDA_0, CUDA_0 }}, {"cublasTSSgemvBatched", {CUDA_116, CUDA_0, CUDA_0 }}, {"cublasTSSgemvBatched_64", {CUDA_120, CUDA_0, CUDA_0 }}, - {"cublasSgemvStridedBatched", {CUDA_116, CUDA_0, CUDA_0 }}, + {"cublasSgemvStridedBatched", {CUDA_116, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 11062, CUBLAS_VERSION 110902, CUBLAS_VER_MAJOR 11 CUBLAS_VER_MINOR 9 CUBLAS_VER_PATCH 2 {"cublasSgemvStridedBatched_64", {CUDA_120, CUDA_0, CUDA_0 }}, {"cublasDgemvStridedBatched", {CUDA_116, CUDA_0, CUDA_0 }}, {"cublasDgemvStridedBatched_64", {CUDA_120, CUDA_0, CUDA_0 }}, @@ -1623,6 +1625,8 @@ const std::map CUDA_BLAS_FUNCTION_VER_MAP { {"cublasLtLoggerSetLevel", {CUDA_110, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 11003, CUBLAS_VERSION 11200, CUBLAS_VER_MAJOR 11 CUBLAS_VER_MINOR 2 {"cublasLtLoggerSetMask", {CUDA_110, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 11003, CUBLAS_VERSION 11200, CUBLAS_VER_MAJOR 11 CUBLAS_VER_MINOR 2 {"cublasLtLoggerForceDisable", {CUDA_110, CUDA_0, CUDA_0 }}, // A: CUDA_VERSION 11003, CUBLAS_VERSION 11200, CUBLAS_VER_MAJOR 11 CUBLAS_VER_MINOR 2 + {"cublasGemmGroupedBatchedEx", {CUDA_125, CUDA_0, CUDA_0 }}, + {"cublasGemmGroupedBatchedEx_64", {CUDA_125, CUDA_0, CUDA_0 }}, }; const std::map HIP_BLAS_FUNCTION_VER_MAP { @@ -1644,214 +1648,97 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasGetMatrixAsync", {HIP_3070, HIP_0, HIP_0 }}, {"hipblasSnrm2", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDnrm2", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasScnrm2", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasDznrm2", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSdot", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasDdot", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCdotu", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCdotc", {HIP_3000, HIP_0, HIP_0 }}, 
- {"hipblasZdotu", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZdotc", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSscal", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDscal", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCscal", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCsscal", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZscal", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasZdscal", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSaxpy", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDaxpy", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCaxpy", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZaxpy", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasScopy", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDcopy", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCcopy", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZcopy", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSswap", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasDswap", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCswap", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZswap", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasIsamax", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasIdamax", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasIcamax", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasIzamax", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasIsamin", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasIdamin", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasIcamin", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasIzamin", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSasum", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDasum", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasScasum", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasDzasum", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSrot", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasDrot", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCrot", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCsrot", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZrot", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZdrot", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSrotg", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasDrotg", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCrotg", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZrotg", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSrotm", {HIP_3000, HIP_0, HIP_0 }}, 
{"hipblasDrotm", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSrotmg", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasDrotmg", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSgemv", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDgemv", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCgemv", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZgemv", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSgbmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDgbmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCgbmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZgbmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStrmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDtrmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCtrmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZtrmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStbmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDtbmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCtbmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZtbmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStpmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDtpmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCtpmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZtpmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStrsv", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasDtrsv", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCtrsv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZtrsv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStpsv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDtpsv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCtpsv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZtpsv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStbsv", {HIP_3060, HIP_0, HIP_0 }}, {"hipblasDtbsv", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasCtbsv", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasZtbsv", {HIP_3060, HIP_0, HIP_0 }}, {"hipblasSsymv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDsymv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCsymv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZsymv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasChemv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZhemv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSsbmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDsbmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasChbmv", 
{HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZhbmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSspmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDspmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasChpmv", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZhpmv", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSger", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDger", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCgeru", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCgerc", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZgeru", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZgerc", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSsyr", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasDsyr", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCsyr", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZsyr", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCher", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZher", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSspr", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDspr", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasChpr", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZhpr", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSsyr2", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDsyr2", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCsyr2", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZsyr2", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCher2", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZher2", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSspr2", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDspr2", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasChpr2", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZhpr2", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSgemm", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDgemm", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCgemm", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasZgemm", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasHgemm", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasSgemmBatched", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDgemmBatched", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasHgemmBatched", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSgemmStridedBatched", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDgemmStridedBatched", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCgemmBatched", {HIP_3000, HIP_0, 
HIP_0 }}, - {"hipblasZgemmBatched", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasCgemmStridedBatched", {HIP_3000, HIP_0, HIP_0 }}, - {"hipblasZgemmStridedBatched", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasHgemmStridedBatched", {HIP_3000, HIP_0, HIP_0 }}, {"hipblasSsyrk", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDsyrk", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCsyrk", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZsyrk", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCherk", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZherk", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSsyr2k", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDsyr2k", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCsyr2k", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZsyr2k", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSsyrkx", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDsyrkx", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCsyrkx", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZsyrkx", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCher2k", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZher2k", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCherkx", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZherkx", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSsymm", {HIP_3060, HIP_0, HIP_0 }}, {"hipblasDsymm", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasCsymm", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasZsymm", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasChemm", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasZhemm", {HIP_3060, HIP_0, HIP_0 }}, {"hipblasStrsm", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDtrsm", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCtrsm", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZtrsm", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStrmm", {HIP_3020, HIP_0, HIP_0 }}, {"hipblasDtrmm", {HIP_3020, HIP_0, HIP_0 }}, - {"hipblasCtrmm", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZtrmm", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSgeam", {HIP_1082, HIP_0, HIP_0 }}, {"hipblasDgeam", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasCgeam", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasZgeam", {HIP_3060, HIP_0, HIP_0 }}, {"hipblasSgetrfBatched", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDgetrfBatched", 
{HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCgetrfBatched", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZgetrfBatched", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSgetriBatched", {HIP_3070, HIP_0, HIP_0 }}, {"hipblasDgetriBatched", {HIP_3070, HIP_0, HIP_0 }}, - {"hipblasCgetriBatched", {HIP_3070, HIP_0, HIP_0 }}, - {"hipblasZgetriBatched", {HIP_3070, HIP_0, HIP_0 }}, {"hipblasSgetrsBatched", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDgetrsBatched", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCgetrsBatched", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZgetrsBatched", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasStrsmBatched", {HIP_3020, HIP_0, HIP_0 }}, {"hipblasDtrsmBatched", {HIP_3020, HIP_0, HIP_0 }}, - {"hipblasCtrsmBatched", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZtrsmBatched", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSgeqrfBatched", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasDgeqrfBatched", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasCgeqrfBatched", {HIP_3050, HIP_0, HIP_0 }}, - {"hipblasZgeqrfBatched", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSdgmm", {HIP_3060, HIP_0, HIP_0 }}, {"hipblasDdgmm", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasCdgmm", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasZdgmm", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasGemmEx", {HIP_1082, HIP_0, HIP_0 }}, - {"hipblasGemmBatchedEx", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasGemmStridedBatchedEx", {HIP_3060, HIP_0, HIP_0 }}, - {"hipblasDotEx", {HIP_4010, HIP_0, HIP_0 }}, - {"hipblasDotcEx", {HIP_4010, HIP_0, HIP_0 }}, - {"hipblasAxpyEx", {HIP_4010, HIP_0, HIP_0 }}, - {"hipblasNrm2Ex", {HIP_4010, HIP_0, HIP_0 }}, - {"hipblasRotEx", {HIP_4010, HIP_0, HIP_0 }}, - {"hipblasScalEx", {HIP_4010, HIP_0, HIP_0 }}, {"hipblasIcamax_v2", {HIP_6000, HIP_0, HIP_0 }}, {"hipblasIzamax_v2", {HIP_6000, HIP_0, HIP_0 }}, {"hipblasIcamin_v2", {HIP_6000, HIP_0, HIP_0 }}, @@ -2048,8 +1935,94 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasLtMatmulAlgoGetHeuristic", {HIP_5050, HIP_0, HIP_0 }}, {"hipblasSgbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasDgbmv_64", {HIP_6020, 
HIP_0, HIP_0, HIP_LATEST}}, - {"hipblasCgbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, - {"hipblasZgbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgbmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgbmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgemv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemvBatched_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgemvBatched_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemvBatched_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemvBatched_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemvStridedBatched", {HIP_3000, HIP_0, HIP_0 }}, + {"hipblasDgemvStridedBatched", {HIP_3000, HIP_0, HIP_0 }}, + {"hipblasSgemvBatched", {HIP_1060, HIP_0, HIP_0 }}, + {"hipblasDgemvBatched", {HIP_3000, HIP_0, HIP_0 }}, + {"hipblasSgemvStridedBatched_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgemvStridedBatched_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemvStridedBatched_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemvStridedBatched_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSger_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDger_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgeru_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgerc_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgeru_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgerc_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasChbmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZhbmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasChemv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZhemv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCher_v2_64", 
{HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZher_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCher2_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZher2_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasChpmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZhpmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasChpr_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZhpr_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasChpr2_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZhpr2_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSsbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDsbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSspmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDspmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSspr_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDspr_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSspr2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDspr2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSsymv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDsymv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCsymv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZsymv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSsyr_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDsyr_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCsyr_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZsyr_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSsyr2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDsyr2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCsyr2_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZsyr2_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtbmv_v2_64", 
{HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtbmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStbsv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtbsv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtbsv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtbsv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStpmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtpmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtpmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtpmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStpsv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtpsv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtpsv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtpsv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStrmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtrmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtrmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtrmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasStrsv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDtrsv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCtrsv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZtrsv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasAxpyEx_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDotEx_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDotcEx_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasNrm2Ex_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasRotEx_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasScalEx_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, @@ -2266,8 +2239,6 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_get_matrix_async", {HIP_3050, HIP_0, HIP_0 }}, {"hipblasSgelsBatched", 
{HIP_5040, HIP_0, HIP_0 }}, {"hipblasDgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, - {"hipblasCgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, - {"hipblasZgelsBatched", {HIP_5040, HIP_0, HIP_0 }}, {"rocblas_get_math_mode", {HIP_5070, HIP_0, HIP_0 }}, {"rocblas_set_math_mode", {HIP_5070, HIP_0, HIP_0 }}, {"rocblas_cgemv_batched", {HIP_3050, HIP_0, HIP_0 }}, @@ -2345,8 +2316,6 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { {"hipblasStrmm", {HIP_6000}}, {"hipblasDtrmm", {HIP_6000}}, - {"hipblasCtrmm", {HIP_6000}}, - {"hipblasZtrmm", {HIP_6000}}, {"rocblas_strmm", {HIP_6000}}, {"rocblas_dtrmm", {HIP_6000}}, {"rocblas_ctrmm", {HIP_6000}}, diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index 8fc44aec..8e39f587 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ b/src/CUDA2HIP_DNN_API_types.cpp @@ -55,6 +55,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED | CUDA_DEPRECATED}}, {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, + {"CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", {"HIPDNN_STATUS_SUBLIBRARY_LOADING_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_VERSION_MISMATCH", {"HIPDNN_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED | CUDA_DEPRECATED}}, {"CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", {"HIPDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", {"HIPDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, @@ -463,6 +464,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { 
{"CUDNN_ATTR_ENGINECFG_ENGINE", {"HIPDNN_ATTR_ENGINECFG_ENGINE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 300 {"CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO", {"HIPDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 301 {"CUDNN_ATTR_ENGINECFG_KNOB_CHOICES", {"HIPDNN_ATTR_ENGINECFG_KNOB_CHOICES", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 302 + {"CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", {"HIPDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 303 + {"CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", {"HIPDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 304 {"CUDNN_ATTR_EXECUTION_PLAN_HANDLE", {"HIPDNN_ATTR_EXECUTION_PLAN_HANDLE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 400 {"CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG", {"HIPDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 401 {"CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", {"HIPDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 402 @@ -1761,6 +1764,9 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"CUDNN_STATUS_SPECIFIC_ERROR", {CUDNN_900, CUDA_0, CUDA_0 }}, {"CUDNN_POINTWISE_ATAN2", {CUDNN_910, CUDA_0, CUDA_0 }}, {"CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP", {CUDNN_910, CUDA_0, CUDA_0 }}, + {"CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", {CUDNN_920, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", {CUDNN_920, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", {CUDNN_920, CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_TYPE_NAME_VER_MAP { diff --git a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 19d70c1b..99ae10fa 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -99,6 +99,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuCtxCreate", {"hipCtxCreate", "", CONV_CONTEXT, 
API_DRIVER, SEC::CONTEXT, HIP_DEPRECATED}}, {"cuCtxCreate_v2", {"hipCtxCreate", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_DEPRECATED}}, {"cuCtxCreate_v3", {"hipCtxCreate_v3", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_UNSUPPORTED}}, + {"cuCtxCreate_v4", {"hipCtxCreate_v4", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_UNSUPPORTED}}, {"cuCtxDestroy", {"hipCtxDestroy", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_DEPRECATED}}, {"cuCtxDestroy_v2", {"hipCtxDestroy", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_DEPRECATED}}, {"cuCtxGetApiVersion", {"hipCtxGetApiVersion", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_DEPRECATED}}, @@ -136,6 +137,8 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuCtxGetExecAffinity", {"hipCtxGetExecAffinity", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_UNSUPPORTED}}, // {"cuCtxGetId", {"hipCtxGetId", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_UNSUPPORTED}}, + // + {"cuCtxWaitEvent", {"hipCtxWaitEvent", "", CONV_CONTEXT, API_DRIVER, SEC::CONTEXT, HIP_UNSUPPORTED}}, // 9. Context Management [DEPRECATED] // no analogues @@ -185,6 +188,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuLibraryGetKernelCount", {"hipLibraryGetKernelCount", "", CONV_LIBRARY, API_DRIVER, SEC::LIBRARY, HIP_UNSUPPORTED}}, {"cuLibraryEnumerateKernels", {"hipLibraryEnumerateKernels", "", CONV_LIBRARY, API_DRIVER, SEC::LIBRARY, HIP_UNSUPPORTED}}, {"cuKernelGetParamInfo", {"hipKernelGetParamInfo", "", CONV_LIBRARY, API_DRIVER, SEC::LIBRARY, HIP_UNSUPPORTED}}, + {"cuKernelGetLibrary", {"hipKernelGetLibrary", "", CONV_LIBRARY, API_DRIVER, SEC::LIBRARY, HIP_UNSUPPORTED}}, // 13. 
Memory Management // no analogue @@ -494,6 +498,8 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuStreamUpdateCaptureDependencies_v2", {"hipStreamUpdateCaptureDependencies_v2", "", CONV_STREAM, API_DRIVER, SEC::STREAM, HIP_UNSUPPORTED}}, // no analogue {"cuStreamGetCtx", {"hipStreamGetContext", "", CONV_STREAM, API_DRIVER, SEC::STREAM, HIP_UNSUPPORTED}}, + // no analogue + {"cuStreamGetCtx_v2", {"hipStreamGetContext_v2", "", CONV_STREAM, API_DRIVER, SEC::STREAM, HIP_UNSUPPORTED}}, // cudaStreamGetFlags {"cuStreamGetFlags", {"hipStreamGetFlags", "", CONV_STREAM, API_DRIVER, SEC::STREAM}}, // cudaStreamGetPriority @@ -981,6 +987,8 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { {"cuGreenCtxWaitEvent", {"hipGreenCtxWaitEvent", "", CONV_COREDUMP, API_DRIVER, SEC::GREEN_CONTEXT, HIP_UNSUPPORTED}}, // {"cuStreamGetGreenCtx", {"hipStreamGetGreenCtx", "", CONV_COREDUMP, API_DRIVER, SEC::GREEN_CONTEXT, HIP_UNSUPPORTED}}, + // + {"cuGreenCtxStreamCreate", {"hipGreenCtxStreamCreate", "", CONV_COREDUMP, API_DRIVER, SEC::GREEN_CONTEXT, HIP_UNSUPPORTED}}, // 36. 
Profiler Control [DEPRECATED] // cudaProfilerInitialize @@ -1489,6 +1497,11 @@ const std::map CUDA_DRIVER_FUNCTION_VER_MAP { {"cuGreenCtxRecordEvent", {CUDA_124, CUDA_0, CUDA_0 }}, {"cuGreenCtxWaitEvent", {CUDA_124, CUDA_0, CUDA_0 }}, {"cuStreamGetGreenCtx", {CUDA_124, CUDA_0, CUDA_0 }}, + {"cuCtxCreate_v4", {CUDA_125, CUDA_0, CUDA_0 }}, + {"cuCtxWaitEvent", {CUDA_125, CUDA_0, CUDA_0 }}, + {"cuKernelGetLibrary", {CUDA_125, CUDA_0, CUDA_0 }}, + {"cuStreamGetCtx_v2", {CUDA_125, CUDA_0, CUDA_0 }}, + {"cuGreenCtxStreamCreate", {CUDA_125, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_FUNCTION_VER_MAP { diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index 1a17db5b..503df0dc 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -463,6 +463,16 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // {"CUdevResource", {"hipDevResource", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUctxCigParam_st", {"hipCtxCigParam", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUctxCigParam", {"hipCtxCigParam", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + + // + {"CUctxCreateParams_st", {"hipCtxCreateParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUctxCreateParams", {"hipCtxCreateParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // 2. 
Unions {"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -558,6 +568,20 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_AD_FORMAT_BC6H_SF16", {"HIP_AD_FORMAT_BC6H_SF16", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x9c {"CU_AD_FORMAT_BC7_UNORM", {"HIP_AD_FORMAT_BC7_UNORM", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x9d {"CU_AD_FORMAT_BC7_UNORM_SRGB", {"HIP_AD_FORMAT_BC7_UNORM_SRGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x9e + {"CU_AD_FORMAT_P010", {"HIP_AD_FORMAT_P010", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x9f + {"CU_AD_FORMAT_P016", {"HIP_AD_FORMAT_P016", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa1 + {"CU_AD_FORMAT_NV16", {"HIP_AD_FORMAT_NV16", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa2 + {"CU_AD_FORMAT_P210", {"HIP_AD_FORMAT_P210", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa3 + {"CU_AD_FORMAT_P216", {"HIP_AD_FORMAT_P216", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa4 + {"CU_AD_FORMAT_YUY2", {"HIP_AD_FORMAT_YUY2", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa5 + {"CU_AD_FORMAT_Y210", {"HIP_AD_FORMAT_Y210", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa6 + {"CU_AD_FORMAT_Y216", {"HIP_AD_FORMAT_Y216", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa7 + {"CU_AD_FORMAT_AYUV", {"HIP_AD_FORMAT_AYUV", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa8 + {"CU_AD_FORMAT_Y410", {"HIP_AD_FORMAT_Y410", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xa9 + {"CU_AD_FORMAT_Y416", {"HIP_AD_FORMAT_Y416", "", CONV_NUMERIC_LITERAL, API_DRIVER, 
SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xb1 + {"CU_AD_FORMAT_Y444_PLANAR8", {"HIP_AD_FORMAT_Y444_PLANAR8", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xb2 + {"CU_AD_FORMAT_Y444_PLANAR10", {"HIP_AD_FORMAT_Y444_PLANAR10", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0xb3 + {"CU_AD_FORMAT_MAX", {"HIP_AD_FORMAT_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x7FFFFFFF // cudaComputeMode {"CUcomputemode", {"hipComputeMode", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, @@ -933,6 +957,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_DEVICE_ATTRIBUTE_MPS_ENABLED", {"hipDeviceAttributeMpsEnables", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 133 // cudaDevAttrHostNumaId {"CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID", {"hipDeviceAttributeHostNumaId", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 134 + // cudaDevAttrD3D12CigSupported + {"CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED", {"hipDeviceAttributeD3D12CigSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 135 // cudaDevAttrMax {"CU_DEVICE_ATTRIBUTE_MAX", {"hipDeviceAttributeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -1461,6 +1487,12 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_LIMIT_MAX_L2_FETCH_GRANULARITY", {"hipLimitMaxL2FetchGranularity", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x05 // cudaLimitPersistingL2CacheSize {"CU_LIMIT_PERSISTING_L2_CACHE_SIZE", {"hipLimitPersistingL2CacheSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x06 + // + {"CU_LIMIT_SHMEM_SIZE", {"hipLimitShmemSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x07 + // + {"CU_LIMIT_CIG_ENABLED", {"hipLimitCigEnabled", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x08 + // + 
{"CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED", {"hipLimitCigShmemFallbackEnabled", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x09 // no analogue {"CU_LIMIT_MAX", {"hipLimitMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -2130,8 +2162,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP", {"hipKernelNodeAttributeMemSyncDomainMap", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // cudaKernelNodeAttributeMemSyncDomain {"CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN", {"hipKernelNodeAttributeMemSyncDomain", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, - // + // cudaKernelNodeAttributeDeviceUpdatableKernelNode {"CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE", {"hipKernelNodeAttributeDeviceUpdatableKernelMode", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaKernelNodeAttributePreferredSharedMemoryCarveout + {"CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {"hipKernelNodeAttributePreferredSharedMemoryCarveout", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // cudaStreamAttrID {"CUstreamAttrID", {"hipStreamAttrID", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -2456,6 +2490,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT", {"hipLaunchAttributeLaunchCompletionEvent", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // cudaLaunchAttributeDeviceUpdatableKernelNode {"CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE", {"hipLaunchAttributeDeviceUpdatableKernelNode", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaLaunchAttributePreferredSharedMemoryCarveout + {"CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {"hipLaunchAttributePreferredSharedMemoryCarveout", "", CONV_NUMERIC_LITERAL, API_DRIVER, 
SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // {"CU_LAUNCH_ATTRIBUTE_MAX", {"hipLaunchAttributeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -2607,8 +2643,38 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // {"CU_COREDUMP_PIPE", {"HIP_COREDUMP_PIPE", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // + {"CU_COREDUMP_GENERATION_FLAGS", {"HIP_COREDUMP_GENERATION_FLAGS", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // {"CU_COREDUMP_MAX", {"HIP_COREDUMP_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUCoredumpGenerationFlags", {"hipCoredumpGenerationFlags", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUCoredumpGenerationFlags enum values + // + {"CU_COREDUMP_DEFAULT_FLAGS", {"HIP_COREDUMP_DEFAULT_FLAGS", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES", {"HIP_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_COREDUMP_LIGHTWEIGHT", {"HIP_COREDUMP_LIGHTWEIGHT", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_COREDUMP_SKIP_GLOBAL_MEMORY", {"HIP_COREDUMP_SKIP_GLOBAL_MEMORY", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_COREDUMP_SKIP_SHARED_MEMORY", {"HIP_COREDUMP_SKIP_SHARED_MEMORY", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_COREDUMP_SKIP_LOCAL_MEMORY", {"HIP_COREDUMP_SKIP_LOCAL_MEMORY", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_COREDUMP_SKIP_ABORT", {"HIP_COREDUMP_SKIP_ABORT", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_COREDUMP_LIGHTWEIGHT_FLAGS", {"HIP_COREDUMP_LIGHTWEIGHT_FLAGS", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + + // + 
{"CUdevSmResourceSplit_flags", {"hipDevSmResourceSplit_flags", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUdevSmResourceSplit_flags enum values + // + {"CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING", {"HIP_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE", {"HIP_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // cudaDeviceNumaConfig {"CUdeviceNumaConfig", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, {"CUdeviceNumaConfig_enum", {"hipDeviceNumaConfig", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -2673,6 +2739,14 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // {"CU_DEV_RESOURCE_TYPE_MAX", {"HIP_DEV_RESOURCE_TYPE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUcigDataType", {"hipCigDataType", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUcigDataType_enum", {"hipCigDataType", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUcigDataType enum values + // + {"CIG_DATA_TYPE_D3D12_COMMAND_QUEUE", {"HIP_CIG_DATA_TYPE_D3D12_COMMAND_QUEUE", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // 4. 
Typedefs // no analogue @@ -2817,6 +2891,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC", {"hipGraphKernelNodePortProgrammatic", "", CONV_DEFINE, API_DRIVER, SEC::DATA_TYPES, HIP_EXPERIMENTAL}}, // 1 // cudaGraphKernelNodePortLaunchCompletion {"CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER", {"hipGraphKernelNodePortLaunchCompletion", "", CONV_DEFINE, API_DRIVER, SEC::DATA_TYPES, HIP_EXPERIMENTAL}}, // 2 + // + {"CUDA_ARRAY3D_VIDEO_ENCODE_DECODE", {"HIP_ARRAY3D_VIDEO_ENCODE_DECODE", "", CONV_DEFINE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x100 }; const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { @@ -3734,6 +3810,47 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CUdevResource", {CUDA_124, CUDA_0, CUDA_0 }}, {"CUasyncCallbackEntry_st", {CUDA_124, CUDA_0, CUDA_0 }}, {"CUasyncCallbackHandle", {CUDA_124, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_P010", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_P016", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_NV16", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_P210", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_P216", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_YUY2", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_Y210", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_Y216", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_AYUV", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_Y410", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_Y416", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_Y444_PLANAR8", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_Y444_PLANAR10", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_AD_FORMAT_MAX", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_LIMIT_SHMEM_SIZE", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_LIMIT_CIG_ENABLED", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {CUDA_125, CUDA_0, CUDA_0 }}, +
{"CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUcigDataType", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUcigDataType_enum", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CIG_DATA_TYPE_D3D12_COMMAND_QUEUE", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUctxCigParam_st", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUctxCigParam", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUctxCreateParams_st", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUctxCreateParams", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUDA_ARRAY3D_VIDEO_ENCODE_DECODE", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_GENERATION_FLAGS", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUCoredumpGenerationFlags", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_DEFAULT_FLAGS", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_LIGHTWEIGHT", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_SKIP_GLOBAL_MEMORY", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_SKIP_SHARED_MEMORY", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_SKIP_LOCAL_MEMORY", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_SKIP_ABORT", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_COREDUMP_LIGHTWEIGHT_FLAGS", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CUdevSmResourceSplit_flags", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING", {CUDA_125, CUDA_0, CUDA_0 }}, + {"CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE", {CUDA_125, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { diff --git a/src/CUDA2HIP_FFT_API_types.cpp b/src/CUDA2HIP_FFT_API_types.cpp index 1b239fed..9011dd23 100644 --- a/src/CUDA2HIP_FFT_API_types.cpp +++ b/src/CUDA2HIP_FFT_API_types.cpp @@ -26,160 +26,162 @@ THE SOFTWARE. 
const std::map CUDA_FFT_TYPE_NAME_MAP { // cuFFT defines - {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // -1 - {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 1 - {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING - {"MAX_CUFFT_ERROR", {"HIPFFT_MAX_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x11 + {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // -1 + {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 1 + {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING + {"MAX_CUFFT_ERROR", {"HIPFFT_MAX_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x11 // cuFFT enums - {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_SUCCESS", {"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 0 - {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 1 - {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 2 - {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 3 - {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 4 - {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 5 - {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 6 - {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 7 - {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 8 - {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", 
CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x9 9 - {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xA 10 - {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xB 11 - {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xC 12 - {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xD 13 - {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xE 14 - {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, - {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x10 16 - - {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2a - {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2c - {"CUFFT_C2C", {"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x29 - {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6a - {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6c - {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x69 - - {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 - - {"cufftXtSubFormat_t", {"hipfftXtSubFormat_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"cufftXtSubFormat", {"hipfftXtSubFormat", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"CUFFT_XT_FORMAT_INPUT", {"HIPFFT_XT_FORMAT_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 
0x00 - {"CUFFT_XT_FORMAT_OUTPUT", {"HIPFFT_XT_FORMAT_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 - {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02 - {"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03 - {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED", {"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x04 - {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x05 - {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x06 - {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x07 - - {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"CUFFT_COPY_HOST_TO_DEVICE", {"HIPFFT_COPY_HOST_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 - {"CUFFT_COPY_DEVICE_TO_HOST", {"HIPFFT_COPY_DEVICE_TO_HOST", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 - {"CUFFT_COPY_DEVICE_TO_DEVICE", {"HIPFFT_COPY_DEVICE_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02 - {"CUFFT_COPY_UNDEFINED", {"HIPFFT_COPY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03 - - {"cufftXtQueryType_t", {"hipfftXtQueryType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"cufftXtQueryType", {"hipfftXtQueryType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"CUFFT_QUERY_1D_FACTORS", {"HIPFFT_QUERY_1D_FACTORS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 - {"CUFFT_QUERY_UNDEFINED", {"HIPFFT_QUERY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 
0x01 - - {"cufftXtWorkAreaPolicy_t", {"hipfftXtWorkAreaPolicy_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"cufftXtWorkAreaPolicy", {"hipfftXtWorkAreaPolicy", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"CUFFT_WORKAREA_MINIMAL", {"HIPFFT_WORKAREA_MINIMAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0 - {"CUFFT_WORKAREA_USER", {"HIPFFT_WORKAREA_USER", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 1 - {"CUFFT_WORKAREA_PERFORMANCE", {"HIPFFT_WORKAREA_PERFORMANCE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 2 - - {"cufftXtCallbackType_t", {"hipfftXtCallbackType_t", "", CONV_TYPE, API_FFT, 1}}, - {"cufftXtCallbackType", {"hipfftXtCallbackType", "", CONV_TYPE, API_FFT, 1}}, - {"CUFFT_CB_LD_COMPLEX", {"HIPFFT_CB_LD_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 - {"CUFFT_CB_LD_COMPLEX_DOUBLE", {"HIPFFT_CB_LD_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 - {"CUFFT_CB_LD_REAL", {"HIPFFT_CB_LD_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 - {"CUFFT_CB_LD_REAL_DOUBLE", {"HIPFFT_CB_LD_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 - {"CUFFT_CB_ST_COMPLEX", {"HIPFFT_CB_ST_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 - {"CUFFT_CB_ST_COMPLEX_DOUBLE", {"HIPFFT_CB_ST_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 - {"CUFFT_CB_ST_REAL", {"HIPFFT_CB_ST_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 - {"CUFFT_CB_ST_REAL_DOUBLE", {"HIPFFT_CB_ST_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 - {"CUFFT_CB_UNDEFINED", {"HIPFFT_CB_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 - - {"cufftProperty_t", {"hipfftProperty", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"cufftProperty", {"hipfftProperty", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, - {"NVFFT_PLAN_PROPERTY_INT64_PATIENT_JIT", {"HIPFFT_PLAN_PROPERTY_INT64_PATIENT_JIT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x1 + {"cufftResult_t", {"hipfftResult_t", "", 
CONV_TYPE, API_FFT, 1}}, + {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_SUCCESS", {"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 0 + {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 1 + {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 2 + {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 3 + {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 4 + {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 5 + {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 6 + {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 7 + {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 8 + {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x9 9 + {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xA 10 + {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xB 11 + {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xC 12 + {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xD 13 + {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0xE 14 + {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x10 16 + + {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT, 1}}, + {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, 
// 0x2a + {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2c + {"CUFFT_C2C", {"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x29 + {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6a + {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6c + {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x69 + + {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + + {"cufftXtSubFormat_t", {"hipfftXtSubFormat_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtSubFormat", {"hipfftXtSubFormat", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_XT_FORMAT_INPUT", {"HIPFFT_XT_FORMAT_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_XT_FORMAT_OUTPUT", {"HIPFFT_XT_FORMAT_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + {"CUFFT_XT_FORMAT_INPLACE", {"HIPFFT_XT_FORMAT_INPLACE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02 + {"CUFFT_XT_FORMAT_INPLACE_SHUFFLED", {"HIPFFT_XT_FORMAT_INPLACE_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03 + {"CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED", {"HIPFFT_XT_FORMAT_1D_INPUT_SHUFFLED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x04 + {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_INPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x05 + {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {"HIPFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x06 + {"CUFFT_FORMAT_UNDEFINED", {"HIPFFT_FORMAT_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x07 + + {"cufftXtCopyType_t", {"hipfftXtCopyType_t", "", 
CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtCopyType", {"hipfftXtCopyType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_COPY_HOST_TO_DEVICE", {"HIPFFT_COPY_HOST_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_COPY_DEVICE_TO_HOST", {"HIPFFT_COPY_DEVICE_TO_HOST", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + {"CUFFT_COPY_DEVICE_TO_DEVICE", {"HIPFFT_COPY_DEVICE_TO_DEVICE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x02 + {"CUFFT_COPY_UNDEFINED", {"HIPFFT_COPY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x03 + + {"cufftXtQueryType_t", {"hipfftXtQueryType_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtQueryType", {"hipfftXtQueryType", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_QUERY_1D_FACTORS", {"HIPFFT_QUERY_1D_FACTORS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x00 + {"CUFFT_QUERY_UNDEFINED", {"HIPFFT_QUERY_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0x01 + + {"cufftXtWorkAreaPolicy_t", {"hipfftXtWorkAreaPolicy_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXtWorkAreaPolicy", {"hipfftXtWorkAreaPolicy", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"CUFFT_WORKAREA_MINIMAL", {"HIPFFT_WORKAREA_MINIMAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 0 + {"CUFFT_WORKAREA_USER", {"HIPFFT_WORKAREA_USER", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 1 + {"CUFFT_WORKAREA_PERFORMANCE", {"HIPFFT_WORKAREA_PERFORMANCE", "", CONV_NUMERIC_LITERAL, API_FFT, 1, UNSUPPORTED}}, // 2 + + {"cufftXtCallbackType_t", {"hipfftXtCallbackType_t", "", CONV_TYPE, API_FFT, 1}}, + {"cufftXtCallbackType", {"hipfftXtCallbackType", "", CONV_TYPE, API_FFT, 1}}, + {"CUFFT_CB_LD_COMPLEX", {"HIPFFT_CB_LD_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x0 + {"CUFFT_CB_LD_COMPLEX_DOUBLE", {"HIPFFT_CB_LD_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x1 + {"CUFFT_CB_LD_REAL", {"HIPFFT_CB_LD_REAL", "", 
CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x2 + {"CUFFT_CB_LD_REAL_DOUBLE", {"HIPFFT_CB_LD_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x3 + {"CUFFT_CB_ST_COMPLEX", {"HIPFFT_CB_ST_COMPLEX", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x4 + {"CUFFT_CB_ST_COMPLEX_DOUBLE", {"HIPFFT_CB_ST_COMPLEX_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x5 + {"CUFFT_CB_ST_REAL", {"HIPFFT_CB_ST_REAL", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x6 + {"CUFFT_CB_ST_REAL_DOUBLE", {"HIPFFT_CB_ST_REAL_DOUBLE", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x7 + {"CUFFT_CB_UNDEFINED", {"HIPFFT_CB_UNDEFINED", "", CONV_NUMERIC_LITERAL, API_FFT, 1}}, // 0x8 + + {"cufftProperty_t", {"hipfftProperty", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, + {"cufftProperty", {"hipfftProperty", "", CONV_TYPE, API_FFT, 1, HIP_UNSUPPORTED}}, + {"NVFFT_PLAN_PROPERTY_INT64_PATIENT_JIT", {"HIPFFT_PLAN_PROPERTY_INT64_PATIENT_JIT", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x1 + {"NVFFT_PLAN_PROPERTY_INT64_MAX_NUM_HOST_THREADS", {"HIPFFT_PLAN_PROPERTY_INT64_MAX_NUM_HOST_THREADS", "", CONV_NUMERIC_LITERAL, API_FFT, 1, HIP_UNSUPPORTED}}, // 0x2 // cuFFT types - {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT, 1}}, - {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT, 1}}, - {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT, 1}}, - {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT, 1}}, - {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}}, - {"cufftXt1dFactors_t", {"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"cufftBox3d_t", {"hipfftBox3d_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, - {"cufftBox3d", {"hipfftBox3d", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT, 1}}, + {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT, 1}}, + {"cufftComplex",
{"hipfftComplex", "", CONV_TYPE, API_FFT, 1}}, + {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT, 1}}, + {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT, 1}}, + {"cufftXt1dFactors_t", {"hipfftXt1dFactors_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftXt1dFactors", {"hipfftXt1dFactors", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftBox3d_t", {"hipfftBox3d_t", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, + {"cufftBox3d", {"hipfftBox3d", "", CONV_TYPE, API_FFT, 1, UNSUPPORTED}}, }; const std::map CUDA_FFT_TYPE_NAME_VER_MAP { - {"CUFFT_NOT_SUPPORTED", {CUDA_80, CUDA_0, CUDA_0 }}, - {"cufftXtWorkAreaPolicy_t", {CUDA_92, CUDA_0, CUDA_0 }}, - {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0 }}, - {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0 }}, - {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0 }}, - {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {CUDA_118, CUDA_0, CUDA_0 }}, - {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {CUDA_118, CUDA_0, CUDA_0 }}, - {"cufftBox3d_t", {CUDA_118, CUDA_0, CUDA_0 }}, - {"cufftBox3d", {CUDA_118, CUDA_0, CUDA_0 }}, - {"cufftProperty_t", {CUDA_124, CUDA_0, CUDA_0 }}, - {"cufftProperty", {CUDA_124, CUDA_0, CUDA_0 }}, - {"NVFFT_PLAN_PROPERTY_INT64_PATIENT_JIT", {CUDA_124, CUDA_0, CUDA_0 }}, + {"CUFFT_NOT_SUPPORTED", {CUDA_80, CUDA_0, CUDA_0 }}, + {"cufftXtWorkAreaPolicy_t", {CUDA_92, CUDA_0, CUDA_0 }}, + {"cufftXtWorkAreaPolicy", {CUDA_92, CUDA_0, CUDA_0 }}, + {"CUFFT_WORKAREA_MINIMAL", {CUDA_92, CUDA_0, CUDA_0 }}, + {"CUFFT_WORKAREA_USER", {CUDA_92, CUDA_0, CUDA_0 }}, + {"CUFFT_XT_FORMAT_DISTRIBUTED_INPUT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cufftBox3d_t", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cufftBox3d", {CUDA_118, CUDA_0, CUDA_0 }}, + {"cufftProperty_t", {CUDA_124, CUDA_0, CUDA_0 }}, + {"cufftProperty", {CUDA_124, CUDA_0, CUDA_0 }}, + {"NVFFT_PLAN_PROPERTY_INT64_PATIENT_JIT", {CUDA_124, CUDA_0, CUDA_0 }}, + 
{"NVFFT_PLAN_PROPERTY_INT64_MAX_NUM_HOST_THREADS", {CUDA_125, CUDA_0, CUDA_0 }}, }; const std::map HIP_FFT_TYPE_NAME_VER_MAP { - {"HIPFFT_FORWARD", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_BACKWARD", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftResult_t", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftResult", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_SUCCESS", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_PLAN", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_ALLOC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_TYPE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_VALUE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INTERNAL_ERROR", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_EXEC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_SETUP_FAILED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_SIZE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_UNALIGNED_DATA", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INCOMPLETE_PARAMETER_LIST", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_INVALID_DEVICE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_PARSE_ERROR", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NO_WORKSPACE", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NOT_IMPLEMENTED", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_NOT_SUPPORTED", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftType_t", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftType", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_R2C", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_C2R", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_C2C", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_D2Z", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_Z2D", {HIP_1070, HIP_0, HIP_0 }}, - {"HIPFFT_Z2Z", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftReal", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftDoubleReal", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftComplex", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftDoubleComplex", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftHandle", {HIP_1070, HIP_0, HIP_0 }}, - {"hipfftXtCallbackType_t", {HIP_4030, HIP_0, HIP_0 }}, - {"hipfftXtCallbackType", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, - 
{"HIPFFT_CB_LD_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_REAL", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_LD_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_REAL", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_ST_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, - {"HIPFFT_CB_UNDEFINED", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_FORWARD", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_BACKWARD", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftResult_t", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftResult", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_SUCCESS", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_PLAN", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_ALLOC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_TYPE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_VALUE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INTERNAL_ERROR", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_EXEC_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_SETUP_FAILED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_SIZE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_UNALIGNED_DATA", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INCOMPLETE_PARAMETER_LIST", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_INVALID_DEVICE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_PARSE_ERROR", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NO_WORKSPACE", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NOT_IMPLEMENTED", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_NOT_SUPPORTED", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftType_t", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftType", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_R2C", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_C2R", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_C2C", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_D2Z", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_Z2D", {HIP_1070, HIP_0, HIP_0 }}, + {"HIPFFT_Z2Z", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftReal", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftDoubleReal", {HIP_1070, HIP_0, HIP_0 }}, + 
{"hipfftComplex", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftDoubleComplex", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftHandle", {HIP_1070, HIP_0, HIP_0 }}, + {"hipfftXtCallbackType_t", {HIP_4030, HIP_0, HIP_0 }}, + {"hipfftXtCallbackType", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_REAL", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_LD_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_COMPLEX", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_COMPLEX_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_REAL", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_ST_REAL_DOUBLE", {HIP_4030, HIP_0, HIP_0 }}, + {"HIPFFT_CB_UNDEFINED", {HIP_4030, HIP_0, HIP_0 }}, }; diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index c9044645..d5eb758c 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -45,8 +45,6 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE}}, // cuDeviceGetPCIBusId {"cudaDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE}}, - // cuCtxGetSharedMemConfig -> hipCtxGetSharedMemConfig - {"cudaDeviceGetSharedMemConfig", {"hipDeviceGetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE, CUDA_DEPRECATED}}, // cuCtxGetStreamPriorityRange {"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE}}, // no analogue @@ -55,8 +53,6 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { {"cudaDeviceSetCacheConfig", {"hipDeviceSetCacheConfig", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE}}, // cuCtxSetLimit {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE}}, - // cuCtxSetSharedMemConfig -> hipCtxSetSharedMemConfig - {"cudaDeviceSetSharedMemConfig", 
{"hipDeviceSetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE, CUDA_DEPRECATED}}, // cuCtxSynchronize {"cudaDeviceSynchronize", {"hipDeviceSynchronize", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE}}, // cuDeviceGet @@ -96,7 +92,13 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // {"cudaInitDevice", {"hipInitDevice", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE, HIP_UNSUPPORTED}}, - // 2. Thread Management [DEPRECATED] + // 2. Device Management [DEPRECATED] + // cuCtxGetSharedMemConfig -> hipCtxGetSharedMemConfig + {"cudaDeviceGetSharedMemConfig", {"hipDeviceGetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE_DEPRECATED, CUDA_DEPRECATED}}, + // cuCtxSetSharedMemConfig -> hipCtxSetSharedMemConfig + {"cudaDeviceSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME, SEC::DEVICE_DEPRECATED, CUDA_DEPRECATED}}, + + // 3. Thread Management [DEPRECATED] // no analogue {"cudaThreadExit", {"hipDeviceReset", "", CONV_THREAD, API_RUNTIME, SEC::THREAD_DEPRECATED, CUDA_DEPRECATED}}, // no analogue @@ -110,7 +112,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuCtxSynchronize {"cudaThreadSynchronize", {"hipDeviceSynchronize", "", CONV_THREAD, API_RUNTIME, SEC::THREAD_DEPRECATED, CUDA_DEPRECATED}}, - // 3. Error Handling + // 4. Error Handling // no analogue // NOTE: cudaGetErrorName and cuGetErrorName have different signatures {"cudaGetErrorName", {"hipGetErrorName", "", CONV_ERROR, API_RUNTIME, SEC::ERROR}}, @@ -122,7 +124,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaPeekAtLastError", {"hipPeekAtLastError", "", CONV_ERROR, API_RUNTIME, SEC::ERROR}}, - // 4. Stream Management + // 5. 
Stream Management // cuStreamAddCallback {"cudaStreamAddCallback", {"hipStreamAddCallback", "", CONV_STREAM, API_RUNTIME, SEC::STREAM}}, // cuCtxResetPersistingL2Cache @@ -175,7 +177,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuStreamGetId {"cudaStreamGetId", {"hipStreamGetId", "", CONV_STREAM, API_RUNTIME, SEC::STREAM, HIP_UNSUPPORTED}}, - // 5. Event Management + // 6. Event Management // no analogue // NOTE: Not equal to cuEventCreate due to different signatures {"cudaEventCreate", {"hipEventCreate", "", CONV_EVENT, API_RUNTIME, SEC::EVENT, CUDA_OVERLOADED}}, @@ -194,7 +196,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuEventRecordWithFlags {"cudaEventRecordWithFlags", {"hipEventRecordWithFlags", "", CONV_EVENT, API_RUNTIME, SEC::EVENT, HIP_UNSUPPORTED}}, - // 6. External Resource Interoperability + // 7. External Resource Interoperability // cuDestroyExternalMemory {"cudaDestroyExternalMemory", {"hipDestroyExternalMemory", "", CONV_EXTERNAL_RES, API_RUNTIME, SEC::EXTERNAL_RES}}, // cuDestroyExternalSemaphore @@ -212,7 +214,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuWaitExternalSemaphoresAsync {"cudaWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", "", CONV_EXTERNAL_RES, API_RUNTIME, SEC::EXTERNAL_RES}}, - // 7. Execution Control + // 8. 
Execution Control // no analogue {"cudaFuncGetAttributes", {"hipFuncGetAttributes", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION}}, // no analogue @@ -222,9 +224,6 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // NOTE: Not equal to cuFuncSetCacheConfig due to different signatures {"cudaFuncSetCacheConfig", {"hipFuncSetCacheConfig", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION}}, // no analogue - // NOTE: Not equal to cuFuncSetSharedMemConfig due to different signatures - {"cudaFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION, CUDA_DEPRECATED}}, - // no analogue {"cudaGetParameterBuffer", {"hipGetParameterBuffer", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION, HIP_UNSUPPORTED}}, // no analogue {"cudaGetParameterBufferV2", {"hipGetParameterBufferV2", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION, HIP_UNSUPPORTED}}, @@ -251,7 +250,12 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuFuncGetParamInfo {"cudaFuncGetParamInfo", {"hipFuncGetParamInfo", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION, HIP_UNSUPPORTED}}, - // 8. Occupancy + // 9. Execution Control [DEPRECATED] + // no analogue + // NOTE: Not equal to cuFuncSetSharedMemConfig due to different signatures + { "cudaFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION_DEPRECATED, CUDA_DEPRECATED} }, + + // 10. Occupancy // cuOccupancyAvailableDynamicSMemPerBlock {"cudaOccupancyAvailableDynamicSMemPerBlock", {"hipOccupancyAvailableDynamicSMemPerBlock", "", CONV_OCCUPANCY, API_RUNTIME, SEC::OCCUPANCY, HIP_UNSUPPORTED}}, // cuOccupancyMaxActiveBlocksPerMultiprocessor @@ -271,7 +275,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuOccupancyMaxActiveClusters {"cudaOccupancyMaxActiveClusters", {"hipOccupancyMaxActiveClusters", "", CONV_OCCUPANCY, API_RUNTIME, SEC::OCCUPANCY, HIP_UNSUPPORTED}}, - // 9. Memory Management + // 11. 
Memory Management // no analogue {"cudaArrayGetInfo", {"hipArrayGetInfo", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY}}, // cuMemFree @@ -409,7 +413,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuDeviceUnregisterAsyncNotification {"cudaDeviceUnregisterAsyncNotification", {"hipDeviceUnregisterAsyncNotification", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY, HIP_UNSUPPORTED}}, - // 10. Memory Management [DEPRECATED] + // 12. Memory Management [DEPRECATED] // no analogue // NOTE: Not equal to cuMemcpyAtoA due to different signatures {"cudaMemcpyArrayToArray", {"hipMemcpyArrayToArray", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, @@ -422,7 +426,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaMemcpyToArrayAsync", {"hipMemcpyToArrayAsync", "", CONV_MEMORY, API_RUNTIME, SEC::MEMORY_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, - // 11. Stream Ordered Memory Allocator + // 13. Stream Ordered Memory Allocator // cuMemAllocAsync {"cudaMallocAsync", {"hipMallocAsync", "", CONV_MEMORY, API_RUNTIME, SEC::ORDERED_MEMORY}}, @@ -453,12 +457,12 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuMemPoolImportPointer {"cudaMemPoolImportPointer", {"hipMemPoolImportPointer", "", CONV_MEMORY, API_RUNTIME, SEC::ORDERED_MEMORY}}, - // 12. Unified Addressing + // 14. Unified Addressing // no analogue // NOTE: Not equal to cuPointerGetAttributes due to different signatures {"cudaPointerGetAttributes", {"hipPointerGetAttributes", "", CONV_UNIFIED, API_RUNTIME, SEC::UNIFIED}}, - // 13. Peer Device Memory Access + // 15. Peer Device Memory Access // cuDeviceCanAccessPeer {"cudaDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", "", CONV_PEER, API_RUNTIME, SEC::PEER}}, // no analogue @@ -468,7 +472,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // NOTE: Not equal to cuCtxEnablePeerAccess due to different signatures {"cudaDeviceEnablePeerAccess", {"hipDeviceEnablePeerAccess", "", CONV_PEER, API_RUNTIME, SEC::PEER}}, - // 14. 
OpenGL Interoperability + // 16. OpenGL Interoperability // cuGLGetDevices {"cudaGLGetDevices", {"hipGLGetDevices", "", CONV_OPENGL, API_RUNTIME, SEC::OPENGL}}, // cuGraphicsGLRegisterBuffer @@ -478,7 +482,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuWGLGetDevice {"cudaWGLGetDevice", {"hipWGLGetDevice", "", CONV_OPENGL, API_RUNTIME, SEC::OPENGL, HIP_UNSUPPORTED}}, - // 15. OpenGL Interoperability [DEPRECATED] + // 17. OpenGL Interoperability [DEPRECATED] // no analogue // NOTE: Not equal to cuGLMapBufferObject due to different signatures {"cudaGLMapBufferObject", {"hipGLMapBufferObject", "", CONV_OPENGL, API_RUNTIME, SEC::OPENGL_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, @@ -498,7 +502,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGLUnregisterBufferObject {"cudaGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", "", CONV_OPENGL, API_RUNTIME, SEC::OPENGL_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, - // 16. Direct3D 9 Interoperability + // 18. Direct3D 9 Interoperability // cuD3D9GetDevice {"cudaD3D9GetDevice", {"hipD3D9GetDevice", "", CONV_D3D9, API_RUNTIME, SEC::D3D9, HIP_UNSUPPORTED}}, // cuD3D9GetDevices @@ -510,7 +514,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphicsD3D9RegisterResource {"cudaGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", "", CONV_D3D9, API_RUNTIME, SEC::D3D9, HIP_UNSUPPORTED}}, - // 17. Direct3D 9 Interoperability[DEPRECATED] + // 19. Direct3D 9 Interoperability [DEPRECATED] // cuD3D9MapResources {"cudaD3D9MapResources", {"hipD3D9MapResources", "", CONV_D3D9, API_RUNTIME, SEC::D3D9_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // cuD3D9RegisterResource @@ -533,7 +537,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuD3D9UnregisterResource {"cudaD3D9UnregisterResource", {"hipD3D9UnregisterResource", "", CONV_D3D9, API_RUNTIME, SEC::D3D9_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, - // 18. Direct3D 10 Interoperability + // 20. 
Direct3D 10 Interoperability // cuD3D10GetDevice {"cudaD3D10GetDevice", {"hipD3D10GetDevice", "", CONV_D3D10, API_RUNTIME, SEC::D3D10, HIP_UNSUPPORTED}}, // cuD3D10GetDevices @@ -541,7 +545,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphicsD3D10RegisterResource {"cudaGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", "", CONV_D3D10, API_RUNTIME, SEC::D3D10, HIP_UNSUPPORTED}}, - // 19. Direct3D 10 Interoperability [DEPRECATED] + // 21. Direct3D 10 Interoperability [DEPRECATED] // cuD3D10GetDirect3DDevice {"cudaD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", "", CONV_D3D10, API_RUNTIME, SEC::D3D10_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // cuD3D10MapResources @@ -567,7 +571,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuD3D10UnregisterResource {"cudaD3D10UnregisterResource", {"hipD3D10UnregisterResource", "", CONV_D3D10, API_RUNTIME, SEC::D3D10_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, - // 20. Direct3D 11 Interoperability + // 22. Direct3D 11 Interoperability // cuD3D11GetDevice {"cudaD3D11GetDevice", {"hipD3D11GetDevice", "", CONV_D3D11, API_RUNTIME, SEC::D3D11, HIP_UNSUPPORTED}}, // cuD3D11GetDevices @@ -575,13 +579,13 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphicsD3D11RegisterResource {"cudaGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", "", CONV_D3D11, API_RUNTIME, SEC::D3D11, HIP_UNSUPPORTED}}, - // 21. Direct3D 11 Interoperability [DEPRECATED] + // 23. Direct3D 11 Interoperability [DEPRECATED] // cuD3D11GetDirect3DDevice {"cudaD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", "", CONV_D3D11, API_RUNTIME, SEC::D3D11_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, // no analogue {"cudaD3D11SetDirect3DDevice", {"hipD3D11SetDirect3DDevice", "", CONV_D3D11, API_RUNTIME, SEC::D3D11_DEPRECATED, HIP_UNSUPPORTED | CUDA_DEPRECATED}}, - // 22. VDPAU Interoperability + // 24. 
VDPAU Interoperability // cuGraphicsVDPAURegisterOutputSurface {"cudaGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", "", CONV_VDPAU, API_RUNTIME, SEC::VDPAU, HIP_UNSUPPORTED}}, // cuGraphicsVDPAURegisterVideoSurface @@ -591,7 +595,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaVDPAUSetVDPAUDevice", {"hipVDPAUSetDevice", "", CONV_VDPAU, API_RUNTIME, SEC::VDPAU, HIP_UNSUPPORTED}}, - // 23. EGL Interoperability + // 25. EGL Interoperability // cuEGLStreamConsumerAcquireFrame {"cudaEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", "", CONV_EGL, API_RUNTIME, SEC::EGL, HIP_UNSUPPORTED}}, // cuEGLStreamConsumerConnect @@ -617,7 +621,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphicsResourceGetMappedEglFrame {"cudaGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", "", CONV_EGL, API_RUNTIME, SEC::EGL, HIP_UNSUPPORTED}}, - // 24. Graphics Interoperability + // 26. Graphics Interoperability // cuGraphicsMapResources {"cudaGraphicsMapResources", {"hipGraphicsMapResources", "", CONV_GRAPHICS, API_RUNTIME, SEC::GRAPHICS}}, // cuGraphicsResourceGetMappedMipmappedArray @@ -633,7 +637,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphicsUnregisterResource {"cudaGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", "", CONV_GRAPHICS, API_RUNTIME, SEC::GRAPHICS}}, - // 25. Texture Object Management + // 27. Texture Object Management // no analogue // NOTE: Not equal to cuTexObjectCreate due to different signatures {"cudaCreateTextureObject", {"hipCreateTextureObject", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE}}, @@ -656,7 +660,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaGetChannelDesc", {"hipGetChannelDesc", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE}}, - // 26. Surface Object Management + // 28. 
Surface Object Management // no analogue // NOTE: Not equal to cuSurfObjectCreate due to different signatures {"cudaCreateSurfaceObject", {"hipCreateSurfaceObject", "", CONV_SURFACE, API_RUNTIME, SEC::SURFACE}}, @@ -666,13 +670,13 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // NOTE: Not equal to cuSurfObjectGetResourceDesc due to different signatures {"cudaGetSurfaceObjectResourceDesc", {"hipGetSurfaceObjectResourceDesc", "", CONV_SURFACE, API_RUNTIME, SEC::SURFACE, HIP_UNSUPPORTED}}, - // 27. Version Management + // 29. Version Management // cuDriverGetVersion {"cudaDriverGetVersion", {"hipDriverGetVersion", "", CONV_VERSION, API_RUNTIME, SEC::VERSION}}, // no analogue {"cudaRuntimeGetVersion", {"hipRuntimeGetVersion", "", CONV_VERSION, API_RUNTIME, SEC::VERSION}}, - // 28. Graph Management + // 30. Graph Management // cuGraphAddChildGraphNode {"cudaGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH}}, // cuGraphAddDependencies @@ -871,26 +875,28 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphConditionalHandleCreate {"cudaGraphConditionalHandleCreate", {"hipGraphConditionalHandleCreate", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, - // 29. Driver Entry Point Access + // 31. Driver Entry Point Access // cuGetProcAddress {"cudaGetDriverEntryPoint", {"hipGetProcAddress", "", CONV_DRIVER_ENTRY_POINT, API_RUNTIME, SEC::DRIVER_ENTRY_POINT, HIP_EXPERIMENTAL}}, + // + {"cudaGetDriverEntryPointByVersion", {"hipGetDriverEntryPointByVersion", "", CONV_DRIVER_ENTRY_POINT, API_RUNTIME, SEC::DRIVER_ENTRY_POINT, HIP_UNSUPPORTED}}, - // 30. C++ API Routines + // 32. C++ API Routines {"cudaGetKernel", {"hipGetKernel", "", CONV_CPP, API_RUNTIME, SEC::CPP, HIP_UNSUPPORTED}}, - // 31. Interactions with the CUDA Driver API + // 33. Interactions with the CUDA Driver API {"cudaGetFuncBySymbol", {"hipGetFuncBySymbol", "", CONV_DRIVER_INTERACT, API_RUNTIME, SEC::DRIVER_INTERACT, HIP_EXPERIMENTAL}}, - // 32. 
Profiler Control + // 34. Profiler Control // cuProfilerStart {"cudaProfilerStart", {"hipProfilerStart", "", CONV_PROFILER, API_RUNTIME, SEC::PROFILER, HIP_DEPRECATED}}, // cuProfilerStop {"cudaProfilerStop", {"hipProfilerStop", "", CONV_PROFILER, API_RUNTIME, SEC::PROFILER, HIP_DEPRECATED}}, - // 33. Data types used by CUDA Runtime + // 35. Data types used by CUDA Runtime // NOTE: in a separate file - // 34. Execution Control [REMOVED] + // 36. Execution Control [REMOVED] // NOTE: Removed in CUDA 10.1 // no analogue {"cudaConfigureCall", {"hipConfigureCall", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION_REMOVED, CUDA_REMOVED}}, @@ -900,7 +906,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaSetupArgument", {"hipSetupArgument", "", CONV_EXECUTION, API_RUNTIME, SEC::EXECUTION_REMOVED, CUDA_REMOVED}}, - // 35. Texture Reference Management [REMOVED] + // 37. Texture Reference Management [REMOVED] // NOTE: Removed in CUDA 12.0 // no analogue {"cudaBindTexture", {"hipBindTexture", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}}, @@ -917,14 +923,14 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // no analogue {"cudaUnbindTexture", {"hipUnbindTexture", "", CONV_TEXTURE, API_RUNTIME, SEC::TEXTURE_REMOVED, HIP_DEPRECATED | CUDA_REMOVED}}, - // 36. Surface Reference Management [REMOVED] + // 38. Surface Reference Management [REMOVED] // NOTE: Removed in CUDA 12.0 // no analogue {"cudaBindSurfaceToArray", {"hipBindSurfaceToArray", "", CONV_SURFACE, API_RUNTIME, SEC::SURFACE_REMOVED, HIP_UNSUPPORTED | CUDA_REMOVED}}, // no analogue {"cudaGetSurfaceReference", {"hipGetSurfaceReference", "", CONV_SURFACE, API_RUNTIME, SEC::SURFACE_REMOVED, HIP_UNSUPPORTED | CUDA_REMOVED}}, - // 37. Profiler Control [REMOVED] + // 39. 
Profiler Control [REMOVED] // cuProfilerInitialize {"cudaProfilerInitialize", {"hipProfilerInitialize", "", CONV_PROFILER, API_RUNTIME, SEC::PROFILER_REMOVED, HIP_UNSUPPORTED | CUDA_REMOVED}}, }; @@ -1167,6 +1173,7 @@ const std::map CUDA_RUNTIME_FUNCTION_VER_MAP { {"cudaDeviceRegisterAsyncNotification", {CUDA_124, CUDA_0, CUDA_0 }}, {"cudaDeviceUnregisterAsyncNotification", {CUDA_124, CUDA_0, CUDA_0 }}, {"cudaFuncGetParamInfo", {CUDA_124, CUDA_0, CUDA_0 }}, + {"cudaGetDriverEntryPointByVersion", {CUDA_125, CUDA_0, CUDA_0 }}, }; const std::map HIP_RUNTIME_FUNCTION_VER_MAP { @@ -1436,12 +1443,14 @@ const std::map CUDA_RUNTIME_FUNCTION_CH const std::map CUDA_RUNTIME_API_SECTION_MAP { {SEC::DEVICE, "Device Management"}, + {SEC::DEVICE_DEPRECATED, "Device Management [DEPRECATED]"}, {SEC::THREAD_DEPRECATED, "Thread Management [DEPRECATED]"}, {SEC::ERROR, "Error Handling"}, {SEC::STREAM, "Stream Management"}, {SEC::EVENT, "Event Management"}, {SEC::EXTERNAL_RES, "External Resource Interoperability"}, {SEC::EXECUTION, "Execution Control"}, + {SEC::EXECUTION_DEPRECATED, "Execution Control [DEPRECATED]"}, {SEC::OCCUPANCY, "Occupancy"}, {SEC::MEMORY, "Memory Management"}, {SEC::MEMORY_DEPRECATED, "Memory Management [DEPRECATED]"}, diff --git a/src/CUDA2HIP_Runtime_API_types.cpp b/src/CUDA2HIP_Runtime_API_types.cpp index 9703980e..034d4fc8 100644 --- a/src/CUDA2HIP_Runtime_API_types.cpp +++ b/src/CUDA2HIP_Runtime_API_types.cpp @@ -666,6 +666,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaDevAttrMpsEnabled", {"hipDeviceAttributeMpsEnables", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 133 // CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID {"cudaDevAttrHostNumaId", {"hipDeviceAttributeHostNumaId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 134 + // CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED + {"cudaDevAttrD3D12CigSupported", {"hipDeviceAttributeD3D12CigSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, 
HIP_UNSUPPORTED}}, // 135 // CU_DEVICE_ATTRIBUTE_MAX {"cudaDevAttrMax", {"hipDeviceAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, @@ -1877,6 +1879,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaLaunchAttributeLaunchCompletionEvent", {"hipLaunchAttributeLaunchCompletionEvent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE {"cudaLaunchAttributeDeviceUpdatableKernelNode", {"hipLaunchAttributeDeviceUpdatableKernelNode", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT + {"cudaLaunchAttributePreferredSharedMemoryCarveout", {"hipLaunchAttributePreferredSharedMemoryCarveout", "", CONV_NUMERIC_LITERAL, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // CUgraphInstantiateResult {"cudaGraphInstantiateResult", {"hipGraphInstantiateResult", "", CONV_TYPE, API_RUNTIME, SEC::DATA_TYPES}}, @@ -2125,6 +2129,8 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaKernelNodeAttributeMemSyncDomainMap", {"hipKernelNodeAttributeMemSyncDomainMap", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // cudaLaunchAttributeMemSyncDomainMap // CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN {"cudaKernelNodeAttributeMemSyncDomain", {"hipKernelNodeAttributeMemSyncDomain", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // cudaLaunchAttributeMemSyncDomain + // CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT + {"cudaKernelNodeAttributePreferredSharedMemoryCarveout", {"hipKernelNodeAttributePreferredSharedMemoryCarveout", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // cudaLaunchAttributePreferredSharedMemoryCarveout // {"cudaInitDeviceFlagsAreValid", {"hipInitDeviceFlagsAreValid", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, // 0x01 // CUstreamAttrID @@ -2145,8 +2151,104 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { 
{"cudaGraphKernelNodePortProgrammatic", {"hipGraphKernelNodePortProgrammatic", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES, HIP_EXPERIMENTAL}}, // 1 // CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER {"cudaGraphKernelNodePortLaunchCompletion", {"hipGraphKernelNodePortLaunchCompletion", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES, HIP_EXPERIMENTAL}}, // 2 - // + // CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE {"cudaKernelNodeAttributeDeviceUpdatableKernelNode", {"hipKernelNodeAttributeDeviceUpdatableKernelNode", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + + {"CUDART_INF_F", {"HIP_INF_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_NAN_F", {"HIP_NAN_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_MIN_DENORM_F", {"HIP_MIN_DENORM_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_MAX_NORMAL_F", {"HIP_MAX_NORMAL_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_NEG_ZERO_F", {"HIP_NEG_ZERO_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_ZERO_F", {"HIP_ZERO_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_ONE_F", {"HIP_ONE_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_HALF_F", {"HIP_SQRT_HALF_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_HALF_HI_F", {"HIP_SQRT_HALF_HI_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_HALF_LO_F", {"HIP_SQRT_HALF_LO_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_TWO_F", {"HIP_SQRT_TWO_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_THIRD_F", {"HIP_THIRD_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PIO4_F", {"HIP_PIO4_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PIO2_F", {"HIP_PIO2_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_3PIO4_F", {"HIP_3PIO4_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_2_OVER_PI_F", {"HIP_2_OVER_PI_F", "", CONV_DEFINE, 
API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_2_OVER_PI_F", {"HIP_SQRT_2_OVER_PI_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PI_F", {"HIP_PI_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_L2E_F", {"HIP_L2E_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_L2T_F", {"HIP_L2T_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LG2_F", {"HIP_LG2_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LGE_F", {"HIP_LGE_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LN2_F", {"HIP_LN2_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LNT_F", {"HIP_LNT_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LNPI_F", {"HIP_LNPI_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_M126_F", {"HIP_TWO_TO_M126_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_126_F", {"HIP_TWO_TO_126_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_NORM_HUGE_F", {"HIP_NORM_HUGE_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_23_F", {"HIP_TWO_TO_23_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_24_F", {"HIP_TWO_TO_24_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_31_F", {"HIP_TWO_TO_31_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_32_F", {"HIP_TWO_TO_32_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_REMQUO_BITS_F", {"HIP_REMQUO_BITS_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_REMQUO_MASK_F", {"HIP_REMQUO_MASK_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TRIG_PLOSS_F", {"HIP_TRIG_PLOSS_F", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_INF", {"HIP_INF", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_NAN", {"HIP_NAN", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_NEG_ZERO", {"HIP_NEG_ZERO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + 
{"CUDART_MIN_DENORM", {"HIP_MIN_DENORM", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_ZERO", {"HIP_ZERO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_ONE", {"HIP_ONE", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_TWO", {"HIP_SQRT_TWO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_HALF", {"HIP_SQRT_HALF", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_HALF_HI", {"HIP_SQRT_HALF_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_HALF_LO", {"HIP_SQRT_HALF_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_THIRD", {"HIP_THIRD", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWOTHIRD", {"HIP_TWOTHIRD", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PIO4", {"HIP_PIO4", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PIO4_HI", {"HIP_PIO4_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PIO4_LO", {"HIP_PIO4_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PIO2", {"HIP_PIO2", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PIO2_HI", {"HIP_PIO2_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PIO2_LO", {"HIP_PIO2_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_3PIO4", {"HIP_3PIO4", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_2_OVER_PI", {"HIP_2_OVER_PI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PI", {"HIP_PI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PI_HI", {"HIP_PI_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_PI_LO", {"HIP_PI_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_2PI", {"HIP_SQRT_2PI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_2PI_HI", {"HIP_SQRT_2PI_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_2PI_LO", {"HIP_SQRT_2PI_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + 
{"CUDART_SQRT_PIO2", {"HIP_SQRT_PIO2", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_PIO2_HI", {"HIP_SQRT_PIO2_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_PIO2_LO", {"HIP_SQRT_PIO2_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_SQRT_2OPI", {"HIP_SQRT_2OPI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_L2E", {"HIP_L2E", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_L2E_HI", {"HIP_L2E_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_L2E_LO", {"HIP_L2E_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_L2T", {"HIP_L2T", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LG2", {"HIP_LG2", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LG2_HI", {"HIP_LG2_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LG2_LO", {"HIP_LG2_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LGE", {"HIP_LGE", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LGE_HI", {"HIP_LGE_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LGE_LO", {"HIP_LGE_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LN2", {"HIP_LN2", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LN2_HI", {"HIP_LN2_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LN2_LO", {"HIP_LN2_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LNT", {"HIP_LNT", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LNT_HI", {"HIP_LNT_HI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LNT_LO", {"HIP_LNT_LO", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LNPI", {"HIP_LNPI", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LN2_X_1024", {"HIP_LN2_X_1024", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LN2_X_1025", {"HIP_LN2_X_1025", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LN2_X_1075", {"HIP_LN2_X_1075", "", 
CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LG2_X_1024", {"HIP_LG2_X_1024", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_LG2_X_1075", {"HIP_LG2_X_1075", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_23", {"HIP_TWO_TO_23", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_52", {"HIP_TWO_TO_52", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_53", {"HIP_TWO_TO_53", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_54", {"HIP_TWO_TO_54", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_M54", {"HIP_TWO_TO_M54", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TWO_TO_M1022", {"HIP_TWO_TO_M1022", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_TRIG_PLOSS", {"HIP_TRIG_PLOSS", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, + {"CUDART_DBL2INT_CVT", {"HIP_DBL2INT_CVT", "", CONV_DEFINE, API_RUNTIME, SEC::DATA_TYPES}}, }; const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP { @@ -2722,6 +2824,9 @@ const std::map CUDA_RUNTIME_TYPE_NAME_VER_MAP {"cudaAsyncNotificationInfo", {CUDA_124, CUDA_0, CUDA_0 }}, {"cudaAsyncNotificationInfo_t", {CUDA_124, CUDA_0, CUDA_0 }}, {"cudaStreamLegacy", {CUDA_90, CUDA_0, CUDA_0 }}, + {"cudaDevAttrD3D12CigSupported", {CUDA_125, CUDA_0, CUDA_0 }}, + {"cudaLaunchAttributePreferredSharedMemoryCarveout", {CUDA_125, CUDA_0, CUDA_0 }}, + {"cudaKernelNodeAttributePreferredSharedMemoryCarveout", {CUDA_125, CUDA_0, CUDA_0 }}, }; const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP { @@ -2993,4 +3098,99 @@ const std::map HIP_RUNTIME_TYPE_NAME_VER_MAP { {"hipGraphDependencyTypeDefault", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipGraphDependencyTypeProgrammatic", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipGraphEdgeData", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"HIP_INF_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_NAN_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_MIN_DENORM_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_MAX_NORMAL_F", {HIP_5030, 
HIP_0, HIP_0 }}, + {"HIP_NEG_ZERO_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_ZERO_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_ONE_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_SQRT_HALF_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_SQRT_HALF_HI_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_SQRT_HALF_LO_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_SQRT_TWO_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_THIRD_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_PIO4_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_PIO2_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_3PIO4_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_2_OVER_PI_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_SQRT_2_OVER_PI_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_PI_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_L2E_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_L2T_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_LG2_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_LGE_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_LN2_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_LNT_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_LNPI_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_M126_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_126_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_NORM_HUGE_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_23_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_24_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_31_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_32_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_REMQUO_BITS_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_REMQUO_MASK_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_TRIG_PLOSS_F", {HIP_5030, HIP_0, HIP_0 }}, + {"HIP_INF", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_NAN", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_NEG_ZERO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_MIN_DENORM", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_ZERO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_ONE", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_TWO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_HALF", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_HALF_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_HALF_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_THIRD", {HIP_5070, 
HIP_0, HIP_0 }}, + {"HIP_TWOTHIRD", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PIO4", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PIO4_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PIO4_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PIO2", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PIO2_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PIO2_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_3PIO4", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_2_OVER_PI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PI_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_PI_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_2PI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_2PI_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_2PI_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_PIO2", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_PIO2_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_PIO2_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_SQRT_2OPI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_L2E", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_L2E_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_L2E_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_L2T", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LG2", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LG2_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LG2_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LGE", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LGE_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LGE_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LN2", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LN2_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LN2_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LNT", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LNT_HI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LNT_LO", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LNPI", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LN2_X_1024", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LN2_X_1025", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LN2_X_1075", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LG2_X_1024", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_LG2_X_1075", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_23", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_52", {HIP_5070, HIP_0, HIP_0 }}, + 
{"HIP_TWO_TO_53", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_54", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_M54", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_TWO_TO_M1022", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_TRIG_PLOSS", {HIP_5070, HIP_0, HIP_0 }}, + {"HIP_DBL2INT_CVT", {HIP_5070, HIP_0, HIP_0 }}, }; diff --git a/src/CUDA2HIP_SPARSE_API_types.cpp b/src/CUDA2HIP_SPARSE_API_types.cpp index fd7b407c..10731dba 100644 --- a/src/CUDA2HIP_SPARSE_API_types.cpp +++ b/src/CUDA2HIP_SPARSE_API_types.cpp @@ -209,6 +209,7 @@ const std::map CUDA_SPARSE_TYPE_NAME_MAP { {"CUSPARSE_SPMM_CSR_ALG2", {"HIPSPARSE_SPMM_CSR_ALG2", "rocsparse_spmm_alg_csr_row_split", CONV_NUMERIC_LITERAL, API_SPARSE, 4}}, {"CUSPARSE_SPMM_CSR_ALG3", {"HIPSPARSE_SPMM_CSR_ALG3", "rocsparse_spmm_alg_csr_merge", CONV_NUMERIC_LITERAL, API_SPARSE, 4}}, {"CUSPARSE_SPMM_BLOCKED_ELL_ALG1", {"HIPSPARSE_SPMM_BLOCKED_ELL_ALG1", "rocsparse_spmm_alg_bell", CONV_NUMERIC_LITERAL, API_SPARSE, 4}}, + {"CUSPARSE_SPMM_BSR_ALG1", {"HIPSPARSE_SPMM_BSR_ALG1", "rocsparse_spmm_alg_bell", CONV_NUMERIC_LITERAL, API_SPARSE, 4, UNSUPPORTED}}, {"CUSPARSE_SPMMA_PREPROCESS", {"HIPSPARSE_SPMMA_PREPROCESS", "", CONV_NUMERIC_LITERAL, API_SPARSE, 4, CUDA_REMOVED | UNSUPPORTED}}, {"CUSPARSE_SPMMA_ALG1", {"HIPSPARSE_SPMMA_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, 4, CUDA_REMOVED | UNSUPPORTED}}, {"CUSPARSE_SPMMA_ALG2", {"HIPSPARSE_SPMMA_ALG2", "", CONV_NUMERIC_LITERAL, API_SPARSE, 4, CUDA_REMOVED | UNSUPPORTED}}, @@ -424,6 +425,7 @@ const std::map CUDA_SPARSE_TYPE_NAME_VER_MAP { {"cusparseSpSMUpdate_t", {CUDA_124, CUDA_0, CUDA_0 }}, {"CUSPARSE_SPSM_UPDATE_GENERAL", {CUDA_124, CUDA_0, CUDA_0 }}, {"CUSPARSE_SPSM_UPDATE_DIAGONAL", {CUDA_124, CUDA_0, CUDA_0 }}, + {"CUSPARSE_SPMM_BSR_ALG1", {CUDA_125, CUDA_0, CUDA_0 }}, // CUSPARSE_VERSION 12501 }; const std::map HIP_SPARSE_TYPE_NAME_VER_MAP { diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 0b013260..b79cdfbc 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -489,6 +489,7 @@ 
std::string Statistics::getCudaVersion(const cudaVersions &ver) { case CUDA_122: return "12.2"; case CUDA_123: return "12.3"; case CUDA_124: return "12.4"; + case CUDA_125: return "12.5"; case CUDNN_10: return "1.0.0"; case CUDNN_20: return "2.0.0"; case CUDNN_30: return "3.0.0"; @@ -539,6 +540,7 @@ std::string Statistics::getCudaVersion(const cudaVersions &ver) { case CUDNN_897: return "8.9.7"; case CUDNN_900: return "9.0.0"; case CUDNN_910: return "9.1.0"; + case CUDNN_920: return "9.2.0"; } return ""; } diff --git a/src/Statistics.h b/src/Statistics.h index fd889e65..b24cc48e 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -242,7 +242,8 @@ enum cudaVersions { CUDA_122 = 12020, CUDA_123 = 12030, CUDA_124 = 12040, - CUDA_LATEST = CUDA_124, + CUDA_125 = 12050, + CUDA_LATEST = CUDA_125, CUDNN_10 = 100, CUDNN_20 = 200, CUDNN_30 = 300, @@ -293,7 +294,8 @@ enum cudaVersions { CUDNN_897 = 897, CUDNN_900 = 900, CUDNN_910 = 910, - CUDNN_LATEST = CUDNN_910, + CUDNN_920 = 920, + CUDNN_LATEST = CUDNN_920, }; enum hipVersions { diff --git a/tests/lit.cfg b/tests/lit.cfg index 57196586..f18c72ec 100644 --- a/tests/lit.cfg +++ b/tests/lit.cfg @@ -31,6 +31,7 @@ if not config.cuda_dnn_root or config.cuda_dnn_root == "OFF": config.excludes.append('cudnn_convolution_forward.cu') config.excludes.append('cudnn_softmax.cu') config.excludes.append('cudnn2miopen.cu') + config.excludes.append('cudnn2miopen_before_9000.cu') print("WARN: cuDNN tests are excluded because CUDA_DNN_ROOT_DIR is not specified") warns = True if not config.cuda_cub_root or config.cuda_cub_root == "OFF": @@ -139,11 +140,9 @@ if config.cuda_version_major >= 12: config.excludes.append('cuSPARSE_11.cu') config.excludes.append('cublas_0_based_indexing.cu') config.excludes.append('cublas_0_based_indexing_rocblas.cu') - config.excludes.append('cub_01.cu') - config.excludes.append('cub_02.cu') - config.excludes.append('cub_03.cu') config.excludes.append('cusparse2rocsparse_9020_12000.cu') 
config.excludes.append('cusparse2rocsparse_before_12000.cu') + clang_arguments += " -D_LIBCUDACXX_OBJECT_FORMAT_COFF" if config.cudnn_version_major >= 9: config.excludes.append('cudnn2miopen_before_9000.cu') @@ -155,6 +154,9 @@ if config.llvm_version_major < 10: config.excludes.append('pp_if_else_conditionals_LLVM_10.cu') config.excludes.append('pp_if_else_conditionals_01_LLVM_10.cu') +if config.llvm_version_major > 18: + clang_arguments += " -Wno-missing-template-arg-list-after-template-kw" + # name: The name of this test suite. config.name = 'hipify' @@ -200,13 +202,8 @@ if sys.platform in ['win32']: run_test_ext = ".bat" if not config.hipify_clang_tests_only or config.hipify_clang_tests_only == "0" or config.hipify_clang_tests_only.upper() == "OFF": hipify_path += "/" + config.build_type - # CUDA SDK ROOT - clang_arguments += " -isystem'%s'/common" + inc_subfolder - else: run_test_ext = ".sh" - # CUDA SDK ROOT - clang_arguments += " -isystem'%s'/samples/common" + inc_subfolder if config.pointer_size == 8: clang_arguments += " -D__LP64__" @@ -233,20 +230,39 @@ elif config.cuda_version_major == 11: if config.cuda_version_minor > 0 and config.llvm_version_major >= 13: clang_arguments += " --cuda-gpu-arch=sm_86" +# CUDA SDK ROOT +if config.cuda_sdk_root and config.cuda_sdk_root != "OFF": + clang_arguments += " -isystem'%s'/common" + inc_subfolder + # cuDNN ROOT if config.cuda_dnn_root and config.cuda_dnn_root != "OFF": clang_arguments += " -I'%s'/include" + # CUB ROOT if config.cuda_cub_root and config.cuda_cub_root != "OFF": clang_arguments += " -I'%s'" -if config.cuda_sdk_root and config.cuda_sdk_root != "OFF" and config.cuda_dnn_root and config.cuda_dnn_root != "OFF" and config.cuda_cub_root and config.cuda_cub_root != "OFF": +i_subst = 0 +if config.cuda_sdk_root != "OFF": + i_subst = 1 +if config.cuda_dnn_root != "OFF": + i_subst += 2 +if config.cuda_cub_root != "OFF": + i_subst += 4 + +if i_subst == 7: config.substitutions.append(("%clang_args", clang_arguments 
% (config.cuda_sdk_root, config.cuda_dnn_root, config.cuda_cub_root))) -elif config.cuda_sdk_root and config.cuda_sdk_root != "OFF" and config.cuda_dnn_root and config.cuda_dnn_root != "OFF": - config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root))) -elif config.cuda_sdk_root and config.cuda_sdk_root != "OFF" and config.cuda_cub_root and config.cuda_cub_root != "OFF": +elif i_subst == 6: + config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_dnn_root, config.cuda_cub_root))) +elif i_subst == 5: config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_cub_root))) -elif config.cuda_sdk_root and config.cuda_sdk_root != "OFF": +elif i_subst == 4: + config.substitutions.append(("%clang_args", clang_arguments % config.cuda_cub_root)) +elif i_subst == 3: + config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root))) +elif i_subst == 2: + config.substitutions.append(("%clang_args", clang_arguments % config.cuda_dnn_root)) +elif i_subst == 1: config.substitutions.append(("%clang_args", clang_arguments % config.cuda_sdk_root)) if config.llvm_version_major < 4: diff --git a/tests/lit.site.cfg.in b/tests/lit.site.cfg.in index 52caa468..9ca7bbc8 100644 --- a/tests/lit.site.cfg.in +++ b/tests/lit.site.cfg.in @@ -9,6 +9,7 @@ config.llvm_version_major = int("@LLVM_VERSION_MAJOR@") config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@" config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@" config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@" +config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@" config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@" config.cuda_cub_root = "@CUDA_CUB_ROOT_DIR@" config.cuda_version_major = int("@CUDA_VERSION_MAJOR@") @@ -17,12 +18,9 @@ config.cuda_version_full = "@CUDA_VERSION_FULL@" config.cuda_version = "@CUDA_VERSION@" config.clang_resource_dir = "@HIPIFY_CLANG_RES@" if sys.platform in ['win32']: - config.cuda_sdk_root = 
"@CUDA_SDK_ROOT_DIR@" config.build_type = "@CMAKE_BUILD_TYPE@" if not config.build_type: config.build_type = "Debug" -else: - config.cuda_sdk_root = config.cuda_root config.cudnn_version_major = int("0") if config.cuda_dnn_root and config.cuda_dnn_root != "OFF": diff --git a/tests/unit_tests/headers/headers_test_13.cu b/tests/unit_tests/headers/headers_test_13.cu new file mode 100644 index 00000000..0ae53d20 --- /dev/null +++ b/tests/unit_tests/headers/headers_test_13.cu @@ -0,0 +1,9 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args + +// CHECK: #include +// CHECK: #include +// CHECK-NOT: #include +// CHECK: #include +#include +#include +#include diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu index 25845dc1..034b8c33 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas.cu @@ -1703,7 +1703,7 @@ int main() { blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo); #endif -#if CUDA_VERSION >= 11060 && CUBLAS_VERSION >= 110902 // CUDA 11.6.2 +#if CUDA_VERSION > 11060 && CUBLAS_VERSION >= 110902 // CUDA 11.6.2 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const xarray[], int incx, const cuComplex* beta, cuComplex* const yarray[], int incy, int batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemvBatched_v2(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const hipComplex* alpha, const hipComplex* const AP[], int lda, const hipComplex* const x[], int incx, const hipComplex* beta, hipComplex* const y[], int incy, int batchCount); // CHECK: blasStatus = 
hipblasCgemvBatched_v2(blasHandle, blasOperation, m, n, &complexa, complexAarray_const, lda, complexXarray_const, incx, &complexb, complexYarray, incy, batchCount); diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 7260f9e0..8e081166 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -162,11 +162,13 @@ int main() { int incy = 0; int64_t incy_64 = 0; int k = 0; + int64_t k_64 = 0; int kl = 0; int64_t kl_64 = 0; int ku = 0; int64_t ku_64 = 0; int batchCount = 0; + int64_t batchCount_64 = 0; int P = 0; int info = 0; void* image = nullptr; @@ -237,6 +239,10 @@ int main() { const float** const fAarray_const = const_cast(fAarray); float** fBarray = nullptr; const float** const fBarray_const = const_cast(fBarray); + float** fXarray = nullptr; + const float** const fXarray_const = const_cast(fXarray); + float** fYarray = nullptr; + const float** const fYarray_const = const_cast(fYarray); float** fCarray = nullptr; float** fTauarray = nullptr; @@ -260,6 +266,10 @@ int main() { const double** const dAarray_const = const_cast(dAarray); double** dBarray = nullptr; const double** const dBarray_const = const_cast(dBarray); + double** dXarray = nullptr; + const double** const dXarray_const = const_cast(dXarray); + double** dYarray = nullptr; + const double** const dYarray_const = const_cast(dYarray); double** dCarray = nullptr; double** dTauarray = nullptr; @@ -291,26 +301,42 @@ int main() { // CHECK: hipComplex** complexAarray = 0; // CHECK: const hipComplex** const complexAarray_const = const_cast(complexAarray); // CHECK-NEXT: hipComplex** complexBarray = 0; - // CHECK: const hipComplex** const complexBarray_const = const_cast(complexBarray); + // CHECK-NEXT: const hipComplex** const complexBarray_const = const_cast(complexBarray); + // CHECK-NEXT: hipComplex** complexXarray = 0; + // CHECK-NEXT: const 
hipComplex** const complexXarray_const = const_cast(complexXarray); + // CHECK-NEXT: hipComplex** complexYarray = 0; + // CHECK-NEXT: const hipComplex** const complexYarray_const = const_cast(complexYarray); // CHECK-NEXT: hipComplex** complexCarray = 0; // CHECK-NEXT: hipComplex** complexTauarray = 0; cuComplex** complexAarray = 0; const cuComplex** const complexAarray_const = const_cast(complexAarray); cuComplex** complexBarray = 0; const cuComplex** const complexBarray_const = const_cast(complexBarray); + cuComplex** complexXarray = 0; + const cuComplex** const complexXarray_const = const_cast(complexXarray); + cuComplex** complexYarray = 0; + const cuComplex** const complexYarray_const = const_cast(complexYarray); cuComplex** complexCarray = 0; cuComplex** complexTauarray = 0; // CHECK: hipDoubleComplex** dcomplexAarray = 0; // CHECK: const hipDoubleComplex** const dcomplexAarray_const = const_cast(dcomplexAarray); // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; - // CHECK: const hipDoubleComplex** const dcomplexBarray_const = const_cast(dcomplexBarray); + // CHECK-NEXT: const hipDoubleComplex** const dcomplexBarray_const = const_cast(dcomplexBarray); + // CHECK-NEXT: hipDoubleComplex** dcomplexXarray = 0; + // CHECK-NEXT: const hipDoubleComplex** const dcomplexXarray_const = const_cast(dcomplexXarray); + // CHECK-NEXT: hipDoubleComplex** dcomplexYarray = 0; + // CHECK-NEXT: const hipDoubleComplex** const dcomplexYarray_const = const_cast(dcomplexYarray); // CHECK-NEXT: hipDoubleComplex** dcomplexCarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexTauarray = 0; cuDoubleComplex** dcomplexAarray = 0; const cuDoubleComplex** const dcomplexAarray_const = const_cast(dcomplexAarray); cuDoubleComplex** dcomplexBarray = 0; const cuDoubleComplex** const dcomplexBarray_const = const_cast(dcomplexBarray); + cuDoubleComplex** dcomplexXarray = 0; + const cuDoubleComplex** const dcomplexXarray_const = const_cast(dcomplexXarray); + cuDoubleComplex** dcomplexYarray = 
0; + const cuDoubleComplex** const dcomplexYarray_const = const_cast(dcomplexYarray); cuDoubleComplex** dcomplexCarray = 0; cuDoubleComplex** dcomplexTauarray = 0; @@ -1609,6 +1635,8 @@ int main() { long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; + long long int strideX = 0; + long long int strideY = 0; #if CUDA_VERSION >= 7050 // CHECK: __half* ha = 0; @@ -1850,6 +1878,28 @@ int main() { blasStatus = cublasGemmStridedBatchedEx(blasHandle, transa, transb, m, n, k, aptr, Aptr, Atype, lda, strideA, Bptr, Btype, ldb, strideB, bptr, Cptr, Ctype, ldc, strideC, batchCount, blasComputeType, blasGemmAlgo); #endif +#if CUDA_VERSION > 11060 && CUBLAS_VERSION >= 110902 // CUDA 11.6.2 + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const float* alpha, const float* A, int lda, long long int strideA, const float* x, int incx, long long int stridex, const float* beta, float* y, int incy, long long int stridey, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float* alpha, const float* AP, int lda, hipblasStride strideA, const float* x, int incx, hipblasStride stridex, const float* beta, float* y, int incy, hipblasStride stridey, int batchCount); + // CHECK: blasStatus = hipblasSgemvStridedBatched(blasHandle, blasOperation, m, n, &fa, &fA, lda, strideA, &fx, incx, strideX, &fb, &fy, incy, strideY, batchCount); + blasStatus = cublasSgemvStridedBatched(blasHandle, blasOperation, m, n, &fa, &fA, lda, strideA, &fx, incx, strideX, &fb, &fy, incy, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const double* alpha, const double* A, int lda, long long int strideA, const double* x, int incx, long long int stridex, const double* beta, double* y, int 
incy, long long int stridey, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvStridedBatched(hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double* alpha, const double* AP, int lda, hipblasStride strideA, const double* x, int incx, hipblasStride stridex, const double* beta, double* y, int incy, hipblasStride stridey, int batchCount); + // CHECK: blasStatus = hipblasDgemvStridedBatched(blasHandle, blasOperation, m, n, &da, &dA, lda, strideA, &dx, incx, strideX, &db, &dy, incy, strideY, batchCount); + blasStatus = cublasDgemvStridedBatched(blasHandle, blasOperation, m, n, &da, &dA, lda, strideA, &dx, incx, strideX, &db, &dy, incy, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const float* alpha, const float* const Aarray[], int lda, const float* const xarray[], int incx, const float* beta, float* const yarray[], int incy, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemvBatched(hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float* alpha, const float* const AP[], int lda, const float* const x[], int incx, const float* beta, float* const y[], int incy, int batchCount); + // CHECK: blasStatus = hipblasSgemvBatched(blasHandle, blasOperation, m, n, &fa, fAarray_const, lda, fXarray_const, incx, &fb, fYarray, incy, batchCount); + blasStatus = cublasSgemvBatched(blasHandle, blasOperation, m, n, &fa, fAarray_const, lda, fXarray_const, incx, &fb, fYarray, incy, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const double* alpha, const double* const Aarray[], int lda, const double* const xarray[], int incx, const double* beta, double* const yarray[], int incy, int batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvBatched(hipblasHandle_t handle, hipblasOperation_t trans, int 
m, int n, const double* alpha, const double* const AP[], int lda, const double* const x[], int incx, const double* beta, double* const y[], int incy, int batchCount); + // CHECK: blasStatus = hipblasDgemvBatched(blasHandle, blasOperation, m, n, &da, dAarray_const, lda, dXarray_const, incx, &db, dYarray, incy, batchCount); + blasStatus = cublasDgemvBatched(blasHandle, blasOperation, m, n, &da, dAarray_const, lda, dXarray_const, incx, &db, dYarray, incy, batchCount); +#endif + #if CUDA_VERSION >= 12000 // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasIsamax_v2_64(cublasHandle_t handle, int64_t n, const float* x, int64_t incx, int64_t* result); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasIsamax_64(hipblasHandle_t handle, int64_t n, const float* x, int64_t incx, int64_t* result); @@ -2202,18 +2252,564 @@ int main() { blasStatus = cublasDgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); - // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const hipblasComplex* alpha, const hipblasComplex* AP, int64_t lda, const hipblasComplex* x, int64_t incx, const hipblasComplex* beta, hipblasComplex* y, int64_t incy); - // CHECK: blasStatus = hipblasCgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); - // CHECK-NEXT: blasStatus = hipblasCgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // HIP: HIPBLAS_EXPORT hipblasStatus_t 
hipblasCgbmv_v2_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* x, int64_t incx, const hipComplex* beta, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasCgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasCgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); blasStatus = cublasCgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); blasStatus = cublasCgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); - // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int64_t lda, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy); - // CHECK: blasStatus = hipblasZgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); - // CHECK-NEXT: blasStatus = hipblasZgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, 
incy_64); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmv_v2_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* beta, hipDoubleComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasZgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); blasStatus = cublasZgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); blasStatus = cublasZgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const float* alpha, const float* AP, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = hipblasSgemv_64(blasHandle, blasOperation, m_64, n_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = hipblasSgemv_64(blasHandle, blasOperation, m_64, n_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSgemv_64(blasHandle, blasOperation, m_64, n_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = 
cublasSgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* AP, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = hipblasDgemv_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = hipblasDgemv_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDgemv_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemv_v2_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* x, int64_t incx, const hipComplex* beta, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasCgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasCgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, 
&complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCgemv_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemv_v2_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* beta, hipDoubleComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasZgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZgemv_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const float* const Aarray[], int64_t lda, const float* const xarray[], int64_t incx, const float* beta, float* const yarray[], int64_t incy, int64_t batchCount); + // HIP: HIPBLAS_EXPORT 
hipblasStatus_t hipblasSgemvBatched_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const float* alpha, const float* const AP[], int64_t lda, const float* const x[], int64_t incx, const float* beta, float* const y[], int64_t incy, int64_t batchCount); + // CHECK: blasStatus = hipblasSgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, fAarray_const, lda_64, fXarray_const, incx_64, &fb, fYarray, incy_64, batchCount_64); + blasStatus = cublasSgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, fAarray_const, lda_64, fXarray_const, incx_64, &fb, fYarray, incy_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* const Aarray[], int64_t lda, const double* const xarray[], int64_t incx, const double* beta, double* const yarray[], int64_t incy, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvBatched_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* const AP[], int64_t lda, const double* const x[], int64_t incx, const double* beta, double* const y[], int64_t incy, int64_t batchCount); + // CHECK: blasStatus = hipblasDgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &da, dAarray_const, lda_64, dXarray_const, incx_64, &db, dYarray, incy_64, batchCount_64); + blasStatus = cublasDgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &da, dAarray_const, lda_64, dXarray_const, incx_64, &db, dYarray, incy_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* const Aarray[], int64_t lda, const cuComplex* const xarray[], int64_t incx, const cuComplex* beta, cuComplex* const yarray[], int64_t incy, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t 
hipblasCgemvBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* const AP[], int64_t lda, const hipComplex* const x[], int64_t incx, const hipComplex* beta, hipComplex* const y[], int64_t incy, int64_t batchCount); + // CHECK: blasStatus = hipblasCgemvBatched_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, complexAarray_const, lda_64, complexXarray_const, incx_64, &complexb, complexYarray, incy_64, batchCount_64); + blasStatus = cublasCgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &complexa, complexAarray_const, lda_64, complexXarray_const, incx_64, &complexb, complexYarray, incy_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int64_t lda, const cuDoubleComplex* const xarray[], int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* const yarray[], int64_t incy, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemvBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* const AP[], int64_t lda, const hipDoubleComplex* const x[], int64_t incx, const hipDoubleComplex* beta, hipDoubleComplex* const y[], int64_t incy, int64_t batchCount); + // CHECK: blasStatus = hipblasZgemvBatched_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexXarray_const, incx_64, &dcomplexb, dcomplexYarray, incy_64, batchCount_64); + blasStatus = cublasZgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, dcomplexAarray_const, lda_64, dcomplexXarray_const, incx_64, &dcomplexb, dcomplexYarray, incy_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, 
int64_t n, const float* alpha, const float* A, int64_t lda, long long int strideA, const float* x, int64_t incx, long long int stridex, const float* beta, float* y, int64_t incy, long long int stridey, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemvStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, const float* alpha, const float* AP, int64_t lda, hipblasStride strideA, const float* x, int64_t incx, hipblasStride stridex, const float* beta, float* y, int64_t incy, hipblasStride stridey, int64_t batchCount); + // CHECK: blasStatus = hipblasSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, &fA, lda_64, strideA, &fx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount); + blasStatus = cublasSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, &fA, lda_64, strideA, &fx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, long long int strideA, const double* x, int64_t incx, long long int stridex, const double* beta, double* y, int64_t incy, long long int stridey, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, const double* alpha, const double* AP, int64_t lda, hipblasStride strideA, const double* x, int64_t incx, hipblasStride stridex, const double* beta, double* y, int64_t incy, hipblasStride stridey, int64_t batchCount); + // CHECK: blasStatus = hipblasDgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, strideA, &dx, incx_64, strideX, &db, &dy, incy_64, strideY, batchCount); + blasStatus = cublasDgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, strideA, &dx, incx_64, strideX, &db, &dy, 
incy_64, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, long long int strideA, const cuComplex* x, int64_t incx, long long int stridex, const cuComplex* beta, cuComplex* y, int64_t incy, long long int stridey, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemvStridedBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n,const hipComplex* alpha, const hipComplex* AP, int64_t lda,hipblasStride strideA, const hipComplex* x, int64_t incx, hipblasStride stridex, const hipComplex* beta, hipComplex* y, int64_t incy, hipblasStride stridey, int64_t batchCount); + // CHECK: blasStatus = hipblasCgemvStridedBatched_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, strideA, &complexx, incx_64, strideX, &complexb, &complexy, incy_64, strideY, batchCount); + blasStatus = cublasCgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, strideA, &complexx, incx_64, strideX, &complexb, &complexy, incy_64, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, long long int strideA, const cuDoubleComplex* x, int64_t incx, long long int stridex, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy, long long int stridey, int64_t batchCount); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemvStridedBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, hipblasStride strideA, const hipDoubleComplex* x, int64_t incx, hipblasStride stridex, const hipDoubleComplex* beta, hipDoubleComplex* y, int64_t 
incy, hipblasStride stridey, int64_t batchCount); + // CHECK: blasStatus = hipblasZgemvStridedBatched_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexx, incx_64, strideX, &dcomplexb, &dcomplexy, incy_64, strideY, batchCount); + blasStatus = cublasZgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexx, incx_64, strideX, &dcomplexb, &dcomplexy, incy_64, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSger_64(hipblasHandle_t handle, int64_t m, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* AP, int64_t lda); + // CHECK: blasStatus = hipblasSger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + // CHECK-NEXT: blasStatus = hipblasSger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + blasStatus = cublasSger_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + blasStatus = cublasSger_v2_64(blasHandle, m_64, n_64, &fa, &fx, incx_64, &fy, incy_64, &fAP, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDger_64(hipblasHandle_t handle, int64_t m, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* AP, int64_t lda); + // CHECK: blasStatus = hipblasDger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + // CHECK-NEXT: blasStatus = hipblasDger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); 
+ blasStatus = cublasDger_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDger_v2_64(blasHandle, m_64, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgeru_v2_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* x, int64_t incx, const hipComplex* y, int64_t incy, hipComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasCgeru_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasCgeru_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgeru_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgeru_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgerc_v2_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* x, int64_t incx, const hipComplex* y, int64_t incy, hipComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasCgerc_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasCgerc_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgerc_64(blasHandle, m_64, 
n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCgerc_v2_64(blasHandle, m_64, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgeru_v2_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* y, int64_t incy, hipDoubleComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasZgeru_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasZgeru_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgeru_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgeru_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgerc_v2_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* y, int64_t incy, hipDoubleComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasZgerc_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasZgerc_v2_64(blasHandle, m_64, n_64, 
&dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgerc_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZgerc_v2_64(blasHandle, m_64, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChbmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* x, int64_t incx, const hipComplex* beta, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasChbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasChbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChbmv_64(blasHandle, blasFillMode, n_64, k_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhbmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipDoubleComplex* 
alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* beta, hipDoubleComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasZhbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZhbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhbmv_64(blasHandle, blasFillMode, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChemv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* x, int64_t incx, const hipComplex* beta, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasChemv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasChemv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChemv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChemv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t 
CUBLASWINAPI cublasZhemv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhemv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* beta, hipDoubleComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasZhemv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZhemv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhemv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhemv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const cuComplex* x, int64_t incx, cuComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCher_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const hipComplex* x, int64_t incx, hipComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasCher_v2_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasCher_v2_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA, lda_64); + blasStatus = cublasCher_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA, lda_64); + blasStatus = cublasCher_v2_64(blasHandle, 
blasFillMode, n_64, &fa, &complexx, incx_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZher_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const hipDoubleComplex* x, int64_t incx, hipDoubleComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasZher_v2_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasZher_v2_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA, lda_64); + blasStatus = cublasZher_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA, lda_64); + blasStatus = cublasZher_v2_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCher2_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipComplex* alpha, const hipComplex* x, int64_t incx, const hipComplex* y, int64_t incy, hipComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasCher2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasCher2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCher2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCher2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, 
incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZher2_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* y, int64_t incy, hipDoubleComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasZher2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasZher2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZher2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZher2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* AP, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChpmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipComplex* alpha, const hipComplex* AP, const hipComplex* x, int64_t incx, const hipComplex* beta, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasChpmv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasChpmv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = 
cublasChpmv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChpmv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* AP, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhpmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* beta, hipDoubleComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasZhpmv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZhpmv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhpmv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhpmv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const cuComplex* x, int64_t incx, cuComplex* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChpr_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const hipComplex* x, int64_t incx, hipComplex* AP); + // CHECK: blasStatus = hipblasChpr_v2_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA); + // CHECK-NEXT: blasStatus = hipblasChpr_v2_64(blasHandle, 
blasFillMode, n_64, &fa, &complexx, incx_64, &complexA); + blasStatus = cublasChpr_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA); + blasStatus = cublasChpr_v2_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const hipDoubleComplex* x, int64_t incx, hipDoubleComplex* AP); + // CHECK: blasStatus = hipblasZhpr_v2_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA); + // CHECK-NEXT: blasStatus = hipblasZhpr_v2_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA); + blasStatus = cublasZhpr_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA); + blasStatus = cublasZhpr_v2_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasChpr2_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipComplex* alpha, const hipComplex* x, int64_t incx, const hipComplex* y, int64_t incy, hipComplex* AP); + // CHECK: blasStatus = hipblasChpr2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA); + // CHECK-NEXT: blasStatus = hipblasChpr2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA); + blasStatus = cublasChpr2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA); + blasStatus = 
cublasChpr2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr2_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* y, int64_t incy, hipDoubleComplex* AP); + // CHECK: blasStatus = hipblasZhpr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + // CHECK-NEXT: blasStatus = hipblasZhpr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + blasStatus = cublasZhpr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + blasStatus = cublasZhpr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsbmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const float* alpha, const float* AP, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = hipblasSsbmv_64(blasHandle, blasFillMode, n_64, k_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = hipblasSsbmv_64(blasHandle, blasFillMode, n_64, k_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSsbmv_64(blasHandle, 
blasFillMode, n_64, k_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSsbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsbmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const double* alpha, const double* AP, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = hipblasDsbmv_64(blasHandle, blasFillMode, n_64, k_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = hipblasDsbmv_64(blasHandle, blasFillMode, n_64, k_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDsbmv_64(blasHandle, blasFillMode, n_64, k_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDsbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* AP, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSspmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const float* AP, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = hipblasSspmv_64(blasHandle, blasFillMode, n_64, &fa, &fA, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = hipblasSspmv_64(blasHandle, blasFillMode, n_64, &fa, &fA, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSspmv_64(blasHandle, blasFillMode, n_64, 
&fa, &fA, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSspmv_v2_64(blasHandle, blasFillMode, n_64, &fa, &fA, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* AP, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDspmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const double* AP, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = hipblasDspmv_64(blasHandle, blasFillMode, n_64, &da, &dA, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = hipblasDspmv_64(blasHandle, blasFillMode, n_64, &da, &dA, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDspmv_64(blasHandle, blasFillMode, n_64, &da, &dA, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDspmv_v2_64(blasHandle, blasFillMode, n_64, &da, &dA, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, float* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSspr_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, float* AP); + // CHECK: blasStatus = hipblasSspr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA); + // CHECK-NEXT: blasStatus = hipblasSspr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA); + blasStatus = cublasSspr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA); + blasStatus = cublasSspr_v2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, 
double* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDspr_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, double* AP); + // CHECK: blasStatus = hipblasDspr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA); + // CHECK-NEXT: blasStatus = hipblasDspr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA); + blasStatus = cublasDspr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA); + blasStatus = cublasDspr_v2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSspr2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* AP); + // CHECK: blasStatus = hipblasSspr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA); + // CHECK-NEXT: blasStatus = hipblasSspr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA); + blasStatus = cublasSspr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA); + blasStatus = cublasSspr2_v2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* AP); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDspr2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* AP); + // CHECK: blasStatus = hipblasDspr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA); + // CHECK-NEXT: 
blasStatus = hipblasDspr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA); + blasStatus = cublasDspr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA); + blasStatus = cublasDspr2_v2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsymv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const float* AP, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = hipblasSsymv_64(blasHandle, blasFillMode, n_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = hipblasSsymv_64(blasHandle, blasFillMode, n_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSsymv_64(blasHandle, blasFillMode, n_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSsymv_v2_64(blasHandle, blasFillMode, n_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsymv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const double* AP, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = hipblasDsymv_64(blasHandle, blasFillMode, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = hipblasDsymv_64(blasHandle, blasFillMode, n_64, &da, &dA, lda_64, 
&dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDsymv_64(blasHandle, blasFillMode, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDsymv_v2_64(blasHandle, blasFillMode, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsymv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* x, int64_t incx, const hipComplex* beta, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasCsymv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasCsymv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCsymv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCsymv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsymv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* beta, hipDoubleComplex* y, 
int64_t incy); + // CHECK: blasStatus = hipblasZsymv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZsymv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZsymv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZsymv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, float* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, float* AP, int64_t lda); + // CHECK: blasStatus = hipblasSsyr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA, lda_64); + // CHECK-NEXT: blasStatus = hipblasSsyr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA, lda_64); + blasStatus = cublasSsyr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA, lda_64); + blasStatus = cublasSsyr_v2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, double* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, double* AP, int64_t lda); + // CHECK: blasStatus = hipblasDsyr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA, lda_64); + // CHECK-NEXT: blasStatus = hipblasDsyr_64(blasHandle, 
blasFillMode, n_64, &da, &dx, incx_64, &dA, lda_64); + blasStatus = cublasDsyr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA, lda_64); + blasStatus = cublasDsyr_v2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, cuComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipComplex* alpha, const hipComplex* x, int64_t incx, hipComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasCsyr_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasCsyr_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexA, lda_64); + blasStatus = cublasCsyr_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexA, lda_64); + blasStatus = cublasCsyr_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* x, int64_t incx, hipDoubleComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasZsyr_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasZsyr_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexA, lda_64); + blasStatus = cublasZsyr_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexA, lda_64); + blasStatus = 
cublasZsyr_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* AP, int64_t lda); + // CHECK: blasStatus = hipblasSsyr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA, lda_64); + // CHECK-NEXT: blasStatus = hipblasSsyr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA, lda_64); + blasStatus = cublasSsyr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA, lda_64); + blasStatus = cublasSsyr2_v2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* AP, int64_t lda); + // CHECK: blasStatus = hipblasDsyr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + // CHECK-NEXT: blasStatus = hipblasDsyr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDsyr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDsyr2_v2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasCsyr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipComplex* alpha, const hipComplex* x, int64_t incx, const hipComplex* y, int64_t incy, hipComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasCsyr2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasCsyr2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCsyr2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCsyr2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* y, int64_t incy, hipDoubleComplex* AP, int64_t lda); + // CHECK: blasStatus = hipblasZsyr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = hipblasZsyr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZsyr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + 
blasStatus = cublasZsyr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const float* A, int64_t lda, float* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStbmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const float* AP, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = hipblasStbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = hipblasStbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtbmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const double* AP, int64_t lda, double* x, int64_t incx); + // CHECK: blasStatus = hipblasDtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + // CHECK-NEXT: blasStatus = hipblasDtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, 
lda_64, &dx, incx_64); + blasStatus = cublasDtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtbmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const hipComplex* AP, int64_t lda, hipComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasCtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasCtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtbmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const hipDoubleComplex* AP, int64_t lda, hipDoubleComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasZtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasZtbmv_v2_64(blasHandle, blasFillMode, blasOperation, 
blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const float* A, int64_t lda, float* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStbsv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const float* AP, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = hipblasStbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = hipblasStbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtbsv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const double* AP, int64_t lda, double* x, int64_t incx); + // CHECK: blasStatus = hipblasDtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + // CHECK-NEXT: blasStatus = 
hipblasDtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtbsv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const hipComplex* AP, int64_t lda, hipComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasCtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasCtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtbsv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const hipDoubleComplex* AP, int64_t lda, hipDoubleComplex* x, int64_t incx); + // CHECK: blasStatus = 
hipblasZtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasZtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* AP, float* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStpmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const float* AP, float* x, int64_t incx); + // CHECK: blasStatus = hipblasStpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + // CHECK-NEXT: blasStatus = hipblasStpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + blasStatus = cublasStpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + blasStatus = cublasStpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* AP, double* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtpmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const double* AP, double* x, int64_t incx); + // CHECK: blasStatus = hipblasDtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, 
&dA, &dx, incx_64); + // CHECK-NEXT: blasStatus = hipblasDtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + blasStatus = cublasDtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + blasStatus = cublasDtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* AP, cuComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtpmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipComplex* AP, hipComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasCtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasCtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + blasStatus = cublasCtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + blasStatus = cublasCtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* AP, cuDoubleComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtpmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipDoubleComplex* AP, hipDoubleComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasZtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = 
hipblasZtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + blasStatus = cublasZtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + blasStatus = cublasZtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* AP, float* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStpsv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const float* AP, float* x, int64_t incx); + // CHECK: blasStatus = hipblasStpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + // CHECK-NEXT: blasStatus = hipblasStpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + blasStatus = cublasStpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + blasStatus = cublasStpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* AP, double* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtpsv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const double* AP, double* x, int64_t incx); + // CHECK: blasStatus = hipblasDtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + // CHECK-NEXT: blasStatus = hipblasDtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + blasStatus = cublasDtpsv_64(blasHandle, blasFillMode, 
blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + blasStatus = cublasDtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* AP, cuComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtpsv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipComplex* AP, hipComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasCtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasCtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + blasStatus = cublasCtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + blasStatus = cublasCtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* AP, cuDoubleComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtpsv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipDoubleComplex* AP, hipDoubleComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasZtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasZtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + blasStatus = cublasZtpsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, 
&dcomplexx, incx_64); + blasStatus = cublasZtpsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* A, int64_t lda, float* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const float* AP, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = hipblasStrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = hipblasStrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* A, int64_t lda, double* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const double* AP, int64_t lda, double* x, int64_t incx); + // CHECK: blasStatus = hipblasDtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + // CHECK-NEXT: blasStatus = hipblasDtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrmv_v2_64(blasHandle, 
blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipComplex* AP, int64_t lda, hipComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasCtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasCtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipDoubleComplex* AP, int64_t lda, hipDoubleComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasZtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasZtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, 
&dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* A, int64_t lda, float* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasStrsv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const float* AP, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = hipblasStrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = hipblasStrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* A, int64_t lda, double* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsv_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const double* AP, int64_t lda, double* x, int64_t incx); + // CHECK: blasStatus = hipblasDtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + // CHECK-NEXT: blasStatus = hipblasDtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = 
cublasDtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipComplex* AP, int64_t lda, hipComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasCtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasCtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsv_v2_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipDoubleComplex* AP, int64_t lda, hipDoubleComplex* x, int64_t incx); + // CHECK: blasStatus = hipblasZtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = hipblasZtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrsv_64(blasHandle, blasFillMode, 
blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAxpyEx_64(cublasHandle_t handle, int64_t n, const void* alpha, cudaDataType alphaType, const void* x, cudaDataType xType, int64_t incx, void* y, cudaDataType yType, int64_t incy, cudaDataType executiontype); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasAxpyEx_v2_64(hipblasHandle_t handle, int64_t n, const void* alpha, hipDataType alphaType, const void* x, hipDataType xType, int64_t incx, void* y, hipDataType yType, int64_t incy, hipDataType executionType); + // CHECK: blasStatus = hipblasAxpyEx_v2_64(blasHandle, n_64, aptr, Atype, xptr, Xtype, incx_64, yptr, Ytype, incy_64, Executiontype); + blasStatus = cublasAxpyEx_64(blasHandle, n_64, aptr, Atype, xptr, Xtype, incx_64, yptr, Ytype, incy_64, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotEx_64(cublasHandle_t handle, int64_t n, const void* x, cudaDataType xType, int64_t incx, const void* y, cudaDataType yType, int64_t incy, void* result, cudaDataType resultType, cudaDataType executionType); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDotEx_v2_64(hipblasHandle_t handle, int64_t n, const void* x, hipDataType xType, int64_t incx, const void* y, hipDataType yType, int64_t incy, void* result, hipDataType resultType, hipDataType executionType); + // CHECK: blasStatus = hipblasDotEx_v2_64(blasHandle, n_64, xptr, Xtype, incx_64, yptr, Ytype, incy_64, image, DataType, Executiontype); + blasStatus = cublasDotEx_64(blasHandle, n_64, xptr, Xtype, incx_64, yptr, Ytype, incy_64, image, DataType, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotcEx_64(cublasHandle_t handle, int64_t n, const void* x, cudaDataType xType, int64_t incx, const void* y, cudaDataType yType, int64_t incy, void* result, 
cudaDataType resultType, cudaDataType executionType); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDotcEx_v2_64(hipblasHandle_t handle, int64_t n, const void* x, hipDataType xType, int64_t incx, const void* y, hipDataType yType, int64_t incy, void* result, hipDataType resultType, hipDataType executionType); + // CHECK: blasStatus = hipblasDotcEx_v2_64(blasHandle, n_64, xptr, Xtype, incx_64, yptr, Ytype, incy_64, image, DataType, Executiontype); + blasStatus = cublasDotcEx_64(blasHandle, n_64, xptr, Xtype, incx_64, yptr, Ytype, incy_64, image, DataType, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasNrm2Ex_64(cublasHandle_t handle, int64_t n, const void* x, cudaDataType xType, int64_t incx, void* result, cudaDataType resultType, cudaDataType executionType); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasNrm2Ex_v2_64(hipblasHandle_t handle, int64_t n, const void* x, hipDataType xType, int64_t incx, void* result, hipDataType resultType, hipDataType executionType); + // CHECK: blasStatus = hipblasNrm2Ex_v2_64(blasHandle, n_64, xptr, Xtype, incx_64, image, DataType, Executiontype); + blasStatus = cublasNrm2Ex_64(blasHandle, n_64, xptr, Xtype, incx_64, image, DataType, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotEx_64(cublasHandle_t handle, int64_t n, void* x, cudaDataType xType, int64_t incx, void* y, cudaDataType yType, int64_t incy, const void* c, const void* s, cudaDataType csType, cudaDataType executiontype); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasRotEx_v2_64(hipblasHandle_t handle, int64_t n, void* x, hipDataType xType, int64_t incx, void* y, hipDataType yType, int64_t incy, const void* c, const void* s, hipDataType csType, hipDataType executionType); + // CHECK: blasStatus = hipblasRotEx_v2_64(blasHandle, n_64, xptr, Xtype, incx_64, yptr, Ytype, incy_64, cptr, sptr, CStype, Executiontype); + blasStatus = cublasRotEx_64(blasHandle, n_64, xptr, Xtype, incx_64, yptr, Ytype, incy_64, cptr, sptr, 
CStype, Executiontype); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScalEx_64(cublasHandle_t handle, int64_t n, const void* alpha, cudaDataType alphaType, void* x, cudaDataType xType, int64_t incx, cudaDataType executionType); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasScalEx_v2_64(hipblasHandle_t handle, int64_t n, const void* alpha, hipDataType alphaType, void* x, hipDataType xType, int64_t incx, hipDataType executionType); + // CHECK: blasStatus = hipblasScalEx_v2_64(blasHandle, n_64, aptr, Atype, xptr, Xtype, incx_64, Executiontype); + blasStatus = cublasScalEx_64(blasHandle, n_64, aptr, Atype, xptr, Xtype, incx_64, Executiontype); #endif return 0; diff --git a/tests/unit_tests/synthetic/runtime_defines.cu b/tests/unit_tests/synthetic/runtime_defines.cu index abe0f425..01b4497f 100644 --- a/tests/unit_tests/synthetic/runtime_defines.cu +++ b/tests/unit_tests/synthetic/runtime_defines.cu @@ -1,9 +1,302 @@ // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL -// CHECK: #include +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: #include #include +#include +#include #include + __global__ __constant__ float INF_F; + __global__ __constant__ float NAN_F; + __global__ __constant__ float MIN_DENORM_F; + __global__ __constant__ float MAX_NORMAL_F; + __global__ __constant__ float NEG_ZERO_F; + __global__ __constant__ float ZERO_F; + __global__ __constant__ float ONE_F; + __global__ __constant__ float SQRT_HALF_F; + __global__ __constant__ float SQRT_HALF_HI_F; + __global__ __constant__ float SQRT_HALF_LO_F; + __global__ __constant__ float SQRT_TWO_F; + __global__ __constant__ float THIRD_F; + __global__ __constant__ float PIO4_F; + __global__ __constant__ float PIO2_F; + __global__ __constant__ float _3PIO4_F; + __global__ __constant__ float _2_OVER_PI_F; + __global__ __constant__ float SQRT_2_OVER_PI_F; + __global__ __constant__ float 
PI_F; + __global__ __constant__ float L2E_F; + __global__ __constant__ float L2T_F; + __global__ __constant__ float LG2_F; + __global__ __constant__ float LGE_F; + __global__ __constant__ float LN2_F; + __global__ __constant__ float LNT_F; + __global__ __constant__ float LNPI_F; + __global__ __constant__ float TWO_TO_M126_F; + __global__ __constant__ float TWO_TO_126_F; + __global__ __constant__ float NORM_HUGE_F; + __global__ __constant__ float TWO_TO_23_F; + __global__ __constant__ float TWO_TO_24_F; + __global__ __constant__ float TWO_TO_31_F; + __global__ __constant__ float TWO_TO_32_F; + __global__ __constant__ float REMQUO_BITS_F; + __global__ __constant__ float REMQUO_MASK_F; + __global__ __constant__ double TRIG_PLOSS_F; + __global__ __constant__ double INF; + __global__ __constant__ double NAN_; + __global__ __constant__ double NEG_ZERO; + __global__ __constant__ double MIN_DENORM; + __global__ __constant__ double ZERO; + __global__ __constant__ double ONE; + __global__ __constant__ double SQRT_TWO; + __global__ __constant__ double SQRT_HALF; + __global__ __constant__ double SQRT_HALF_HI; + __global__ __constant__ double SQRT_HALF_LO; + __global__ __constant__ double THIRD; + __global__ __constant__ double TWOTHIRD; + __global__ __constant__ double PIO4; + __global__ __constant__ double PIO4_HI; + __global__ __constant__ double PIO4_LO; + __global__ __constant__ double PIO2; + __global__ __constant__ double PIO2_HI; + __global__ __constant__ double PIO2_LO; + __global__ __constant__ double _3PIO4; + __global__ __constant__ double _2_OVER_PI; + __global__ __constant__ double _PI; + __global__ __constant__ double PI_HI; + __global__ __constant__ double PI_LO; + __global__ __constant__ double SQRT_2PI; + __global__ __constant__ double SQRT_2PI_HI; + __global__ __constant__ double SQRT_2PI_LO; + __global__ __constant__ double SQRT_PIO2; + __global__ __constant__ double SQRT_PIO2_HI; + __global__ __constant__ double SQRT_PIO2_LO; + __global__ __constant__ 
double SQRT_2OPI; + __global__ __constant__ double L2E; + __global__ __constant__ double L2E_HI; + __global__ __constant__ double L2E_LO; + __global__ __constant__ double L2T; + __global__ __constant__ double LG2; + __global__ __constant__ double LG2_HI; + __global__ __constant__ double LG2_LO; + __global__ __constant__ double LGE; + __global__ __constant__ double LGE_HI; + __global__ __constant__ double LGE_LO; + __global__ __constant__ double LN2; + __global__ __constant__ double LN2_HI; + __global__ __constant__ double LN2_LO; + __global__ __constant__ double LNT; + __global__ __constant__ double LNT_HI; + __global__ __constant__ double LNT_LO; + __global__ __constant__ double LNPI; + __global__ __constant__ double LN2_X_1024; + __global__ __constant__ double LN2_X_1025; + __global__ __constant__ double LN2_X_1075; + __global__ __constant__ double LG2_X_1024; + __global__ __constant__ double LG2_X_1075; + __global__ __constant__ double TWO_TO_23; + __global__ __constant__ double TWO_TO_52; + __global__ __constant__ double TWO_TO_53; + __global__ __constant__ double TWO_TO_54; + __global__ __constant__ double TWO_TO_M54; + __global__ __constant__ double TWO_TO_M1022; + __global__ __constant__ double TRIG_PLOSS; + __global__ __constant__ double DBL2INT_CVT; + +__global__ void init() { + // CHECK: INF_F = HIP_INF_F; + // CHECK-NEXT: NAN_F = HIP_NAN_F; + // CHECK-NEXT: MIN_DENORM_F = HIP_MIN_DENORM_F; + // CHECK-NEXT: MAX_NORMAL_F = HIP_MAX_NORMAL_F; + // CHECK-NEXT: NEG_ZERO_F = HIP_NEG_ZERO_F; + // CHECK-NEXT: ZERO_F = HIP_ZERO_F; + // CHECK-NEXT: ONE_F = HIP_ONE_F; + // CHECK-NEXT: SQRT_HALF_F = HIP_SQRT_HALF_F; + // CHECK-NEXT: SQRT_HALF_HI_F = HIP_SQRT_HALF_HI_F; + // CHECK-NEXT: SQRT_HALF_LO_F = HIP_SQRT_HALF_LO_F; + // CHECK-NEXT: SQRT_TWO_F = HIP_SQRT_TWO_F; + // CHECK-NEXT: THIRD_F = HIP_THIRD_F; + // CHECK-NEXT: PIO4_F = HIP_PIO4_F; + // CHECK-NEXT: PIO2_F = HIP_PIO2_F; + // CHECK-NEXT: _3PIO4_F = HIP_3PIO4_F; + // CHECK-NEXT: _2_OVER_PI_F = 
HIP_2_OVER_PI_F; + // CHECK-NEXT: SQRT_2_OVER_PI_F = HIP_SQRT_2_OVER_PI_F; + // CHECK-NEXT: PI_F = HIP_PI_F; + // CHECK-NEXT: L2E_F = HIP_L2E_F; + // CHECK-NEXT: L2T_F = HIP_L2T_F; + // CHECK-NEXT: LG2_F = HIP_LG2_F; + // CHECK-NEXT: LGE_F = HIP_LGE_F; + // CHECK-NEXT: LN2_F = HIP_LN2_F; + // CHECK-NEXT: LNT_F = HIP_LNT_F; + // CHECK-NEXT: LNPI_F = HIP_LNPI_F; + // CHECK-NEXT: TWO_TO_M126_F = HIP_TWO_TO_M126_F; + // CHECK-NEXT: TWO_TO_126_F = HIP_TWO_TO_126_F; + // CHECK-NEXT: NORM_HUGE_F = HIP_NORM_HUGE_F; + // CHECK-NEXT: TWO_TO_23_F = HIP_TWO_TO_23_F; + // CHECK-NEXT: TWO_TO_24_F = HIP_TWO_TO_24_F; + // CHECK-NEXT: TWO_TO_31_F = HIP_TWO_TO_31_F; + // CHECK-NEXT: TWO_TO_32_F = HIP_TWO_TO_32_F; + // CHECK-NEXT: REMQUO_BITS_F = HIP_REMQUO_BITS_F; + // CHECK-NEXT: REMQUO_MASK_F = HIP_REMQUO_MASK_F; + // CHECK-NEXT: TRIG_PLOSS_F = HIP_TRIG_PLOSS_F; + // CHECK-NEXT: INF = HIP_INF; + // CHECK-NEXT: NAN_ = HIP_NAN; + // CHECK-NEXT: NEG_ZERO = HIP_NEG_ZERO; + // CHECK-NEXT: MIN_DENORM = HIP_MIN_DENORM; + // CHECK-NEXT: ZERO = HIP_ZERO; + // CHECK-NEXT: ONE = HIP_ONE; + // CHECK-NEXT: SQRT_TWO = HIP_SQRT_TWO; + // CHECK-NEXT: SQRT_HALF = HIP_SQRT_HALF; + // CHECK-NEXT: SQRT_HALF_HI = HIP_SQRT_HALF_HI; + // CHECK-NEXT: SQRT_HALF_LO = HIP_SQRT_HALF_LO; + // CHECK-NEXT: THIRD = HIP_THIRD; + // CHECK-NEXT: TWOTHIRD = HIP_TWOTHIRD; + // CHECK-NEXT: PIO4 = HIP_PIO4; + // CHECK-NEXT: PIO4_HI = HIP_PIO4_HI; + // CHECK-NEXT: PIO4_LO = HIP_PIO4_LO; + // CHECK-NEXT: PIO2 = HIP_PIO2; + // CHECK-NEXT: PIO2_HI = HIP_PIO2_HI; + // CHECK-NEXT: PIO2_LO = HIP_PIO2_LO; + // CHECK-NEXT: _3PIO4 = HIP_3PIO4; + // CHECK-NEXT: _2_OVER_PI = HIP_2_OVER_PI; + // CHECK-NEXT: _PI = HIP_PI; + // CHECK-NEXT: PI_HI = HIP_PI_HI; + // CHECK-NEXT: PI_LO = HIP_PI_LO; + // CHECK-NEXT: SQRT_2PI = HIP_SQRT_2PI; + // CHECK-NEXT: SQRT_2PI_HI = HIP_SQRT_2PI_HI; + // CHECK-NEXT: SQRT_2PI_LO = HIP_SQRT_2PI_LO; + // CHECK-NEXT: SQRT_PIO2 = HIP_SQRT_PIO2; + // CHECK-NEXT: SQRT_PIO2_HI = HIP_SQRT_PIO2_HI; + // 
CHECK-NEXT: SQRT_PIO2_LO = HIP_SQRT_PIO2_LO; + // CHECK-NEXT: SQRT_2OPI = HIP_SQRT_2OPI; + // CHECK-NEXT: L2E = HIP_L2E; + // CHECK-NEXT: L2E_HI = HIP_L2E_HI; + // CHECK-NEXT: L2E_LO = HIP_L2E_LO; + // CHECK-NEXT: L2T = HIP_L2T; + // CHECK-NEXT: LG2 = HIP_LG2; + // CHECK-NEXT: LG2_HI = HIP_LG2_HI; + // CHECK-NEXT: LG2_LO = HIP_LG2_LO; + // CHECK-NEXT: LGE = HIP_LGE; + // CHECK-NEXT: LGE_HI = HIP_LGE_HI; + // CHECK-NEXT: LGE_LO = HIP_LGE_LO; + // CHECK-NEXT: LN2 = HIP_LN2; + // CHECK-NEXT: LN2_HI = HIP_LN2_HI; + // CHECK-NEXT: LN2_LO = HIP_LN2_LO; + // CHECK-NEXT: LNT = HIP_LNT; + // CHECK-NEXT: LNT_HI = HIP_LNT_HI; + // CHECK-NEXT: LNT_LO = HIP_LNT_LO; + // CHECK-NEXT: LNPI = HIP_LNPI; + // CHECK-NEXT: LN2_X_1024 = HIP_LN2_X_1024; + // CHECK-NEXT: LN2_X_1025 = HIP_LN2_X_1025; + // CHECK-NEXT: LN2_X_1075 = HIP_LN2_X_1075; + // CHECK-NEXT: LG2_X_1024 = HIP_LG2_X_1024; + // CHECK-NEXT: LG2_X_1075 = HIP_LG2_X_1075; + // CHECK-NEXT: TWO_TO_23 = HIP_TWO_TO_23; + // CHECK-NEXT: TWO_TO_52 = HIP_TWO_TO_52; + // CHECK-NEXT: TWO_TO_53 = HIP_TWO_TO_53; + // CHECK-NEXT: TWO_TO_54 = HIP_TWO_TO_54; + // CHECK-NEXT: TWO_TO_M54 = HIP_TWO_TO_M54; + // CHECK-NEXT: TWO_TO_M1022 = HIP_TWO_TO_M1022; + // CHECK-NEXT: TRIG_PLOSS = HIP_TRIG_PLOSS; + // CHECK-NEXT: DBL2INT_CVT = HIP_DBL2INT_CVT; + INF_F = CUDART_INF_F; + NAN_F = CUDART_NAN_F; + MIN_DENORM_F = CUDART_MIN_DENORM_F; + MAX_NORMAL_F = CUDART_MAX_NORMAL_F; + NEG_ZERO_F = CUDART_NEG_ZERO_F; + ZERO_F = CUDART_ZERO_F; + ONE_F = CUDART_ONE_F; + SQRT_HALF_F = CUDART_SQRT_HALF_F; + SQRT_HALF_HI_F = CUDART_SQRT_HALF_HI_F; + SQRT_HALF_LO_F = CUDART_SQRT_HALF_LO_F; + SQRT_TWO_F = CUDART_SQRT_TWO_F; + THIRD_F = CUDART_THIRD_F; + PIO4_F = CUDART_PIO4_F; + PIO2_F = CUDART_PIO2_F; + _3PIO4_F = CUDART_3PIO4_F; + _2_OVER_PI_F = CUDART_2_OVER_PI_F; + SQRT_2_OVER_PI_F = CUDART_SQRT_2_OVER_PI_F; + PI_F = CUDART_PI_F; + L2E_F = CUDART_L2E_F; + L2T_F = CUDART_L2T_F; + LG2_F = CUDART_LG2_F; + LGE_F = CUDART_LGE_F; + LN2_F = CUDART_LN2_F; + LNT_F = 
CUDART_LNT_F; + LNPI_F = CUDART_LNPI_F; + TWO_TO_M126_F = CUDART_TWO_TO_M126_F; + TWO_TO_126_F = CUDART_TWO_TO_126_F; + NORM_HUGE_F = CUDART_NORM_HUGE_F; + TWO_TO_23_F = CUDART_TWO_TO_23_F; + TWO_TO_24_F = CUDART_TWO_TO_24_F; + TWO_TO_31_F = CUDART_TWO_TO_31_F; + TWO_TO_32_F = CUDART_TWO_TO_32_F; + REMQUO_BITS_F = CUDART_REMQUO_BITS_F; + REMQUO_MASK_F = CUDART_REMQUO_MASK_F; + TRIG_PLOSS_F = CUDART_TRIG_PLOSS_F; + INF = CUDART_INF; + NAN_ = CUDART_NAN; + NEG_ZERO = CUDART_NEG_ZERO; + MIN_DENORM = CUDART_MIN_DENORM; + ZERO = CUDART_ZERO; + ONE = CUDART_ONE; + SQRT_TWO = CUDART_SQRT_TWO; + SQRT_HALF = CUDART_SQRT_HALF; + SQRT_HALF_HI = CUDART_SQRT_HALF_HI; + SQRT_HALF_LO = CUDART_SQRT_HALF_LO; + THIRD = CUDART_THIRD; + TWOTHIRD = CUDART_TWOTHIRD; + PIO4 = CUDART_PIO4; + PIO4_HI = CUDART_PIO4_HI; + PIO4_LO = CUDART_PIO4_LO; + PIO2 = CUDART_PIO2; + PIO2_HI = CUDART_PIO2_HI; + PIO2_LO = CUDART_PIO2_LO; + _3PIO4 = CUDART_3PIO4; + _2_OVER_PI = CUDART_2_OVER_PI; + _PI = CUDART_PI; + PI_HI = CUDART_PI_HI; + PI_LO = CUDART_PI_LO; + SQRT_2PI = CUDART_SQRT_2PI; + SQRT_2PI_HI = CUDART_SQRT_2PI_HI; + SQRT_2PI_LO = CUDART_SQRT_2PI_LO; + SQRT_PIO2 = CUDART_SQRT_PIO2; + SQRT_PIO2_HI = CUDART_SQRT_PIO2_HI; + SQRT_PIO2_LO = CUDART_SQRT_PIO2_LO; + SQRT_2OPI = CUDART_SQRT_2OPI; + L2E = CUDART_L2E; + L2E_HI = CUDART_L2E_HI; + L2E_LO = CUDART_L2E_LO; + L2T = CUDART_L2T; + LG2 = CUDART_LG2; + LG2_HI = CUDART_LG2_HI; + LG2_LO = CUDART_LG2_LO; + LGE = CUDART_LGE; + LGE_HI = CUDART_LGE_HI; + LGE_LO = CUDART_LGE_LO; + LN2 = CUDART_LN2; + LN2_HI = CUDART_LN2_HI; + LN2_LO = CUDART_LN2_LO; + LNT = CUDART_LNT; + LNT_HI = CUDART_LNT_HI; + LNT_LO = CUDART_LNT_LO; + LNPI = CUDART_LNPI; + LN2_X_1024 = CUDART_LN2_X_1024; + LN2_X_1025 = CUDART_LN2_X_1025; + LN2_X_1075 = CUDART_LN2_X_1075; + LG2_X_1024 = CUDART_LG2_X_1024; + LG2_X_1075 = CUDART_LG2_X_1075; + TWO_TO_23 = CUDART_TWO_TO_23; + TWO_TO_52 = CUDART_TWO_TO_52; + TWO_TO_53 = CUDART_TWO_TO_53; + TWO_TO_54 = CUDART_TWO_TO_54; + TWO_TO_M54 = 
CUDART_TWO_TO_M54; + TWO_TO_M1022 = CUDART_TWO_TO_M1022; + TRIG_PLOSS = CUDART_TRIG_PLOSS; + DBL2INT_CVT = CUDART_DBL2INT_CVT; +} + int main() { printf("08. CUDA Runtime API Defines synthetic test\n");