diff --git a/CHANGELOG.md b/CHANGELOG.md index 7aa4a9b8..64f823f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,27 @@ Documentation for HIPIFY is available at [https://rocmdocs.amd.com/projects/HIPIFY/en/latest/](https://rocmdocs.amd.com/projects/HIPIFY/en/latest/). -## HIPIFY for ROCm 6.2.2 +## HIPIFY for ROCm 6.3.0 + +### Additions + +* CUDA 12.6.1 support +* cuDNN 9.5.0 support +* LLVM 19.1.1 support +* `rocBLAS` 64-bit APIs support +* Initial support for direct hipification of `cuDNN` into `MIOpen` under the `--roc` option +* Initial support for direct hipification of `cuRAND` into `rocRAND` under the `--roc` option +* [#1650] Added a filtering ability for the supplementary hipification scripts + +### Fixes + +* Correct `roc` header files support + +### Known issues + +* [#1617] Support for `fp8` data types + +## HIPIFY for ROCm 6.2.4 ### Additions diff --git a/bin/hipify-perl b/bin/hipify-perl index e9c09657..74249276 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1255,6 +1255,7 @@ my %removed_funcs = ( "cudnnSaveAlgorithm" => "9.0.0", "cudnnRestoreAlgorithm" => "9.0.0", "cudnnRNNSetClip" => "9.0.0", + "cudnnRNNPaddingMode_t" => "9.0.0", "cudnnRNNGetClip" => "9.0.0", "cudnnRNNForwardTrainingEx" => "9.0.0", "cudnnRNNForwardTraining" => "9.0.0", @@ -1375,6 +1376,8 @@ my %removed_funcs = ( "CUSPARSE_ALG_MERGE_PATH" => "12.0", "CUSPARSE_ALG1" => "11.0", "CUSPARSE_ALG0" => "11.0", + "CUDNN_RNN_PADDED_IO_ENABLED" => "9.0.0", + "CUDNN_RNN_PADDED_IO_DISABLED" => "9.0.0", "CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT" => "8.0.1", "CUDNN_CONVOLUTION_FWD_PREFER_FASTEST" => "8.0.1", "CUDNN_CONVOLUTION_FWD_NO_WORKSPACE" => "8.0.1", @@ -1389,7 +1392,24 @@ my %removed_funcs = ( ); my %experimental_funcs = ( - + "cusolverDnXpotrs" => "6.3.0", + "cusolverDnXpotrf_bufferSize" => "6.3.0", + "cusolverDnXpotrf" => "6.3.0", + "cusolverDnXgeqrf_bufferSize" => "6.3.0", + "cusolverDnXgeqrf" => "6.3.0", + "cusolverDnSetDeterministicMode" => "6.3.0", + 
"cusolverDnGetDeterministicMode" => "6.3.0", + "cudaGraphNodeSetParams" => "6.3.0", + "cudaGraphExecNodeSetParams" => "6.3.0", + "cudaGraphExecGetFlags" => "6.3.0", + "cuGraphNodeSetParams" => "6.3.0", + "cuGraphMemcpyNodeSetParams" => "6.3.0", + "cuGraphMemcpyNodeGetParams" => "6.3.0", + "cuGraphExecNodeSetParams" => "6.3.0", + "cuGraphExecMemsetNodeSetParams" => "6.3.0", + "cuGraphExecMemcpyNodeSetParams" => "6.3.0", + "cuGraphExecGetFlags" => "6.3.0", + "cuGraphAddMemFreeNode" => "6.3.0" ); $print_stats = 1 if $examine; @@ -1527,6 +1547,24 @@ sub subst { } sub experimentalSubstitutions { + subst("cuGraphAddMemFreeNode", "hipDrvGraphAddMemFreeNode", "graph"); + subst("cuGraphExecGetFlags", "hipGraphExecGetFlags", "graph"); + subst("cuGraphExecMemcpyNodeSetParams", "hipDrvGraphExecMemcpyNodeSetParams", "graph"); + subst("cuGraphExecMemsetNodeSetParams", "hipDrvGraphExecMemsetNodeSetParams", "graph"); + subst("cuGraphExecNodeSetParams", "hipGraphExecNodeSetParams", "graph"); + subst("cuGraphMemcpyNodeGetParams", "hipDrvGraphMemcpyNodeGetParams", "graph"); + subst("cuGraphMemcpyNodeSetParams", "hipDrvGraphMemcpyNodeSetParams", "graph"); + subst("cuGraphNodeSetParams", "hipGraphNodeSetParams", "graph"); + subst("cudaGraphExecGetFlags", "hipGraphExecGetFlags", "graph"); + subst("cudaGraphExecNodeSetParams", "hipGraphExecNodeSetParams", "graph"); + subst("cudaGraphNodeSetParams", "hipGraphNodeSetParams", "graph"); + subst("cusolverDnGetDeterministicMode", "hipsolverDnGetDeterministicMode", "library"); + subst("cusolverDnSetDeterministicMode", "hipsolverDnSetDeterministicMode", "library"); + subst("cusolverDnXgeqrf", "hipsolverDnXgeqrf", "library"); + subst("cusolverDnXgeqrf_bufferSize", "hipsolverDnXgeqrf_bufferSize", "library"); + subst("cusolverDnXpotrf", "hipsolverDnXpotrf", "library"); + subst("cusolverDnXpotrf_bufferSize", "hipsolverDnXpotrf_bufferSize", "library"); + subst("cusolverDnXpotrs", "hipsolverDnXpotrs", "library"); } sub rocSubstitutions { @@ -1562,6 
+1600,7 @@ sub rocSubstitutions { subst("cublasCgemvBatched", "rocblas_cgemv_batched", "library"); subst("cublasCgemvBatched_64", "rocblas_cgemv_batched_64", "library"); subst("cublasCgemvStridedBatched", "rocblas_cgemv_strided_batched", "library"); + subst("cublasCgemvStridedBatched_64", "rocblas_cgemv_strided_batched_64", "library"); subst("cublasCgemv_64", "rocblas_cgemv_64", "library"); subst("cublasCgemv_v2", "rocblas_cgemv", "library"); subst("cublasCgemv_v2_64", "rocblas_cgemv_64", "library"); @@ -1570,26 +1609,40 @@ sub rocSubstitutions { subst("cublasCgeru", "rocblas_cgeru", "library"); subst("cublasCgeru_v2", "rocblas_cgeru", "library"); subst("cublasChbmv", "rocblas_chbmv", "library"); + subst("cublasChbmv_64", "rocblas_chbmv_64", "library"); subst("cublasChbmv_v2", "rocblas_chbmv", "library"); + subst("cublasChbmv_v2_64", "rocblas_chbmv_64", "library"); subst("cublasChemm", "rocblas_chemm", "library"); subst("cublasChemm_v2", "rocblas_chemm", "library"); subst("cublasChemv", "rocblas_chemv", "library"); + subst("cublasChemv_64", "rocblas_chemv_64", "library"); subst("cublasChemv_v2", "rocblas_chemv", "library"); + subst("cublasChemv_v2_64", "rocblas_chemv_64", "library"); subst("cublasCher", "rocblas_cher", "library"); subst("cublasCher2", "rocblas_cher2", "library"); + subst("cublasCher2_64", "rocblas_cher2_64", "library"); subst("cublasCher2_v2", "rocblas_cher2", "library"); + subst("cublasCher2_v2_64", "rocblas_cher2_64", "library"); subst("cublasCher2k", "rocblas_cher2k", "library"); subst("cublasCher2k_v2", "rocblas_cher2k", "library"); + subst("cublasCher_64", "rocblas_cher_64", "library"); subst("cublasCher_v2", "rocblas_cher", "library"); + subst("cublasCher_v2_64", "rocblas_cher_64", "library"); subst("cublasCherk", "rocblas_cherk", "library"); subst("cublasCherk_v2", "rocblas_cherk", "library"); subst("cublasCherkx", "rocblas_cherkx", "library"); subst("cublasChpmv", "rocblas_chpmv", "library"); + subst("cublasChpmv_64", "rocblas_chpmv_64", 
"library"); subst("cublasChpmv_v2", "rocblas_chpmv", "library"); + subst("cublasChpmv_v2_64", "rocblas_chpmv_64", "library"); subst("cublasChpr", "rocblas_chpr", "library"); subst("cublasChpr2", "rocblas_chpr2", "library"); + subst("cublasChpr2_64", "rocblas_chpr2_64", "library"); subst("cublasChpr2_v2", "rocblas_chpr2", "library"); + subst("cublasChpr2_v2_64", "rocblas_chpr2_64", "library"); + subst("cublasChpr_64", "rocblas_chpr_64", "library"); subst("cublasChpr_v2", "rocblas_chpr", "library"); + subst("cublasChpr_v2_64", "rocblas_chpr_64", "library"); subst("cublasCreate", "rocblas_create_handle", "library"); subst("cublasCreate_v2", "rocblas_create_handle", "library"); subst("cublasCrot", "rocblas_crot", "library"); @@ -1617,33 +1670,49 @@ sub rocSubstitutions { subst("cublasCsymm", "rocblas_csymm", "library"); subst("cublasCsymm_v2", "rocblas_csymm", "library"); subst("cublasCsymv", "rocblas_csymv", "library"); + subst("cublasCsymv_64", "rocblas_csymv_64", "library"); subst("cublasCsymv_v2", "rocblas_csymv", "library"); + subst("cublasCsymv_v2_64", "rocblas_csymv_64", "library"); subst("cublasCsyr", "rocblas_csyr", "library"); subst("cublasCsyr2", "rocblas_csyr2", "library"); + subst("cublasCsyr2_64", "rocblas_csyr2_64", "library"); subst("cublasCsyr2_v2", "rocblas_csyr2", "library"); + subst("cublasCsyr2_v2_64", "rocblas_csyr2_64", "library"); subst("cublasCsyr2k", "rocblas_csyr2k", "library"); subst("cublasCsyr2k_v2", "rocblas_csyr2k", "library"); + subst("cublasCsyr_64", "rocblas_csyr_64", "library"); subst("cublasCsyr_v2", "rocblas_csyr", "library"); + subst("cublasCsyr_v2_64", "rocblas_csyr_64", "library"); subst("cublasCsyrk", "rocblas_csyrk", "library"); subst("cublasCsyrk_v2", "rocblas_csyrk", "library"); subst("cublasCsyrkx", "rocblas_csyrkx", "library"); subst("cublasCtbmv", "rocblas_ctbmv", "library"); + subst("cublasCtbmv_64", "rocblas_ctbmv_64", "library"); subst("cublasCtbmv_v2", "rocblas_ctbmv", "library"); + subst("cublasCtbmv_v2_64", 
"rocblas_ctbmv_64", "library"); subst("cublasCtbsv", "rocblas_ctbsv", "library"); + subst("cublasCtbsv_64", "rocblas_ctbsv_64", "library"); subst("cublasCtbsv_v2", "rocblas_ctbsv", "library"); + subst("cublasCtbsv_v2_64", "rocblas_ctbsv_64", "library"); subst("cublasCtpmv", "rocblas_ctpmv", "library"); + subst("cublasCtpmv_64", "rocblas_ctpmv_64", "library"); subst("cublasCtpmv_v2", "rocblas_ctpmv", "library"); + subst("cublasCtpmv_v2_64", "rocblas_ctpmv_64", "library"); subst("cublasCtpsv", "rocblas_ctpsv", "library"); subst("cublasCtpsv_v2", "rocblas_ctpsv", "library"); subst("cublasCtrmm", "rocblas_ctrmm", "library"); subst("cublasCtrmm_v2", "rocblas_ctrmm", "library"); subst("cublasCtrmv", "rocblas_ctrmv", "library"); + subst("cublasCtrmv_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrmv_v2", "rocblas_ctrmv", "library"); + subst("cublasCtrmv_v2_64", "rocblas_ctrmv_64", "library"); subst("cublasCtrsm", "rocblas_ctrsm", "library"); subst("cublasCtrsmBatched", "rocblas_ctrsm_batched", "library"); subst("cublasCtrsm_v2", "rocblas_ctrsm", "library"); subst("cublasCtrsv", "rocblas_ctrsv", "library"); + subst("cublasCtrsv_64", "rocblas_ctrsv_64", "library"); subst("cublasCtrsv_v2", "rocblas_ctrsv", "library"); + subst("cublasCtrsv_v2_64", "rocblas_ctrsv_64", "library"); subst("cublasDasum", "rocblas_dasum", "library"); subst("cublasDasum_64", "rocblas_dasum_64", "library"); subst("cublasDasum_v2", "rocblas_dasum", "library"); @@ -1675,6 +1744,8 @@ sub rocSubstitutions { subst("cublasDgemv", "rocblas_dgemv", "library"); subst("cublasDgemvBatched", "rocblas_dgemv_batched", "library"); subst("cublasDgemvBatched_64", "rocblas_dgemv_batched_64", "library"); + subst("cublasDgemvStridedBatched", "rocblas_dgemv_strided_batched", "library"); + subst("cublasDgemvStridedBatched_64", "rocblas_dgemv_strided_batched_64", "library"); subst("cublasDgemv_64", "rocblas_dgemv_64", "library"); subst("cublasDgemv_v2", "rocblas_dgemv", "library"); subst("cublasDgemv_v2_64", 
"rocblas_dgemv_64", "library"); @@ -1701,17 +1772,25 @@ sub rocSubstitutions { subst("cublasDrotmg", "rocblas_drotmg", "library"); subst("cublasDrotmg_v2", "rocblas_drotmg", "library"); subst("cublasDsbmv", "rocblas_dsbmv", "library"); + subst("cublasDsbmv_64", "rocblas_dsbmv_64", "library"); subst("cublasDsbmv_v2", "rocblas_dsbmv", "library"); + subst("cublasDsbmv_v2_64", "rocblas_dsbmv_64", "library"); subst("cublasDscal", "rocblas_dscal", "library"); subst("cublasDscal_64", "rocblas_dscal_64", "library"); subst("cublasDscal_v2", "rocblas_dscal", "library"); subst("cublasDscal_v2_64", "rocblas_dscal_64", "library"); subst("cublasDspmv", "rocblas_dspmv", "library"); + subst("cublasDspmv_64", "rocblas_dspmv_64", "library"); subst("cublasDspmv_v2", "rocblas_dspmv", "library"); + subst("cublasDspmv_v2_64", "rocblas_dspmv_64", "library"); subst("cublasDspr", "rocblas_dspr", "library"); subst("cublasDspr2", "rocblas_dspr2", "library"); + subst("cublasDspr2_64", "rocblas_dspr2_64", "library"); subst("cublasDspr2_v2", "rocblas_dspr2", "library"); + subst("cublasDspr2_v2_64", "rocblas_dspr2_64", "library"); + subst("cublasDspr_64", "rocblas_dspr_64", "library"); subst("cublasDspr_v2", "rocblas_dspr", "library"); + subst("cublasDspr_v2_64", "rocblas_dspr_64", "library"); subst("cublasDswap", "rocblas_dswap", "library"); subst("cublasDswap_64", "rocblas_dswap_64", "library"); subst("cublasDswap_v2", "rocblas_dswap", "library"); @@ -1719,33 +1798,49 @@ sub rocSubstitutions { subst("cublasDsymm", "rocblas_dsymm", "library"); subst("cublasDsymm_v2", "rocblas_dsymm", "library"); subst("cublasDsymv", "rocblas_dsymv", "library"); + subst("cublasDsymv_64", "rocblas_dsymv_64", "library"); subst("cublasDsymv_v2", "rocblas_dsymv", "library"); + subst("cublasDsymv_v2_64", "rocblas_dsymv_64", "library"); subst("cublasDsyr", "rocblas_dsyr", "library"); subst("cublasDsyr2", "rocblas_dsyr2", "library"); + subst("cublasDsyr2_64", "rocblas_dsyr2_64", "library"); subst("cublasDsyr2_v2", 
"rocblas_dsyr2", "library"); + subst("cublasDsyr2_v2_64", "rocblas_dsyr2_64", "library"); subst("cublasDsyr2k", "rocblas_dsyr2k", "library"); subst("cublasDsyr2k_v2", "rocblas_dsyr2k", "library"); + subst("cublasDsyr_64", "rocblas_dsyr_64", "library"); subst("cublasDsyr_v2", "rocblas_dsyr", "library"); + subst("cublasDsyr_v2_64", "rocblas_dsyr_64", "library"); subst("cublasDsyrk", "rocblas_dsyrk", "library"); subst("cublasDsyrk_v2", "rocblas_dsyrk", "library"); subst("cublasDsyrkx", "rocblas_dsyrkx", "library"); subst("cublasDtbmv", "rocblas_dtbmv", "library"); + subst("cublasDtbmv_64", "rocblas_dtbmv_64", "library"); subst("cublasDtbmv_v2", "rocblas_dtbmv", "library"); + subst("cublasDtbmv_v2_64", "rocblas_dtbmv_64", "library"); subst("cublasDtbsv", "rocblas_dtbsv", "library"); + subst("cublasDtbsv_64", "rocblas_dtbsv_64", "library"); subst("cublasDtbsv_v2", "rocblas_dtbsv", "library"); + subst("cublasDtbsv_v2_64", "rocblas_dtbsv_64", "library"); subst("cublasDtpmv", "rocblas_dtpmv", "library"); + subst("cublasDtpmv_64", "rocblas_dtpmv_64", "library"); subst("cublasDtpmv_v2", "rocblas_dtpmv", "library"); + subst("cublasDtpmv_v2_64", "rocblas_dtpmv_64", "library"); subst("cublasDtpsv", "rocblas_dtpsv", "library"); subst("cublasDtpsv_v2", "rocblas_dtpsv", "library"); subst("cublasDtrmm", "rocblas_dtrmm", "library"); subst("cublasDtrmm_v2", "rocblas_dtrmm", "library"); subst("cublasDtrmv", "rocblas_dtrmv", "library"); + subst("cublasDtrmv_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrmv_v2", "rocblas_dtrmv", "library"); + subst("cublasDtrmv_v2_64", "rocblas_dtrmv_64", "library"); subst("cublasDtrsm", "rocblas_dtrsm", "library"); subst("cublasDtrsmBatched", "rocblas_dtrsm_batched", "library"); subst("cublasDtrsm_v2", "rocblas_dtrsm", "library"); subst("cublasDtrsv", "rocblas_dtrsv", "library"); + subst("cublasDtrsv_64", "rocblas_dtrsv_64", "library"); subst("cublasDtrsv_v2", "rocblas_dtrsv", "library"); + subst("cublasDtrsv_v2_64", "rocblas_dtrsv_64", 
"library"); subst("cublasDzasum", "rocblas_dzasum", "library"); subst("cublasDzasum_64", "rocblas_dzasum_64", "library"); subst("cublasDzasum_v2", "rocblas_dzasum", "library"); @@ -1771,9 +1866,11 @@ sub rocSubstitutions { subst("cublasHSHgemvBatched", "rocblas_hshgemv_batched", "library"); subst("cublasHSHgemvBatched_64", "rocblas_hshgemv_batched_64", "library"); subst("cublasHSHgemvStridedBatched", "rocblas_hshgemv_strided_batched", "library"); + subst("cublasHSHgemvStridedBatched_64", "rocblas_hshgemv_strided_batched_64", "library"); subst("cublasHSSgemvBatched", "rocblas_hssgemv_batched", "library"); subst("cublasHSSgemvBatched_64", "rocblas_hssgemv_batched_64", "library"); subst("cublasHSSgemvStridedBatched", "rocblas_hssgemv_strided_batched", "library"); + subst("cublasHSSgemvStridedBatched_64", "rocblas_hssgemv_strided_batched_64", "library"); subst("cublasHgemm", "rocblas_hgemm", "library"); subst("cublasHgemmBatched", "rocblas_hgemm_batched", "library"); subst("cublasHgemmStridedBatched", "rocblas_hgemm_strided_batched", "library"); @@ -1863,6 +1960,8 @@ sub rocSubstitutions { subst("cublasSgemv", "rocblas_sgemv", "library"); subst("cublasSgemvBatched", "rocblas_sgemv_batched", "library"); subst("cublasSgemvBatched_64", "rocblas_sgemv_batched_64", "library"); + subst("cublasSgemvStridedBatched", "rocblas_sgemv_strided_batched", "library"); + subst("cublasSgemvStridedBatched_64", "rocblas_sgemv_strided_batched_64", "library"); subst("cublasSgemv_64", "rocblas_sgemv_64", "library"); subst("cublasSgemv_v2", "rocblas_sgemv", "library"); subst("cublasSgemv_v2_64", "rocblas_sgemv_64", "library"); @@ -1885,17 +1984,25 @@ sub rocSubstitutions { subst("cublasSrotmg", "rocblas_srotmg", "library"); subst("cublasSrotmg_v2", "rocblas_srotmg", "library"); subst("cublasSsbmv", "rocblas_ssbmv", "library"); + subst("cublasSsbmv_64", "rocblas_ssbmv_64", "library"); subst("cublasSsbmv_v2", "rocblas_ssbmv", "library"); + subst("cublasSsbmv_v2_64", "rocblas_ssbmv_64", 
"library"); subst("cublasSscal", "rocblas_sscal", "library"); subst("cublasSscal_64", "rocblas_sscal_64", "library"); subst("cublasSscal_v2", "rocblas_sscal", "library"); subst("cublasSscal_v2_64", "rocblas_sscal_64", "library"); subst("cublasSspmv", "rocblas_sspmv", "library"); + subst("cublasSspmv_64", "rocblas_sspmv_64", "library"); subst("cublasSspmv_v2", "rocblas_sspmv", "library"); + subst("cublasSspmv_v2_64", "rocblas_sspmv_64", "library"); subst("cublasSspr", "rocblas_sspr", "library"); subst("cublasSspr2", "rocblas_sspr2", "library"); + subst("cublasSspr2_64", "rocblas_sspr2_64", "library"); subst("cublasSspr2_v2", "rocblas_sspr2", "library"); + subst("cublasSspr2_v2_64", "rocblas_sspr2_64", "library"); + subst("cublasSspr_64", "rocblas_sspr_64", "library"); subst("cublasSspr_v2", "rocblas_sspr", "library"); + subst("cublasSspr_v2_64", "rocblas_sspr_64", "library"); subst("cublasSswap", "rocblas_sswap", "library"); subst("cublasSswap_64", "rocblas_sswap_64", "library"); subst("cublasSswap_v2", "rocblas_sswap", "library"); @@ -1903,39 +2010,57 @@ sub rocSubstitutions { subst("cublasSsymm", "rocblas_ssymm", "library"); subst("cublasSsymm_v2", "rocblas_ssymm", "library"); subst("cublasSsymv", "rocblas_ssymv", "library"); + subst("cublasSsymv_64", "rocblas_ssymv_64", "library"); subst("cublasSsymv_v2", "rocblas_ssymv", "library"); + subst("cublasSsymv_v2_64", "rocblas_ssymv_64", "library"); subst("cublasSsyr", "rocblas_ssyr", "library"); subst("cublasSsyr2", "rocblas_ssyr2", "library"); + subst("cublasSsyr2_64", "rocblas_ssyr2_64", "library"); subst("cublasSsyr2_v2", "rocblas_ssyr2", "library"); + subst("cublasSsyr2_v2_64", "rocblas_ssyr2_64", "library"); subst("cublasSsyr2k", "rocblas_ssyr2k", "library"); subst("cublasSsyr2k_v2", "rocblas_ssyr2k", "library"); + subst("cublasSsyr_64", "rocblas_ssyr_64", "library"); subst("cublasSsyr_v2", "rocblas_ssyr", "library"); + subst("cublasSsyr_v2_64", "rocblas_ssyr_64", "library"); subst("cublasSsyrk", "rocblas_ssyrk", 
"library"); subst("cublasSsyrk_v2", "rocblas_ssyrk", "library"); subst("cublasSsyrkx", "rocblas_ssyrkx", "library"); subst("cublasStbmv", "rocblas_stbmv", "library"); + subst("cublasStbmv_64", "rocblas_stbmv_64", "library"); subst("cublasStbmv_v2", "rocblas_stbmv", "library"); + subst("cublasStbmv_v2_64", "rocblas_stbmv_64", "library"); subst("cublasStbsv", "rocblas_stbsv", "library"); + subst("cublasStbsv_64", "rocblas_stbsv_64", "library"); subst("cublasStbsv_v2", "rocblas_stbsv", "library"); + subst("cublasStbsv_v2_64", "rocblas_stbsv_64", "library"); subst("cublasStpmv", "rocblas_stpmv", "library"); + subst("cublasStpmv_64", "rocblas_stpmv_64", "library"); subst("cublasStpmv_v2", "rocblas_stpmv", "library"); + subst("cublasStpmv_v2_64", "rocblas_stpmv_64", "library"); subst("cublasStpsv", "rocblas_stpsv", "library"); subst("cublasStpsv_v2", "rocblas_stpsv", "library"); subst("cublasStrmm", "rocblas_strmm", "library"); subst("cublasStrmm_v2", "rocblas_strmm", "library"); subst("cublasStrmv", "rocblas_strmv", "library"); + subst("cublasStrmv_64", "rocblas_strmv_64", "library"); subst("cublasStrmv_v2", "rocblas_strmv", "library"); + subst("cublasStrmv_v2_64", "rocblas_strmv_64", "library"); subst("cublasStrsm", "rocblas_strsm", "library"); subst("cublasStrsmBatched", "rocblas_strsm_batched", "library"); subst("cublasStrsm_v2", "rocblas_strsm", "library"); subst("cublasStrsv", "rocblas_strsv", "library"); + subst("cublasStrsv_64", "rocblas_strsv_64", "library"); subst("cublasStrsv_v2", "rocblas_strsv", "library"); + subst("cublasStrsv_v2_64", "rocblas_strsv_64", "library"); subst("cublasTSSgemvBatched", "rocblas_tssgemv_batched", "library"); subst("cublasTSSgemvBatched_64", "rocblas_tssgemv_batched_64", "library"); subst("cublasTSSgemvStridedBatched", "rocblas_tssgemv_strided_batched", "library"); + subst("cublasTSSgemvStridedBatched_64", "rocblas_tssgemv_strided_batched_64", "library"); subst("cublasTSTgemvBatched", "rocblas_tstgemv_batched", "library"); 
subst("cublasTSTgemvBatched_64", "rocblas_tstgemv_batched_64", "library"); subst("cublasTSTgemvStridedBatched", "rocblas_tstgemv_strided_batched", "library"); + subst("cublasTSTgemvStridedBatched_64", "rocblas_tstgemv_strided_batched_64", "library"); subst("cublasZaxpy", "rocblas_zaxpy", "library"); subst("cublasZaxpy_64", "rocblas_zaxpy_64", "library"); subst("cublasZaxpy_v2", "rocblas_zaxpy", "library"); @@ -1974,6 +2099,7 @@ sub rocSubstitutions { subst("cublasZgemvBatched", "rocblas_zgemv_batched", "library"); subst("cublasZgemvBatched_64", "rocblas_zgemv_batched_64", "library"); subst("cublasZgemvStridedBatched", "rocblas_zgemv_strided_batched", "library"); + subst("cublasZgemvStridedBatched_64", "rocblas_zgemv_strided_batched_64", "library"); subst("cublasZgemv_64", "rocblas_zgemv_64", "library"); subst("cublasZgemv_v2", "rocblas_zgemv", "library"); subst("cublasZgemv_v2_64", "rocblas_zgemv_64", "library"); @@ -1982,26 +2108,40 @@ sub rocSubstitutions { subst("cublasZgeru", "rocblas_zgeru", "library"); subst("cublasZgeru_v2", "rocblas_zgeru", "library"); subst("cublasZhbmv", "rocblas_zhbmv", "library"); + subst("cublasZhbmv_64", "rocblas_zhbmv_64", "library"); subst("cublasZhbmv_v2", "rocblas_zhbmv", "library"); + subst("cublasZhbmv_v2_64", "rocblas_zhbmv_64", "library"); subst("cublasZhemm", "rocblas_zhemm", "library"); subst("cublasZhemm_v2", "rocblas_zhemm", "library"); subst("cublasZhemv", "rocblas_zhemv", "library"); + subst("cublasZhemv_64", "rocblas_zhemv_64", "library"); subst("cublasZhemv_v2", "rocblas_zhemv", "library"); + subst("cublasZhemv_v2_64", "rocblas_zhemv_64", "library"); subst("cublasZher", "rocblas_zher", "library"); subst("cublasZher2", "rocblas_zher2", "library"); + subst("cublasZher2_64", "rocblas_zher2_64", "library"); subst("cublasZher2_v2", "rocblas_zher2", "library"); + subst("cublasZher2_v2_64", "rocblas_zher2_64", "library"); subst("cublasZher2k", "rocblas_zher2k", "library"); subst("cublasZher2k_v2", "rocblas_zher2k", 
"library"); + subst("cublasZher_64", "rocblas_zher_64", "library"); subst("cublasZher_v2", "rocblas_zher", "library"); + subst("cublasZher_v2_64", "rocblas_zher_64", "library"); subst("cublasZherk", "rocblas_zherk", "library"); subst("cublasZherk_v2", "rocblas_zherk", "library"); subst("cublasZherkx", "rocblas_zherkx", "library"); subst("cublasZhpmv", "rocblas_zhpmv", "library"); + subst("cublasZhpmv_64", "rocblas_zhpmv_64", "library"); subst("cublasZhpmv_v2", "rocblas_zhpmv", "library"); + subst("cublasZhpmv_v2_64", "rocblas_zhpmv_64", "library"); subst("cublasZhpr", "rocblas_zhpr", "library"); subst("cublasZhpr2", "rocblas_zhpr2", "library"); + subst("cublasZhpr2_64", "rocblas_zhpr2_64", "library"); subst("cublasZhpr2_v2", "rocblas_zhpr2", "library"); + subst("cublasZhpr2_v2_64", "rocblas_zhpr2_64", "library"); + subst("cublasZhpr_64", "rocblas_zhpr_64", "library"); subst("cublasZhpr_v2", "rocblas_zhpr", "library"); + subst("cublasZhpr_v2_64", "rocblas_zhpr_64", "library"); subst("cublasZrot", "rocblas_zrot", "library"); subst("cublasZrot_64", "rocblas_zrot_64", "library"); subst("cublasZrot_v2", "rocblas_zrot", "library"); @@ -2019,33 +2159,49 @@ sub rocSubstitutions { subst("cublasZsymm", "rocblas_zsymm", "library"); subst("cublasZsymm_v2", "rocblas_zsymm", "library"); subst("cublasZsymv", "rocblas_zsymv", "library"); + subst("cublasZsymv_64", "rocblas_zsymv_64", "library"); subst("cublasZsymv_v2", "rocblas_zsymv", "library"); + subst("cublasZsymv_v2_64", "rocblas_zsymv_64", "library"); subst("cublasZsyr", "rocblas_zsyr", "library"); subst("cublasZsyr2", "rocblas_zsyr2", "library"); + subst("cublasZsyr2_64", "rocblas_zsyr2_64", "library"); subst("cublasZsyr2_v2", "rocblas_zsyr2", "library"); + subst("cublasZsyr2_v2_64", "rocblas_zsyr2_64", "library"); subst("cublasZsyr2k", "rocblas_zsyr2k", "library"); subst("cublasZsyr2k_v2", "rocblas_zsyr2k", "library"); + subst("cublasZsyr_64", "rocblas_zsyr_64", "library"); subst("cublasZsyr_v2", "rocblas_zsyr", "library"); 
+ subst("cublasZsyr_v2_64", "rocblas_zsyr_64", "library"); subst("cublasZsyrk", "rocblas_zsyrk", "library"); subst("cublasZsyrk_v2", "rocblas_zsyrk", "library"); subst("cublasZsyrkx", "rocblas_zsyrkx", "library"); subst("cublasZtbmv", "rocblas_ztbmv", "library"); + subst("cublasZtbmv_64", "rocblas_ztbmv_64", "library"); subst("cublasZtbmv_v2", "rocblas_ztbmv", "library"); + subst("cublasZtbmv_v2_64", "rocblas_ztbmv_64", "library"); subst("cublasZtbsv", "rocblas_ztbsv", "library"); + subst("cublasZtbsv_64", "rocblas_ztbsv_64", "library"); subst("cublasZtbsv_v2", "rocblas_ztbsv", "library"); + subst("cublasZtbsv_v2_64", "rocblas_ztbsv_64", "library"); subst("cublasZtpmv", "rocblas_ztpmv", "library"); + subst("cublasZtpmv_64", "rocblas_ztpmv_64", "library"); subst("cublasZtpmv_v2", "rocblas_ztpmv", "library"); + subst("cublasZtpmv_v2_64", "rocblas_ztpmv_64", "library"); subst("cublasZtpsv", "rocblas_ztpsv", "library"); subst("cublasZtpsv_v2", "rocblas_ztpsv", "library"); subst("cublasZtrmm", "rocblas_ztrmm", "library"); subst("cublasZtrmm_v2", "rocblas_ztrmm", "library"); subst("cublasZtrmv", "rocblas_ztrmv", "library"); + subst("cublasZtrmv_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrmv_v2", "rocblas_ztrmv", "library"); + subst("cublasZtrmv_v2_64", "rocblas_ztrmv_64", "library"); subst("cublasZtrsm", "rocblas_ztrsm", "library"); subst("cublasZtrsmBatched", "rocblas_ztrsm_batched", "library"); subst("cublasZtrsm_v2", "rocblas_ztrsm", "library"); subst("cublasZtrsv", "rocblas_ztrsv", "library"); + subst("cublasZtrsv_64", "rocblas_ztrsv_64", "library"); subst("cublasZtrsv_v2", "rocblas_ztrsv", "library"); + subst("cublasZtrsv_v2_64", "rocblas_ztrsv_64", "library"); subst("cudnnActivationBackward", "miopenActivationBackward", "library"); subst("cudnnActivationForward", "miopenActivationForward", "library"); subst("cudnnBackendCreateDescriptor", "miopenBackendCreateDescriptor", "library"); @@ -2133,6 +2289,8 @@ sub rocSubstitutions { subst("cudnnTransformTensor", 
"miopenTransformTensor", "library"); subst("curandCreateGenerator", "rocrand_create_generator", "library"); subst("curandCreateGeneratorHost", "rocrand_create_generator_host_blocking", "library"); + subst("curandCreatePoissonDistribution", "rocrand_create_poisson_distribution", "library"); + subst("curandDestroyDistribution", "rocrand_destroy_discrete_distribution", "library"); subst("curandDestroyGenerator", "rocrand_destroy_generator", "library"); subst("curandGenerate", "rocrand_generate", "library"); subst("curandGenerateLogNormal", "rocrand_generate_log_normal", "library"); @@ -2144,8 +2302,17 @@ sub rocSubstitutions { subst("curandGenerateSeeds", "rocrand_initialize_generator", "library"); subst("curandGenerateUniform", "rocrand_generate_uniform", "library"); subst("curandGenerateUniformDouble", "rocrand_generate_uniform_double", "library"); + subst("curandGetDirectionVectors32", "rocrand_get_direction_vectors32", "library"); + subst("curandGetDirectionVectors64", "rocrand_get_direction_vectors64", "library"); + subst("curandGetScrambleConstants32", "rocrand_get_scramble_constants32", "library"); + subst("curandGetScrambleConstants64", "rocrand_get_scramble_constants64", "library"); + subst("curandGetVersion", "rocrand_get_version", "library"); + subst("curandMakeMTGP32Constants", "rocrand_make_constant", "library"); + subst("curandMakeMTGP32KernelState", "rocrand_make_state_mtgp32", "library"); subst("curandSetGeneratorOffset", "rocrand_set_offset", "library"); + subst("curandSetGeneratorOrdering", "rocrand_set_ordering", "library"); subst("curandSetPseudoRandomGeneratorSeed", "rocrand_set_seed", "library"); + subst("curandSetQuasiRandomGeneratorDimensions", "rocrand_set_quasi_random_generator_dimensions", "library"); subst("curandSetStream", "rocrand_set_stream", "library"); subst("cusolverDnCpotrf", "rocsolver_cpotrf", "library"); subst("cusolverDnCreate", "rocblas_create_handle", "library"); @@ -2638,6 +2805,29 @@ sub rocSubstitutions { 
subst("cusparseZnnz", "rocsparse_znnz", "library"); subst("cusparseZnnz_compress", "rocsparse_znnz_compress", "library"); subst("cusparseZsctr", "rocsparse_zsctr", "library"); + subst("curand", "rocrand", "device_library"); + subst("curand_discrete", "rocrand_discrete", "device_library"); + subst("curand_discrete4", "rocrand_discrete4", "device_library"); + subst("curand_init", "rocrand_init", "device_library"); + subst("curand_log_normal", "rocrand_log_normal", "device_library"); + subst("curand_log_normal2", "rocrand_log_normal2", "device_library"); + subst("curand_log_normal2_double", "rocrand_log_normal_double2", "device_library"); + subst("curand_log_normal4", "rocrand_log_normal4", "device_library"); + subst("curand_log_normal4_double", "rocrand_log_normal_double4", "device_library"); + subst("curand_log_normal_double", "rocrand_log_normal_double", "device_library"); + subst("curand_normal", "rocrand_normal", "device_library"); + subst("curand_normal2", "rocrand_normal2", "device_library"); + subst("curand_normal2_double", "rocrand_normal_double2", "device_library"); + subst("curand_normal4", "rocrand_normal4", "device_library"); + subst("curand_normal4_double", "rocrand_normal_double4", "device_library"); + subst("curand_normal_double", "rocrand_normal_double", "device_library"); + subst("curand_poisson", "rocrand_poisson", "device_library"); + subst("curand_poisson4", "rocrand_poisson4", "device_library"); + subst("curand_uniform", "rocrand_uniform", "device_library"); + subst("curand_uniform2_double", "rocrand_uniform_double2", "device_library"); + subst("curand_uniform4", "rocrand_uniform4", "device_library"); + subst("curand_uniform4_double", "rocrand_uniform_double4", "device_library"); + subst("curand_uniform_double", "rocrand_uniform_double", "device_library"); subst("__half", "rocblas_half", "device_type"); subst("__nv_bfloat16", "rocblas_bfloat16", "device_type"); subst("cublas_api.h", "rocblas.h", "include"); @@ -2735,6 +2925,7 @@ sub 
rocSubstitutions { subst("cudnnLRNMode_t", "miopenLRNMode_t", "type"); subst("cudnnNanPropagation_t", "miopenNanPropagation_t", "type"); subst("cudnnOpTensorOp_t", "miopenTensorOp_t", "type"); + subst("cudnnPaddingMode_t", "miopenPaddingMode_t", "type"); subst("cudnnPointwiseMode_t", "miopenPointwiseMode_t", "type"); subst("cudnnPoolingDescriptor_t", "miopenPoolingDescriptor_t", "type"); subst("cudnnPoolingMode_t", "miopenPoolingMode_t", "type"); @@ -2743,6 +2934,7 @@ sub rocSubstitutions { subst("cudnnRNNDescriptor_t", "miopenRNNDescriptor_t", "type"); subst("cudnnRNNInputMode_t", "miopenRNNInputMode_t", "type"); subst("cudnnRNNMode_t", "miopenRNNMode_t", "type"); + subst("cudnnRNNPaddingMode_t", "miopenRNNPaddingMode_t", "type"); subst("cudnnReduceTensorDescriptor_t", "miopenReduceTensorDescriptor_t", "type"); subst("cudnnReduceTensorIndices_t", "miopenReduceTensorIndices_t", "type"); subst("cudnnReduceTensorOp_t", "miopenReduceTensorOp_t", "type"); @@ -2754,12 +2946,28 @@ sub rocSubstitutions { subst("cudnnTensorFormat_t", "miopenTensorLayout_t", "type"); subst("curandDirectionVectorSet", "rocrand_direction_vector_set", "type"); subst("curandDirectionVectorSet_t", "rocrand_direction_vector_set", "type"); + subst("curandDiscreteDistribution_st", "rocrand_discrete_distribution_st", "type"); + subst("curandDiscreteDistribution_t", "rocrand_discrete_distribution", "type"); subst("curandGenerator_st", "rocrand_generator_base_type", "type"); subst("curandGenerator_t", "rocrand_generator", "type"); subst("curandOrdering", "rocrand_ordering", "type"); subst("curandOrdering_t", "rocrand_ordering", "type"); subst("curandRngType", "rocrand_rng_type", "type"); subst("curandRngType_t", "rocrand_rng_type", "type"); + subst("curandStateMRG32k3a", "rocrand_device::mrg32k3a_engine", "type"); + subst("curandStateMRG32k3a_t", "rocrand_state_mrg32k3a", "type"); + subst("curandStateMtgp32", "rocrand_device::mtgp32_engine", "type"); + subst("curandStateMtgp32_t", 
"rocrand_state_mtgp32", "type"); + subst("curandStatePhilox4_32_10", "rocrand_device::philox4x32_10_engine", "type"); + subst("curandStatePhilox4_32_10_t", "rocrand_state_philox4x32_10", "type"); + subst("curandStateScrambledSobol32", "rocrand_device::scrambled_sobol32_engine", "type"); + subst("curandStateScrambledSobol32_t", "rocrand_state_scrambled_sobol32", "type"); + subst("curandStateScrambledSobol64", "rocrand_device::scrambled_sobol64_engine", "type"); + subst("curandStateScrambledSobol64_t", "rocrand_state_scrambled_sobol64", "type"); + subst("curandStateSobol32", "rocrand_device::sobol32_engine", "type"); + subst("curandStateSobol32_t", "rocrand_state_sobol32", "type"); + subst("curandStateSobol64", "rocrand_device::sobol64_engine", "type"); + subst("curandStateSobol64_t", "rocrand_state_sobol64", "type"); subst("curandStatus", "rocrand_status", "type"); subst("curandStatus_t", "rocrand_status", "type"); subst("cusolverDnHandle_t", "rocblas_handle", "type"); @@ -2864,6 +3072,7 @@ sub rocSubstitutions { subst("CUDNN_ACTIVATION_ELU", "miopenActivationELU", "numeric_literal"); subst("CUDNN_ACTIVATION_IDENTITY", "miopenActivationPASTHRU", "numeric_literal"); subst("CUDNN_ACTIVATION_RELU", "miopenActivationRELU", "numeric_literal"); + subst("CUDNN_ACTIVATION_SIGMOID", "miopenActivationLOGISTIC", "numeric_literal"); subst("CUDNN_ACTIVATION_TANH", "miopenActivationTANH", "numeric_literal"); subst("CUDNN_ATTR_CONVOLUTION_COMP_TYPE", "MIOPEN_ATTR_CONVOLUTION_COMP_TYPE", "numeric_literal"); subst("CUDNN_ATTR_CONVOLUTION_CONV_MODE", "MIOPEN_ATTR_CONVOLUTION_CONV_MODE", "numeric_literal"); @@ -3114,6 +3323,7 @@ sub rocSubstitutions { subst("CUDNN_CONVOLUTION", "miopenConvolution", "numeric_literal"); subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_0", "miopenConvolutionBwdDataAlgoGEMM", "numeric_literal"); subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_1", "miopenConvolutionBwdDataAlgoDirect", "numeric_literal"); + subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT", 
"miopenTransposeBwdDataAlgoGEMM", "numeric_literal"); subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", "miopenConvolutionBwdDataAlgoFFT", "numeric_literal"); subst("CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", "miopenConvolutionBwdDataAlgoWinograd", "numeric_literal"); subst("CUDNN_CONVOLUTION_FWD_ALGO_DIRECT", "miopenConvolutionFwdAlgoDirect", "numeric_literal"); @@ -3126,11 +3336,16 @@ sub rocSubstitutions { subst("CUDNN_DATA_BFLOAT16", "miopenBFloat16", "numeric_literal"); subst("CUDNN_DATA_DOUBLE", "miopenDouble", "numeric_literal"); subst("CUDNN_DATA_FLOAT", "miopenFloat", "numeric_literal"); + subst("CUDNN_DATA_FP8_E4M3", "miopenFloat8", "numeric_literal"); + subst("CUDNN_DATA_FP8_E5M2", "miopenBFloat8", "numeric_literal"); subst("CUDNN_DATA_HALF", "miopenHalf", "numeric_literal"); subst("CUDNN_DATA_INT32", "miopenInt32", "numeric_literal"); subst("CUDNN_DATA_INT64", "miopenInt64", "numeric_literal"); subst("CUDNN_DATA_INT8", "miopenInt8", "numeric_literal"); subst("CUDNN_DATA_INT8x4", "miopenInt8x4", "numeric_literal"); + subst("CUDNN_EDGE_VAL_PAD", "miopenPaddingValid", "numeric_literal"); + subst("CUDNN_FWD_MODE_INFERENCE", "miopenRNNInference", "numeric_literal"); + subst("CUDNN_FWD_MODE_TRAINING", "miopenRNNTraining", "numeric_literal"); subst("CUDNN_GRU", "miopenGRU", "numeric_literal"); subst("CUDNN_HEUR_MODES_COUNT", "MIOPEN_HEUR_MODES_COUNT", "numeric_literal"); subst("CUDNN_HEUR_MODE_A", "MIOPEN_HEUR_MODE_A", "numeric_literal"); @@ -3140,6 +3355,7 @@ sub rocSubstitutions { subst("CUDNN_LINEAR_INPUT", "miopenRNNlinear", "numeric_literal"); subst("CUDNN_LRN_CROSS_CHANNEL_DIM1", "miopenLRNCrossChannel", "numeric_literal"); subst("CUDNN_LSTM", "miopenLSTM", "numeric_literal"); + subst("CUDNN_NEG_INF_PAD", "miopenPaddingSame", "numeric_literal"); subst("CUDNN_NOT_PROPAGATE_NAN", "MIOPEN_NOT_PROPAGATE_NAN", "numeric_literal"); subst("CUDNN_OP_TENSOR_ADD", "miopenTensorOpAdd", "numeric_literal"); subst("CUDNN_OP_TENSOR_MAX", "miopenTensorOpMax", 
"numeric_literal"); @@ -3195,6 +3411,8 @@ sub rocSubstitutions { subst("CUDNN_POINTWISE_TAN", "MIOPEN_POINTWISE_TAN", "numeric_literal"); subst("CUDNN_POINTWISE_TANH_BWD", "MIOPEN_POINTWISE_TANH_BWD", "numeric_literal"); subst("CUDNN_POINTWISE_TANH_FWD", "MIOPEN_POINTWISE_TANH_FWD", "numeric_literal"); + subst("CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", "miopenPoolingAverage", "numeric_literal"); + subst("CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", "miopenPoolingAverageInclusive", "numeric_literal"); subst("CUDNN_POOLING_MAX", "miopenPoolingMax", "numeric_literal"); subst("CUDNN_PROPAGATE_NAN", "MIOPEN_PROPAGATE_NAN", "numeric_literal"); subst("CUDNN_REDUCE_TENSOR_ADD", "MIOPEN_REDUCE_TENSOR_ADD", "numeric_literal"); @@ -3213,6 +3431,8 @@ sub rocSubstitutions { subst("CUDNN_RNN_ALGO_STANDARD", "miopenRNNdefault", "numeric_literal"); subst("CUDNN_RNN_DOUBLE_BIAS", "miopenRNNwithBias", "numeric_literal"); subst("CUDNN_RNN_NO_BIAS", "miopenRNNNoBias", "numeric_literal"); + subst("CUDNN_RNN_PADDED_IO_DISABLED", "miopenRNNIONotPadded", "numeric_literal"); + subst("CUDNN_RNN_PADDED_IO_ENABLED", "miopenRNNIOWithPadding", "numeric_literal"); subst("CUDNN_RNN_RELU", "miopenRNNRELU", "numeric_literal"); subst("CUDNN_RNN_SINGLE_INP_BIAS", "miopenRNNwithBias", "numeric_literal"); subst("CUDNN_RNN_SINGLE_REC_BIAS", "miopenRNNwithBias", "numeric_literal"); @@ -3264,6 +3484,7 @@ sub rocSubstitutions { subst("CUDNN_TYPE_TENSOR_REORDERING_MODE", "MIOPEN_TYPE_TENSOR_REORDERING_MODE", "numeric_literal"); subst("CUDNN_TYPE_VOID_PTR", "MIOPEN_TYPE_VOID_PTR", "numeric_literal"); subst("CUDNN_UNIDIRECTIONAL", "miopenRNNunidirection", "numeric_literal"); + subst("CUDNN_ZERO_PAD", "miopenPaddingDefault", "numeric_literal"); subst("CURAND_DIRECTION_VECTORS_32_JOEKUO6", "ROCRAND_DIRECTION_VECTORS_32_JOEKUO6", "numeric_literal"); subst("CURAND_DIRECTION_VECTORS_64_JOEKUO6", "ROCRAND_DIRECTION_VECTORS_64_JOEKUO6", "numeric_literal"); subst("CURAND_ORDERING_PSEUDO_BEST", 
"ROCRAND_ORDERING_PSEUDO_BEST", "numeric_literal"); @@ -3379,6 +3600,7 @@ sub rocSubstitutions { subst("CUSPARSE_STATUS_NOT_SUPPORTED", "rocsparse_status_not_implemented", "numeric_literal"); subst("CUSPARSE_STATUS_SUCCESS", "rocsparse_status_success", "numeric_literal"); subst("CUSPARSE_STATUS_ZERO_PIVOT", "rocsparse_status_zero_pivot", "numeric_literal"); + subst("cudnnForwardMode_t", "miopenRNNFWDMode_t", "numeric_literal"); subst("cusolver_int_t", "rocblas_int", "numeric_literal"); } @@ -6318,6 +6540,7 @@ sub simpleSubstitutions { subst("curandStatus", "hiprandStatus", "type"); subst("curandStatus_t", "hiprandStatus_t", "type"); subst("cusolverAlgMode_t", "hipsolverAlgMode_t", "type"); + subst("cusolverDeterministicMode_t", "hipsolverDeterministicMode_t", "type"); subst("cusolverDnFunction_t", "hipsolverDnFunction_t", "type"); subst("cusolverDnHandle_t", "hipsolverHandle_t", "type"); subst("cusolverDnParams_t", "hipsolverDnParams_t", "type"); @@ -6763,6 +6986,8 @@ sub simpleSubstitutions { subst("CUSOLVERRF_UNIT_DIAGONAL_STORED_U", "HIPSOLVERRF_UNIT_DIAGONAL_STORED_U", "numeric_literal"); subst("CUSOLVER_ALG_0", "HIPSOLVER_ALG_0", "numeric_literal"); subst("CUSOLVER_ALG_1", "HIPSOLVER_ALG_1", "numeric_literal"); + subst("CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS", "HIPSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS", "numeric_literal"); + subst("CUSOLVER_DETERMINISTIC_RESULTS", "HIPSOLVER_DETERMINISTIC_RESULTS", "numeric_literal"); subst("CUSOLVER_EIG_MODE_NOVECTOR", "HIPSOLVER_EIG_MODE_NOVECTOR", "numeric_literal"); subst("CUSOLVER_EIG_MODE_VECTOR", "HIPSOLVER_EIG_MODE_VECTOR", "numeric_literal"); subst("CUSOLVER_EIG_RANGE_ALL", "HIPSOLVER_EIG_RANGE_ALL", "numeric_literal"); @@ -9208,9 +9433,6 @@ sub warnUnsupportedFunctions { "cusolverDnXsyevdx", "cusolverDnXsyevd_bufferSize", "cusolverDnXsyevd", - "cusolverDnXpotrs", - "cusolverDnXpotrf_bufferSize", - "cusolverDnXpotrf", "cusolverDnXlarft_bufferSize", "cusolverDnXlarft", "cusolverDnXgesvdr_bufferSize", @@ -9219,8 
+9441,6 @@ sub warnUnsupportedFunctions { "cusolverDnXgesvdp", "cusolverDnXgesvd_bufferSize", "cusolverDnXgesvd", - "cusolverDnXgeqrf_bufferSize", - "cusolverDnXgeqrf", "cusolverDnSyevdx_bufferSize", "cusolverDnSyevdx", "cusolverDnSyevd_bufferSize", @@ -9230,7 +9450,6 @@ sub warnUnsupportedFunctions { "cusolverDnSlauum_bufferSize", "cusolverDnSlauum", "cusolverDnSlaswp", - "cusolverDnSetDeterministicMode", "cusolverDnSXgesv_bufferSize", "cusolverDnSXgesv", "cusolverDnSXgels_bufferSize", @@ -9285,7 +9504,6 @@ sub warnUnsupportedFunctions { "cusolverDnGetrs", "cusolverDnGetrf_bufferSize", "cusolverDnGetrf", - "cusolverDnGetDeterministicMode", "cusolverDnGesvd_bufferSize", "cusolverDnGesvd", "cusolverDnGeqrf_bufferSize", @@ -9330,7 +9548,6 @@ sub warnUnsupportedFunctions { "cusolverDnCEgels_bufferSize", "cusolverDnCEgels", "cusolverDirectMode_t", - "cusolverDeterministicMode_t", "curand_mtgp32_specific", "curand_mtgp32_single_specific", "curand_mtgp32_single", @@ -9615,8 +9832,10 @@ sub warnUnsupportedFunctions { "cudnnBatchNormalizationForwardTrainingEx", "cudnnBatchNormalizationBackwardEx", "cudnnBatchNormOps_t", + "cudnnBackendUpdateCudaGraph", "cudnnBackendTensorReordering_t", "cudnnBackendSetAttribute", + "cudnnBackendPopulateCudaGraph", "cudnnBackendNumericalNote_t", "cudnnBackendNormMode_t", "cudnnBackendNormFwdPhase_t", @@ -9766,7 +9985,6 @@ sub warnUnsupportedFunctions { "cudaGraphicsCubeFaceNegativeX", "cudaGraphicsCubeFace", "cudaGraphRemoveDependencies_v2", - "cudaGraphNodeSetParams", "cudaGraphNodeGetDependentNodes_v2", "cudaGraphNodeGetDependencies_v2", "cudaGraphKernelNodeUpdate", @@ -9779,8 +9997,6 @@ sub warnUnsupportedFunctions { "cudaGraphExecUpdateResultInfo_st", "cudaGraphExecUpdateResultInfo", "cudaGraphExecUpdateErrorAttributesChanged", - "cudaGraphExecNodeSetParams", - "cudaGraphExecGetFlags", "cudaGraphDeviceNode_t", "cudaGraphDebugDotFlagsConditionalNodeParams", "cudaGraphConditionalNodeType", @@ -10263,18 +10479,11 @@ sub 
warnUnsupportedFunctions { "cuGraphicsD3D11RegisterResource", "cuGraphicsD3D10RegisterResource", "cuGraphRemoveDependencies_v2", - "cuGraphNodeSetParams", "cuGraphNodeGetDependentNodes_v2", "cuGraphNodeGetDependencies_v2", - "cuGraphMemcpyNodeSetParams", - "cuGraphMemcpyNodeGetParams", "cuGraphGetEdges_v2", - "cuGraphExecNodeSetParams", - "cuGraphExecMemcpyNodeSetParams", - "cuGraphExecGetFlags", "cuGraphConditionalHandleCreate", "cuGraphAddNode_v2", - "cuGraphAddMemFreeNode", "cuGraphAddDependencies_v2", "cuGLUnregisterBufferObject", "cuGLUnmapBufferObjectAsync", @@ -11083,7 +11292,6 @@ sub warnUnsupportedFunctions { "CUSOLVER_IRS_REFINE_CLASSICAL", "CUSOLVER_INF_NORM", "CUSOLVER_FRO_NORM", - "CUSOLVER_DETERMINISTIC_RESULTS", "CUSOLVER_C_TF32", "CUSOLVER_C_AP", "CUSOLVER_C_8U", @@ -11092,7 +11300,6 @@ sub warnUnsupportedFunctions { "CUSOLVER_C_32F", "CUSOLVER_C_16F", "CUSOLVER_C_16BF", - "CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS", "CUSOLVER_ALG_2", "CUSOLVERDN_POTRF", "CURAND_REJECTION", @@ -11187,6 +11394,7 @@ sub warnUnsupportedFunctions { "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART", "CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", "CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", + "CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", "CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", "CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH", "CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", @@ -11210,6 +11418,7 @@ sub warnUnsupportedFunctions { "CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED", "CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER", "CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", + "CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", "CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", "CUDNN_SIGNAL_WAIT", "CUDNN_SIGNAL_SET", @@ -11489,6 +11698,7 @@ sub warnUnsupportedFunctions { "CUDNN_BN_FINALIZE_STATISTICS_TRAINING", "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", + "CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", 
"CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", @@ -11718,6 +11928,7 @@ sub warnUnsupportedFunctions { "CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE", "CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE", "CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE", + "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", "CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", "CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", "CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", @@ -12444,59 +12655,29 @@ sub warnRocOnlyUnsupportedFunctions { my $k = 0; foreach $func ( "cublasZtrttp", - "cublasZtrsv_v2_64", - "cublasZtrsv_64", "cublasZtrsm_v2_64", "cublasZtrsm_64", "cublasZtrsmBatched_64", - "cublasZtrmv_v2_64", - "cublasZtrmv_64", "cublasZtrmm_v2_64", "cublasZtrmm_64", "cublasZtpttr", "cublasZtpsv_v2_64", "cublasZtpsv_64", - "cublasZtpmv_v2_64", - "cublasZtpmv_64", - "cublasZtbsv_v2_64", - "cublasZtbsv_64", - "cublasZtbmv_v2_64", - "cublasZtbmv_64", "cublasZsyrkx_64", "cublasZsyrk_v2_64", "cublasZsyrk_64", - "cublasZsyr_v2_64", - "cublasZsyr_64", "cublasZsyr2k_v2_64", "cublasZsyr2k_64", - "cublasZsyr2_v2_64", - "cublasZsyr2_64", - "cublasZsymv_v2_64", - "cublasZsymv_64", "cublasZsymm_v2_64", "cublasZsymm_64", "cublasZmatinvBatched", - "cublasZhpr_v2_64", - "cublasZhpr_64", - "cublasZhpr2_v2_64", - "cublasZhpr2_64", - "cublasZhpmv_v2_64", - "cublasZhpmv_64", "cublasZherkx_64", "cublasZherk_v2_64", "cublasZherk_64", - "cublasZher_v2_64", - "cublasZher_64", "cublasZher2k_v2_64", "cublasZher2k_64", - "cublasZher2_v2_64", - "cublasZher2_64", - "cublasZhemv_v2_64", - "cublasZhemv_64", "cublasZhemm_v2_64", "cublasZhemm_64", - "cublasZhbmv_v2_64", - "cublasZhbmv_64", "cublasZgetrsBatched", "cublasZgetriBatched", "cublasZgetrfBatched", @@ -12505,7 +12686,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgerc_v2_64", "cublasZgerc_64", "cublasZgeqrfBatched", - "cublasZgemvStridedBatched_64", "cublasZgemm_v2_64", "cublasZgemm_64", "cublasZgemmStridedBatched_64", @@ -12517,50 +12697,24 @@ sub 
warnRocOnlyUnsupportedFunctions { "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", - "cublasTSTgemvStridedBatched_64", - "cublasTSSgemvStridedBatched_64", "cublasSwapEx_64", "cublasSwapEx", "cublasStrttp", - "cublasStrsv_v2_64", - "cublasStrsv_64", "cublasStrsm_v2_64", "cublasStrsm_64", "cublasStrsmBatched_64", - "cublasStrmv_v2_64", - "cublasStrmv_64", "cublasStrmm_v2_64", "cublasStrmm_64", "cublasStpttr", "cublasStpsv_v2_64", "cublasStpsv_64", - "cublasStpmv_v2_64", - "cublasStpmv_64", - "cublasStbsv_v2_64", - "cublasStbsv_64", - "cublasStbmv_v2_64", - "cublasStbmv_64", "cublasSsyrkx_64", "cublasSsyrk_v2_64", "cublasSsyrk_64", - "cublasSsyr_v2_64", - "cublasSsyr_64", "cublasSsyr2k_v2_64", "cublasSsyr2k_64", - "cublasSsyr2_v2_64", - "cublasSsyr2_64", - "cublasSsymv_v2_64", - "cublasSsymv_64", "cublasSsymm_v2_64", "cublasSsymm_64", - "cublasSspr_v2_64", - "cublasSspr_64", - "cublasSspr2_v2_64", - "cublasSspr2_64", - "cublasSspmv_v2_64", - "cublasSspmv_64", - "cublasSsbmv_v2_64", - "cublasSsbmv_64", "cublasSmatinvBatched", "cublasShutdown", "cublasSgetrsBatched", @@ -12569,8 +12723,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSger_v2_64", "cublasSger_64", "cublasSgeqrfBatched", - "cublasSgemvStridedBatched_64", - "cublasSgemvStridedBatched", "cublasSgemm_v2_64", "cublasSgemm_64", "cublasSgemmStridedBatched_64", @@ -12666,8 +12818,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasHgemm_64", "cublasHgemmStridedBatched_64", "cublasHgemmBatched_64", - "cublasHSSgemvStridedBatched_64", - "cublasHSHgemvStridedBatched_64", "cublasGetVersion_v2", "cublasGetVersion", "cublasGetVector_64", @@ -12687,45 +12837,21 @@ sub warnRocOnlyUnsupportedFunctions { "cublasGemmBatchedEx_64", "cublasFree", "cublasDtrttp", - "cublasDtrsv_v2_64", - "cublasDtrsv_64", "cublasDtrsm_v2_64", "cublasDtrsm_64", "cublasDtrsmBatched_64", - "cublasDtrmv_v2_64", - "cublasDtrmv_64", "cublasDtrmm_v2_64", "cublasDtrmm_64", "cublasDtpttr", "cublasDtpsv_v2_64", "cublasDtpsv_64", - 
"cublasDtpmv_v2_64", - "cublasDtpmv_64", - "cublasDtbsv_v2_64", - "cublasDtbsv_64", - "cublasDtbmv_v2_64", - "cublasDtbmv_64", "cublasDsyrkx_64", "cublasDsyrk_v2_64", "cublasDsyrk_64", - "cublasDsyr_v2_64", - "cublasDsyr_64", "cublasDsyr2k_v2_64", "cublasDsyr2k_64", - "cublasDsyr2_v2_64", - "cublasDsyr2_64", - "cublasDsymv_v2_64", - "cublasDsymv_64", "cublasDsymm_v2_64", "cublasDsymm_64", - "cublasDspr_v2_64", - "cublasDspr_64", - "cublasDspr2_v2_64", - "cublasDspr2_64", - "cublasDspmv_v2_64", - "cublasDspmv_64", - "cublasDsbmv_v2_64", - "cublasDsbmv_64", "cublasDmatinvBatched", "cublasDgetrsBatched", "cublasDgetriBatched", @@ -12733,8 +12859,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDger_v2_64", "cublasDger_64", "cublasDgeqrfBatched", - "cublasDgemvStridedBatched_64", - "cublasDgemvStridedBatched", "cublasDgemm_v2_64", "cublasDgemm_64", "cublasDgemmStridedBatched_64", @@ -12745,24 +12869,14 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgeam_64", "cublasDdgmm_64", "cublasCtrttp", - "cublasCtrsv_v2_64", - "cublasCtrsv_64", "cublasCtrsm_v2_64", "cublasCtrsm_64", "cublasCtrsmBatched_64", - "cublasCtrmv_v2_64", - "cublasCtrmv_64", "cublasCtrmm_v2_64", "cublasCtrmm_64", "cublasCtpttr", "cublasCtpsv_v2_64", "cublasCtpsv_64", - "cublasCtpmv_v2_64", - "cublasCtpmv_64", - "cublasCtbsv_v2_64", - "cublasCtbsv_64", - "cublasCtbmv_v2_64", - "cublasCtbmv_64", "cublasCsyrkx_64", "cublasCsyrk_v2_64", "cublasCsyrk_64", @@ -12770,25 +12884,13 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCsyrkEx", "cublasCsyrk3mEx_64", "cublasCsyrk3mEx", - "cublasCsyr_v2_64", - "cublasCsyr_64", "cublasCsyr2k_v2_64", "cublasCsyr2k_64", - "cublasCsyr2_v2_64", - "cublasCsyr2_64", - "cublasCsymv_v2_64", - "cublasCsymv_64", "cublasCsymm_v2_64", "cublasCsymm_64", "cublasCopyEx_64", "cublasCopyEx", "cublasCmatinvBatched", - "cublasChpr_v2_64", - "cublasChpr_64", - "cublasChpr2_v2_64", - "cublasChpr2_64", - "cublasChpmv_v2_64", - "cublasChpmv_64", "cublasCherkx_64", "cublasCherk_v2_64", 
"cublasCherk_64", @@ -12796,18 +12898,10 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCherkEx", "cublasCherk3mEx_64", "cublasCherk3mEx", - "cublasCher_v2_64", - "cublasCher_64", "cublasCher2k_v2_64", "cublasCher2k_64", - "cublasCher2_v2_64", - "cublasCher2_64", - "cublasChemv_v2_64", - "cublasChemv_64", "cublasChemm_v2_64", "cublasChemm_64", - "cublasChbmv_v2_64", - "cublasChbmv_64", "cublasCgetrsBatched", "cublasCgetriBatched", "cublasCgetrfBatched", @@ -12816,7 +12910,6 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgerc_v2_64", "cublasCgerc_64", "cublasCgeqrfBatched", - "cublasCgemvStridedBatched_64", "cublasCgemm_v2_64", "cublasCgemm_64", "cublasCgemmStridedBatched_64", @@ -13201,7 +13294,7 @@ if ($help) { print STDERR "$USAGE\n"; } if ($version) { - print STDERR "HIP version 6.2.0\n"; + print STDERR "HIP version 6.3.0\n"; } while (@ARGV) { $fileName=shift (@ARGV); diff --git a/docs/hipify-clang.rst b/docs/hipify-clang.rst index 3ec3c768..dec00c4d 100644 --- a/docs/hipify-clang.rst +++ b/docs/hipify-clang.rst @@ -37,7 +37,7 @@ Dependencies * `LLVM+Clang `_ of at least version `4.0.0 `_; the latest stable and recommended release: - `19.1.0 `_. + `19.1.1 `_. * `CUDA `_ of at least version `7.0 `_, the latest supported version is @@ -184,7 +184,8 @@ Dependencies - `12.3.2 `_ - ✅ - ✅ - * - `19.1.0 `_:sup:`4` + * - `19.1.0 `_, + `19.1.1 `_:sup:`4` - `12.6.1 `_:sup:`4` - **Latest stable config** - **Latest stable config** @@ -227,7 +228,7 @@ Dependencies In most cases, you can get a suitable version of ``LLVM+Clang`` with your package manager. However, you can also `download a release archive `_ and build or install it. In case of multiple versions of ``LLVM`` installed, set `CMAKE_PREFIX_PATH `_ so that -``CMake`` can find the desired version of ``LLVM``. For example, ``-DCMAKE_PREFIX_PATH=D:\LLVM\19.1.0\dist``. +``CMake`` can find the desired version of ``LLVM``. For example, ``-DCMAKE_PREFIX_PATH=D:\LLVM\19.1.1\dist``. 
Usage ============================================================ @@ -260,7 +261,7 @@ header files used during the hipification process: .. code:: shell - ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.6 --clang-resource-directory=/usr/llvm/19.1.0/dist/lib/clang/19 + ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.6 --clang-resource-directory=/usr/llvm/19.1.1/dist/lib/clang/19 For more information, refer to the `Clang manual for compiling CUDA `_. @@ -397,7 +398,7 @@ To ensure LLVM being found or in case of multiple LLVM instances, specify the pa .. code-block:: bash - -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.0/dist + -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.1/dist On Windows, specify the following option for CMake in the first place: ``-G "Visual Studio 17 2022"``. @@ -471,7 +472,7 @@ LLVM <= 9.0.1 LLVM >= 10.0.0 ----------------- -1. Download `LLVM project `_ sources. +1. Download `LLVM project `_ sources. 2. Build `LLVM project `_: @@ -545,7 +546,7 @@ LLVM >= 10.0.0 .. code-block:: shell - -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.4.0 + -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.5.0 5. [Optional] Install `CUB 1.9.8 `_ for ``CUDA < 11.0`` only; for ``CUDA >= 11.0``, the CUB shipped with CUDA will be used for testing. @@ -574,13 +575,13 @@ LLVM >= 10.0.0 .. code-block:: bash - python /usr/llvm/19.1.0/llvm-project/llvm/utils/lit/setup.py install + python /usr/llvm/19.1.1/llvm-project/llvm/utils/lit/setup.py install **Windows**: .. code-block:: shell - python D:/LLVM/19.1.0/llvm-project/llvm/utils/lit/setup.py install + python D:/LLVM/19.1.1/llvm-project/llvm/utils/lit/setup.py install In case of errors similar to ``ModuleNotFoundError: No module named 'setuptools'``, upgrade the ``setuptools`` package: @@ -594,23 +595,23 @@ LLVM >= 10.0.0 .. code-block:: bash - -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.0/build/bin/llvm-lit + -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.1/build/bin/llvm-lit **Windows**: .. 
code-block:: shell - -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.0/build/Release/bin/llvm-lit.py + -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.1/build/Release/bin/llvm-lit.py * ``FileCheck``: **Linux**: - Copy from ``/usr/llvm/19.1.0/build/bin/`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. + Copy from ``/usr/llvm/19.1.1/build/bin/`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. **Windows**: - Copy from ``D:/LLVM/19.1.0/build/Release/bin`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. + Copy from ``D:/LLVM/19.1.1/build/Release/bin`` to ``CMAKE_INSTALL_PREFIX/dist/bin``. Alternatively, specify the path to ``FileCheck`` in the ``CMAKE_INSTALL_PREFIX`` option. @@ -637,8 +638,8 @@ On Linux, the following configurations are tested: * Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 * Ubuntu 16-19: LLVM 8.0.0 - 14.0.6, CUDA 7.0 - 10.2, cuDNN 5.1.10 - 8.0.5 -* Ubuntu 20-21: LLVM 9.0.0 - 19.1.0, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.4.0 -* Ubuntu 22-23: LLVM 13.0.0 - 19.1.0, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.4.0 +* Ubuntu 20-21: LLVM 9.0.0 - 19.1.1, CUDA 7.0 - 12.6.1, cuDNN 5.1.10 - 9.5.0 +* Ubuntu 22-23: LLVM 13.0.0 - 19.1.1, CUDA 7.0 - 12.6.1, cuDNN 8.0.5 - 9.5.0 Minimum build system requirements for the above configurations: @@ -646,7 +647,7 @@ Minimum build system requirements for the above configurations: Recommended build system requirements: -* CMake 3.30.2, GNU C/C++ 13.2, Python 3.12.5. +* CMake 3.30.4, GNU C/C++ 13.2, Python 3.12.7. 
Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01``: @@ -656,10 +657,10 @@ Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 23.10.01`` -DHIPIFY_CLANG_TESTS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.0/dist \ + -DCMAKE_PREFIX_PATH=/usr/llvm/19.1.1/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.6.1 \ - -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.4.0 \ - -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.0/build/bin/llvm-lit \ + -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.5.0 \ + -DLLVM_EXTERNAL_LIT=/usr/llvm/19.1.1/build/bin/llvm-lit \ ../hipify The corresponding successful output is: @@ -683,20 +684,20 @@ The corresponding successful output is: -- - Test hipify-clang : ON -- - Is part of HIP SDK : OFF -- Found ZLIB: /usr/lib/x86_64-linux-gnu/libz.so (found version "1.2.13") - -- Found LLVM 19.1.0: - -- - CMake module path : /usr/llvm/19.1.0/dist/lib/cmake/llvm - -- - Clang include path : /usr/llvm/19.1.0/dist/include - -- - LLVM Include path : /usr/llvm/19.1.0/dist/include - -- - Binary path : /usr/llvm/19.1.0/dist/bin + -- Found LLVM 19.1.1: + -- - CMake module path : /usr/llvm/19.1.1/dist/lib/cmake/llvm + -- - Clang include path : /usr/llvm/19.1.1/dist/include + -- - LLVM Include path : /usr/llvm/19.1.1/dist/include + -- - Binary path : /usr/llvm/19.1.1/dist/bin -- Linker detection: GNU ld -- ---- The below configuring for hipify-clang testing only ---- - -- Found Python: /usr/bin/python3.12 (found version "3.12.5") found components: Interpreter + -- Found Python: /usr/bin/python3.12 (found version "3.12.7") found components: Interpreter -- Found lit: /usr/local/bin/lit -- Found FileCheck: /GIT/LLVM/trunk/dist/FileCheck -- Initial CUDA to configure: -- - CUDA Toolkit path : /usr/local/cuda-12.6.1 -- - CUDA Samples path : - -- - cuDNN path : /usr/local/cudnn-9.4.0 + -- - cuDNN path : /usr/local/cudnn-9.5.0 -- - CUB path : -- Found CUDAToolkit: 
/usr/local/cuda-12.6.1/targets/x86_64-linux/include (found version "12.6.68") -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -705,7 +706,7 @@ The corresponding successful output is: -- Found CUDA config: -- - CUDA Toolkit path : /usr/local/cuda-12.6.1 -- - CUDA Samples path : OFF - -- - cuDNN path : /usr/local/cudnn-9.4.0 + -- - cuDNN path : /usr/local/cudnn-9.5.0 -- - CUB path : /usr/local/cuda-12.6.1/include/cub -- Configuring done (0.5s) -- Generating done (0.0s) @@ -722,11 +723,11 @@ The corresponding successful output is: Running HIPify regression tests =============================================================== CUDA 12.6.68 - will be used for testing - LLVM 19.1.0 - will be used for testing + LLVM 19.1.1 - will be used for testing x86_64 - Platform architecture Linux 6.5.0-15-generic - Platform OS 64 - hipify-clang binary bitness - 64 - python 3.12.5 binary bitness + 64 - python 3.12.7 binary bitness =============================================================== -- Testing: 106 tests, 12 threads -- Testing Time: 6.91s @@ -818,16 +819,16 @@ Tested configurations: - ``3.11.4`` * - ``17.0.1`` :sup:`6` - ``18.1.8`` :sup:`7` - ``7.0 - 12.3.2`` - - ``8.0.5 - 9.4.0`` - - ``2019.16.11.38, 2022.17.10.5`` - - ``3.30.0`` - - ``3.12.5`` - * - ``19.1.0`` + - ``8.0.5 - 9.5.0`` + - ``2019.16.11.40, 2022.17.11.4`` + - ``3.30.4`` + - ``3.12.7`` + * - ``19.1.1`` - ``7.0 - 12.6.1`` - - ``8.0.5 - 9.4.0`` - - ``2019.16.11.38, 2022.17.10.5`` - - ``3.30.0`` - - ``3.12.5`` + - ``8.0.5 - 9.5.0`` + - ``2019.16.11.40, 2022.17.11.4`` + - ``3.30.4`` + - ``3.12.7`` :sup:`5` LLVM 14.x.x is the latest major release supporting Visual Studio 2017. 
@@ -852,11 +853,11 @@ Building with testing support using ``Visual Studio 17 2022`` on ``Windows 11``: -DHIPIFY_CLANG_TESTS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=D:/LLVM/19.1.0/dist \ + -DCMAKE_PREFIX_PATH=D:/LLVM/19.1.1/dist \ -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6" \ -DCUDA_SDK_ROOT_DIR="C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5" \ - -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.4.0 \ - -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.0/build/Release/bin/llvm-lit.py \ + -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.5.0 \ + -DLLVM_EXTERNAL_LIT=D:/LLVM/19.1.1/build/Release/bin/llvm-lit.py \ ../hipify The corresponding successful output is: @@ -864,43 +865,43 @@ The corresponding successful output is: .. code-block:: shell -- Selecting Windows SDK version 10.0.22621.0 to target Windows 10.0.22631. - -- The C compiler identification is MSVC 19.39.33523.0 - -- The CXX compiler identification is MSVC 19.39.33523.0 + -- The C compiler identification is MSVC 19.41.34120.0 + -- The CXX compiler identification is MSVC 19.41.34120.0 -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done - -- Check for working C compiler: C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.39.33519/bin/Hostx64/x64/cl.exe - skipped + -- Check for working C compiler: C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.41.34120/bin/Hostx64/x64/cl.exe - skipped -- Detecting C compile features -- Detecting C compile features - done -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done - -- Check for working CXX compiler: C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.39.33519/bin/Hostx64/x64/cl.exe - skipped + -- Check for working CXX compiler: C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.41.34120/bin/Hostx64/x64/cl.exe - skipped -- Detecting CXX compile features -- Detecting CXX compile 
features - done -- HIPIFY config: -- - Build hipify-clang : ON -- - Test hipify-clang : ON -- - Is part of HIP SDK : OFF - -- Found LLVM 19.1.0: - -- - CMake module path : D:/LLVM/19.1.0/dist/lib/cmake/llvm - -- - Clang include path : D:/LLVM/19.1.0/dist/include - -- - LLVM Include path : D:/LLVM/19.1.0/dist/include - -- - Binary path : D:/LLVM/19.1.0/dist/bin + -- Found LLVM 19.1.1: + -- - CMake module path : D:/LLVM/19.1.1/dist/lib/cmake/llvm + -- - Clang include path : D:/LLVM/19.1.1/dist/include + -- - LLVM Include path : D:/LLVM/19.1.1/dist/include + -- - Binary path : D:/LLVM/19.1.1/dist/bin -- ---- The below configuring for hipify-clang testing only ---- - -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python312/python.exe (found version "3.12.5") found components: Interpreter + -- Found Python: C:/Users/TT/AppData/Local/Programs/Python/Python312/python.exe (found version "3.12.7") found components: Interpreter -- Found lit: C:/Users/TT/AppData/Local/Programs/Python/Python312/Scripts/lit.exe - -- Found FileCheck: D:/LLVM/19.1.0/dist/bin/FileCheck.exe + -- Found FileCheck: D:/LLVM/19.1.1/dist/bin/FileCheck.exe -- Initial CUDA to configure: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5 - -- - cuDNN path : D:/CUDA/cuDNN/9.4.0 + -- - cuDNN path : D:/CUDA/cuDNN/9.5.0 -- - CUB path : -- Found CUDAToolkit: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6/include (found version "12.6.68") -- Found CUDA config: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6 - -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.6 - -- - cuDNN path : D:/CUDA/cuDNN/9.4.0 + -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.5 + -- - cuDNN path : D:/CUDA/cuDNN/9.5.0 -- - CUB path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6/include/cub - -- Configuring done 
(1.4s) + -- Configuring done (2.1s) -- Generating done (0.1s) -- Build files have been written to: D:/HIPIFY/build diff --git a/docs/supported_apis.md b/docs/supported_apis.md index 1dd441a0..f27a70cc 100644 --- a/docs/supported_apis.md +++ b/docs/supported_apis.md @@ -10,7 +10,7 @@ | CUBLAS API | [HIP BLAS API](tables/CUBLAS_API_supported_by_HIP.md) | [ROC BLAS API](tables/CUBLAS_API_supported_by_ROC.md) | [HIP + ROC BLAS API](tables/CUBLAS_API_supported_by_HIP_and_ROC.md) | | CUSPARSE API | [HIP SPARSE API](tables/CUSPARSE_API_supported_by_HIP.md) | [ROC SPARSE API](tables/CUSPARSE_API_supported_by_ROC.md) | [HIP + ROC SPARSE API](tables/CUSPARSE_API_supported_by_HIP_and_ROC.md) | | CUSOLVER API | [HIP SOLVER API](tables/CUSOLVER_API_supported_by_HIP.md) | | | -| CURAND API | [HIP RAND API](tables/CURAND_API_supported_by_HIP.md) | | | +| CURAND API | [HIP RAND API](tables/CURAND_API_supported_by_HIP.md) |[ROC RAND API](tables/CURAND_API_supported_by_ROC.md) | [HIP + ROC RAND API](tables/CURAND_API_supported_by_HIP_and_ROC.md) | | CUFFT API | [HIP FFT API](tables/CUFFT_API_supported_by_HIP.md) | | | | CUDNN API | [HIP DNN API](tables/CUDNN_API_supported_by_HIP.md) | | | | CUB API | [HIP CUB API](tables/CUB_API_supported_by_HIP.md) | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index 141afe35..8ad6be72 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -739,69 +739,69 @@ |`cublasCgeru_v2`| | | | |`hipblasCgeru_v2`|6.0.0| | | | |`rocblas_cgeru`|3.5.0| | | | | |`cublasCgeru_v2_64`|12.0| | | |`hipblasCgeru_v2_64`|6.2.0| | | | | | | | | | | |`cublasChbmv`| | | | |`hipblasChbmv_v2`|6.0.0| | | | |`rocblas_chbmv`|3.5.0| | | | | -|`cublasChbmv_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasChbmv_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | | |`rocblas_chbmv_64`|6.2.0| | | | | |`cublasChbmv_v2`| | | | 
|`hipblasChbmv_v2`|6.0.0| | | | |`rocblas_chbmv`|3.5.0| | | | | -|`cublasChbmv_v2_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasChbmv_v2_64`|12.0| | | |`hipblasChbmv_v2_64`|6.2.0| | | | |`rocblas_chbmv_64`|6.2.0| | | | | |`cublasChemv`| | | | |`hipblasChemv_v2`|6.0.0| | | | |`rocblas_chemv`|1.5.0| | | | | -|`cublasChemv_64`|12.0| | | |`hipblasChemv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasChemv_64`|12.0| | | |`hipblasChemv_v2_64`|6.2.0| | | | |`rocblas_chemv_64`|6.2.0| | | | | |`cublasChemv_v2`| | | | |`hipblasChemv_v2`|6.0.0| | | | |`rocblas_chemv`|1.5.0| | | | | -|`cublasChemv_v2_64`|12.0| | | |`hipblasChemv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasChemv_v2_64`|12.0| | | |`hipblasChemv_v2_64`|6.2.0| | | | |`rocblas_chemv_64`|6.2.0| | | | | |`cublasCher`| | | | |`hipblasCher_v2`|6.0.0| | | | |`rocblas_cher`|3.5.0| | | | | |`cublasCher2`| | | | |`hipblasCher2_v2`|6.0.0| | | | |`rocblas_cher2`|3.5.0| | | | | -|`cublasCher2_64`|12.0| | | |`hipblasCher2_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCher2_64`|12.0| | | |`hipblasCher2_v2_64`|6.2.0| | | | |`rocblas_cher2_64`|6.2.0| | | | | |`cublasCher2_v2`| | | | |`hipblasCher2_v2`|6.0.0| | | | |`rocblas_cher2`|3.5.0| | | | | -|`cublasCher2_v2_64`|12.0| | | |`hipblasCher2_v2_64`|6.2.0| | | | | | | | | | | -|`cublasCher_64`|12.0| | | |`hipblasCher_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCher2_v2_64`|12.0| | | |`hipblasCher2_v2_64`|6.2.0| | | | |`rocblas_cher2_64`|6.2.0| | | | | +|`cublasCher_64`|12.0| | | |`hipblasCher_v2_64`|6.2.0| | | | |`rocblas_cher_64`|6.2.0| | | | | |`cublasCher_v2`| | | | |`hipblasCher_v2`|6.0.0| | | | |`rocblas_cher`|3.5.0| | | | | -|`cublasCher_v2_64`|12.0| | | |`hipblasCher_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCher_v2_64`|12.0| | | |`hipblasCher_v2_64`|6.2.0| | | | |`rocblas_cher_64`|6.2.0| | | | | |`cublasChpmv`| | | | |`hipblasChpmv_v2`|6.0.0| | | | |`rocblas_chpmv`|3.5.0| | | | | -|`cublasChpmv_64`|12.0| | | |`hipblasChpmv_v2_64`|6.2.0| | | | | | | | | | | 
+|`cublasChpmv_64`|12.0| | | |`hipblasChpmv_v2_64`|6.2.0| | | | |`rocblas_chpmv_64`|6.2.0| | | | | |`cublasChpmv_v2`| | | | |`hipblasChpmv_v2`|6.0.0| | | | |`rocblas_chpmv`|3.5.0| | | | | -|`cublasChpmv_v2_64`|12.0| | | |`hipblasChpmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasChpmv_v2_64`|12.0| | | |`hipblasChpmv_v2_64`|6.2.0| | | | |`rocblas_chpmv_64`|6.2.0| | | | | |`cublasChpr`| | | | |`hipblasChpr_v2`|6.0.0| | | | |`rocblas_chpr`|3.5.0| | | | | |`cublasChpr2`| | | | |`hipblasChpr2_v2`|6.0.0| | | | |`rocblas_chpr2`|3.5.0| | | | | -|`cublasChpr2_64`|12.0| | | |`hipblasChpr2_v2_64`|6.2.0| | | | | | | | | | | +|`cublasChpr2_64`|12.0| | | |`hipblasChpr2_v2_64`|6.2.0| | | | |`rocblas_chpr2_64`|6.2.0| | | | | |`cublasChpr2_v2`| | | | |`hipblasChpr2_v2`|6.0.0| | | | |`rocblas_chpr2`|3.5.0| | | | | -|`cublasChpr2_v2_64`|12.0| | | |`hipblasChpr2_v2_64`|6.2.0| | | | | | | | | | | -|`cublasChpr_64`|12.0| | | |`hipblasChpr_v2_64`|6.2.0| | | | | | | | | | | +|`cublasChpr2_v2_64`|12.0| | | |`hipblasChpr2_v2_64`|6.2.0| | | | |`rocblas_chpr2_64`|6.2.0| | | | | +|`cublasChpr_64`|12.0| | | |`hipblasChpr_v2_64`|6.2.0| | | | |`rocblas_chpr_64`|6.2.0| | | | | |`cublasChpr_v2`| | | | |`hipblasChpr_v2`|6.0.0| | | | |`rocblas_chpr`|3.5.0| | | | | -|`cublasChpr_v2_64`|12.0| | | |`hipblasChpr_v2_64`|6.2.0| | | | | | | | | | | +|`cublasChpr_v2_64`|12.0| | | |`hipblasChpr_v2_64`|6.2.0| | | | |`rocblas_chpr_64`|6.2.0| | | | | |`cublasCsymv`| | | | |`hipblasCsymv_v2`|6.0.0| | | | |`rocblas_csymv`|3.5.0| | | | | -|`cublasCsymv_64`|12.0| | | |`hipblasCsymv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCsymv_64`|12.0| | | |`hipblasCsymv_v2_64`|6.2.0| | | | |`rocblas_csymv_64`|6.2.0| | | | | |`cublasCsymv_v2`| | | | |`hipblasCsymv_v2`|6.0.0| | | | |`rocblas_csymv`|3.5.0| | | | | -|`cublasCsymv_v2_64`|12.0| | | |`hipblasCsymv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCsymv_v2_64`|12.0| | | |`hipblasCsymv_v2_64`|6.2.0| | | | |`rocblas_csymv_64`|6.2.0| | | | | |`cublasCsyr`| | | | 
|`hipblasCsyr_v2`|6.0.0| | | | |`rocblas_csyr`|1.7.1| | | | | |`cublasCsyr2`| | | | |`hipblasCsyr2_v2`|6.0.0| | | | |`rocblas_csyr2`|3.5.0| | | | | -|`cublasCsyr2_64`|12.0| | | |`hipblasCsyr2_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCsyr2_64`|12.0| | | |`hipblasCsyr2_v2_64`|6.2.0| | | | |`rocblas_csyr2_64`|6.2.0| | | | | |`cublasCsyr2_v2`| | | | |`hipblasCsyr2_v2`|6.0.0| | | | |`rocblas_csyr2`|3.5.0| | | | | -|`cublasCsyr2_v2_64`|12.0| | | |`hipblasCsyr2_v2_64`|6.2.0| | | | | | | | | | | -|`cublasCsyr_64`|12.0| | | |`hipblasCsyr_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCsyr2_v2_64`|12.0| | | |`hipblasCsyr2_v2_64`|6.2.0| | | | |`rocblas_csyr2_64`|6.2.0| | | | | +|`cublasCsyr_64`|12.0| | | |`hipblasCsyr_v2_64`|6.2.0| | | | |`rocblas_csyr_64`|6.2.0| | | | | |`cublasCsyr_v2`| | | | |`hipblasCsyr_v2`|6.0.0| | | | |`rocblas_csyr`|1.7.1| | | | | -|`cublasCsyr_v2_64`|12.0| | | |`hipblasCsyr_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCsyr_v2_64`|12.0| | | |`hipblasCsyr_v2_64`|6.2.0| | | | |`rocblas_csyr_64`|6.2.0| | | | | |`cublasCtbmv`| | | | |`hipblasCtbmv_v2`|6.0.0| | | | |`rocblas_ctbmv`|3.5.0| | | | | -|`cublasCtbmv_64`|12.0| | | |`hipblasCtbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtbmv_64`|12.0| | | |`hipblasCtbmv_v2_64`|6.2.0| | | | |`rocblas_ctbmv_64`|6.2.0| | | | | |`cublasCtbmv_v2`| | | | |`hipblasCtbmv_v2`|6.0.0| | | | |`rocblas_ctbmv`|3.5.0| | | | | -|`cublasCtbmv_v2_64`|12.0| | | |`hipblasCtbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtbmv_v2_64`|12.0| | | |`hipblasCtbmv_v2_64`|6.2.0| | | | |`rocblas_ctbmv_64`|6.2.0| | | | | |`cublasCtbsv`| | | | |`hipblasCtbsv_v2`|6.0.0| | | | |`rocblas_ctbsv`|3.5.0| | | | | -|`cublasCtbsv_64`|12.0| | | |`hipblasCtbsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtbsv_64`|12.0| | | |`hipblasCtbsv_v2_64`|6.2.0| | | | |`rocblas_ctbsv_64`|6.2.0| | | | | |`cublasCtbsv_v2`| | | | |`hipblasCtbsv_v2`|6.0.0| | | | |`rocblas_ctbsv`|3.5.0| | | | | -|`cublasCtbsv_v2_64`|12.0| | | |`hipblasCtbsv_v2_64`|6.2.0| | | | | | | | | | 
| +|`cublasCtbsv_v2_64`|12.0| | | |`hipblasCtbsv_v2_64`|6.2.0| | | | |`rocblas_ctbsv_64`|6.2.0| | | | | |`cublasCtpmv`| | | | |`hipblasCtpmv_v2`|6.0.0| | | | |`rocblas_ctpmv`|3.5.0| | | | | -|`cublasCtpmv_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtpmv_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | | |`rocblas_ctpmv_64`|6.2.0| | | | | |`cublasCtpmv_v2`| | | | |`hipblasCtpmv_v2`|6.0.0| | | | |`rocblas_ctpmv`|3.5.0| | | | | -|`cublasCtpmv_v2_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtpmv_v2_64`|12.0| | | |`hipblasCtpmv_v2_64`|6.2.0| | | | |`rocblas_ctpmv_64`|6.2.0| | | | | |`cublasCtpsv`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | |`rocblas_ctpsv`|3.5.0| | | | | |`cublasCtpsv_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasCtpsv_v2`| | | | |`hipblasCtpsv_v2`|6.0.0| | | | |`rocblas_ctpsv`|3.5.0| | | | | |`cublasCtpsv_v2_64`|12.0| | | |`hipblasCtpsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasCtrmv`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtrmv_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrmv_v2`| | | | |`hipblasCtrmv_v2`|6.0.0| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_v2_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtrmv_v2_64`|12.0| | | |`hipblasCtrmv_v2_64`|6.2.0| | | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrsv`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | |`rocblas_ctrsv`|3.5.0| | | | | -|`cublasCtrsv_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtrsv_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | | |`rocblas_ctrsv_64`|6.2.0| | | | | |`cublasCtrsv_v2`| | | | |`hipblasCtrsv_v2`|6.0.0| | | | |`rocblas_ctrsv`|3.5.0| | | | | -|`cublasCtrsv_v2_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCtrsv_v2_64`|12.0| | | |`hipblasCtrsv_v2_64`|6.2.0| 
| | | |`rocblas_ctrsv_64`|6.2.0| | | | | |`cublasDgbmv`| | | | |`hipblasDgbmv`|3.5.0| | | | |`rocblas_dgbmv`|3.5.0| | | | | |`cublasDgbmv_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | | |`rocblas_dgbmv_64`|6.2.0| | | | | |`cublasDgbmv_v2`| | | | |`hipblasDgbmv`|3.5.0| | | | |`rocblas_dgbmv`|3.5.0| | | | | @@ -815,57 +815,57 @@ |`cublasDger_v2`| | | | |`hipblasDger`|1.8.2| | | | |`rocblas_dger`|1.5.0| | | | | |`cublasDger_v2_64`|12.0| | | |`hipblasDger_64`|6.2.0| | | | | | | | | | | |`cublasDsbmv`| | | | |`hipblasDsbmv`|3.5.0| | | | |`rocblas_dsbmv`|3.5.0| | | | | -|`cublasDsbmv_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | | | | | | | | | +|`cublasDsbmv_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | | |`rocblas_dsbmv_64`|6.2.0| | | | | |`cublasDsbmv_v2`| | | | |`hipblasDsbmv`|3.5.0| | | | |`rocblas_dsbmv`|3.5.0| | | | | -|`cublasDsbmv_v2_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | | | | | | | | | +|`cublasDsbmv_v2_64`|12.0| | | |`hipblasDsbmv_64`|6.2.0| | | | |`rocblas_dsbmv_64`|6.2.0| | | | | |`cublasDspmv`| | | | |`hipblasDspmv`|3.5.0| | | | |`rocblas_dspmv`|3.5.0| | | | | -|`cublasDspmv_64`|12.0| | | |`hipblasDspmv_64`|6.2.0| | | | | | | | | | | +|`cublasDspmv_64`|12.0| | | |`hipblasDspmv_64`|6.2.0| | | | |`rocblas_dspmv_64`|6.2.0| | | | | |`cublasDspmv_v2`| | | | |`hipblasDspmv`|3.5.0| | | | |`rocblas_dspmv`|3.5.0| | | | | -|`cublasDspmv_v2_64`|12.0| | | |`hipblasDspmv_64`|6.2.0| | | | | | | | | | | +|`cublasDspmv_v2_64`|12.0| | | |`hipblasDspmv_64`|6.2.0| | | | |`rocblas_dspmv_64`|6.2.0| | | | | |`cublasDspr`| | | | |`hipblasDspr`|3.5.0| | | | |`rocblas_dspr`|3.5.0| | | | | |`cublasDspr2`| | | | |`hipblasDspr2`|3.5.0| | | | |`rocblas_dspr2`|3.5.0| | | | | -|`cublasDspr2_64`|12.0| | | |`hipblasDspr2_64`|6.2.0| | | | | | | | | | | +|`cublasDspr2_64`|12.0| | | |`hipblasDspr2_64`|6.2.0| | | | |`rocblas_dspr2_64`|6.2.0| | | | | |`cublasDspr2_v2`| | | | |`hipblasDspr2`|3.5.0| | | | |`rocblas_dspr2`|3.5.0| | | | | -|`cublasDspr2_v2_64`|12.0| | | |`hipblasDspr2_64`|6.2.0| | 
| | | | | | | | | -|`cublasDspr_64`|12.0| | | |`hipblasDspr_64`|6.2.0| | | | | | | | | | | +|`cublasDspr2_v2_64`|12.0| | | |`hipblasDspr2_64`|6.2.0| | | | |`rocblas_dspr2_64`|6.2.0| | | | | +|`cublasDspr_64`|12.0| | | |`hipblasDspr_64`|6.2.0| | | | |`rocblas_dspr_64`|6.2.0| | | | | |`cublasDspr_v2`| | | | |`hipblasDspr`|3.5.0| | | | |`rocblas_dspr`|3.5.0| | | | | -|`cublasDspr_v2_64`|12.0| | | |`hipblasDspr_64`|6.2.0| | | | | | | | | | | +|`cublasDspr_v2_64`|12.0| | | |`hipblasDspr_64`|6.2.0| | | | |`rocblas_dspr_64`|6.2.0| | | | | |`cublasDsymv`| | | | |`hipblasDsymv`|3.5.0| | | | |`rocblas_dsymv`|1.5.0| | | | | -|`cublasDsymv_64`|12.0| | | |`hipblasDsymv_64`|6.2.0| | | | | | | | | | | +|`cublasDsymv_64`|12.0| | | |`hipblasDsymv_64`|6.2.0| | | | |`rocblas_dsymv_64`|6.2.0| | | | | |`cublasDsymv_v2`| | | | |`hipblasDsymv`|3.5.0| | | | |`rocblas_dsymv`|1.5.0| | | | | -|`cublasDsymv_v2_64`|12.0| | | |`hipblasDsymv_64`|6.2.0| | | | | | | | | | | +|`cublasDsymv_v2_64`|12.0| | | |`hipblasDsymv_64`|6.2.0| | | | |`rocblas_dsymv_64`|6.2.0| | | | | |`cublasDsyr`| | | | |`hipblasDsyr`|3.0.0| | | | |`rocblas_dsyr`|1.7.1| | | | | |`cublasDsyr2`| | | | |`hipblasDsyr2`|3.5.0| | | | |`rocblas_dsyr2`|3.5.0| | | | | -|`cublasDsyr2_64`|12.0| | | |`hipblasDsyr2_64`|6.2.0| | | | | | | | | | | +|`cublasDsyr2_64`|12.0| | | |`hipblasDsyr2_64`|6.2.0| | | | |`rocblas_dsyr2_64`|6.2.0| | | | | |`cublasDsyr2_v2`| | | | |`hipblasDsyr2`|3.5.0| | | | |`rocblas_dsyr2`|3.5.0| | | | | -|`cublasDsyr2_v2_64`|12.0| | | |`hipblasDsyr2_64`|6.2.0| | | | | | | | | | | -|`cublasDsyr_64`|12.0| | | |`hipblasDsyr_64`|6.2.0| | | | | | | | | | | +|`cublasDsyr2_v2_64`|12.0| | | |`hipblasDsyr2_64`|6.2.0| | | | |`rocblas_dsyr2_64`|6.2.0| | | | | +|`cublasDsyr_64`|12.0| | | |`hipblasDsyr_64`|6.2.0| | | | |`rocblas_dsyr_64`|6.2.0| | | | | |`cublasDsyr_v2`| | | | |`hipblasDsyr`|3.0.0| | | | |`rocblas_dsyr`|1.7.1| | | | | -|`cublasDsyr_v2_64`|12.0| | | |`hipblasDsyr_64`|6.2.0| | | | | | | | | | | 
+|`cublasDsyr_v2_64`|12.0| | | |`hipblasDsyr_64`|6.2.0| | | | |`rocblas_dsyr_64`|6.2.0| | | | | |`cublasDtbmv`| | | | |`hipblasDtbmv`|3.5.0| | | | |`rocblas_dtbmv`|3.5.0| | | | | -|`cublasDtbmv_64`|12.0| | | |`hipblasDtbmv_64`|6.2.0| | | | | | | | | | | +|`cublasDtbmv_64`|12.0| | | |`hipblasDtbmv_64`|6.2.0| | | | |`rocblas_dtbmv_64`|6.2.0| | | | | |`cublasDtbmv_v2`| | | | |`hipblasDtbmv`|3.5.0| | | | |`rocblas_dtbmv`|3.5.0| | | | | -|`cublasDtbmv_v2_64`|12.0| | | |`hipblasDtbmv_64`|6.2.0| | | | | | | | | | | +|`cublasDtbmv_v2_64`|12.0| | | |`hipblasDtbmv_64`|6.2.0| | | | |`rocblas_dtbmv_64`|6.2.0| | | | | |`cublasDtbsv`| | | | |`hipblasDtbsv`|3.6.0| | | | |`rocblas_dtbsv`|3.5.0| | | | | -|`cublasDtbsv_64`|12.0| | | |`hipblasDtbsv_64`|6.2.0| | | | | | | | | | | +|`cublasDtbsv_64`|12.0| | | |`hipblasDtbsv_64`|6.2.0| | | | |`rocblas_dtbsv_64`|6.2.0| | | | | |`cublasDtbsv_v2`| | | | |`hipblasDtbsv`|3.6.0| | | | |`rocblas_dtbsv`|3.5.0| | | | | -|`cublasDtbsv_v2_64`|12.0| | | |`hipblasDtbsv_64`|6.2.0| | | | | | | | | | | +|`cublasDtbsv_v2_64`|12.0| | | |`hipblasDtbsv_64`|6.2.0| | | | |`rocblas_dtbsv_64`|6.2.0| | | | | |`cublasDtpmv`| | | | |`hipblasDtpmv`|3.5.0| | | | |`rocblas_dtpmv`|3.5.0| | | | | -|`cublasDtpmv_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | | | | | | | | | +|`cublasDtpmv_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | | |`rocblas_dtpmv_64`|6.2.0| | | | | |`cublasDtpmv_v2`| | | | |`hipblasDtpmv`|3.5.0| | | | |`rocblas_dtpmv`|3.5.0| | | | | -|`cublasDtpmv_v2_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | | | | | | | | | +|`cublasDtpmv_v2_64`|12.0| | | |`hipblasDtpmv_64`|6.2.0| | | | |`rocblas_dtpmv_64`|6.2.0| | | | | |`cublasDtpsv`| | | | |`hipblasDtpsv`|3.5.0| | | | |`rocblas_dtpsv`|3.5.0| | | | | |`cublasDtpsv_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | | | | | | | | | |`cublasDtpsv_v2`| | | | |`hipblasDtpsv`|3.5.0| | | | |`rocblas_dtpsv`|3.5.0| | | | | |`cublasDtpsv_v2_64`|12.0| | | |`hipblasDtpsv_64`|6.2.0| | | | | | | | | | | |`cublasDtrmv`| | | | 
|`hipblasDtrmv`|3.5.0| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | | | | | | | | +|`cublasDtrmv_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrmv_v2`| | | | |`hipblasDtrmv`|3.5.0| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_v2_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | | | | | | | | +|`cublasDtrmv_v2_64`|12.0| | | |`hipblasDtrmv_64`|6.2.0| | | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrsv`| | | | |`hipblasDtrsv`|3.0.0| | | | |`rocblas_dtrsv`|3.5.0| | | | | -|`cublasDtrsv_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | | | | | | | | | +|`cublasDtrsv_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | | |`rocblas_dtrsv_64`|6.2.0| | | | | |`cublasDtrsv_v2`| | | | |`hipblasDtrsv`|3.0.0| | | | |`rocblas_dtrsv`|3.5.0| | | | | -|`cublasDtrsv_v2_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | | | | | | | | | +|`cublasDtrsv_v2_64`|12.0| | | |`hipblasDtrsv_64`|6.2.0| | | | |`rocblas_dtrsv_64`|6.2.0| | | | | |`cublasSgbmv`| | | | |`hipblasSgbmv`|3.5.0| | | | |`rocblas_sgbmv`|3.5.0| | | | | |`cublasSgbmv_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | | |`rocblas_sgbmv_64`|6.2.0| | | | | |`cublasSgbmv_v2`| | | | |`hipblasSgbmv`|3.5.0| | | | |`rocblas_sgbmv`|3.5.0| | | | | @@ -879,57 +879,57 @@ |`cublasSger_v2`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | |`cublasSger_v2_64`|12.0| | | |`hipblasSger_64`|6.2.0| | | | | | | | | | | |`cublasSsbmv`| | | | |`hipblasSsbmv`|3.5.0| | | | |`rocblas_ssbmv`|3.5.0| | | | | -|`cublasSsbmv_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | | | | | | | | | +|`cublasSsbmv_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | | |`rocblas_ssbmv_64`|6.2.0| | | | | |`cublasSsbmv_v2`| | | | |`hipblasSsbmv`|3.5.0| | | | |`rocblas_ssbmv`|3.5.0| | | | | -|`cublasSsbmv_v2_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | | | | | | | | | +|`cublasSsbmv_v2_64`|12.0| | | |`hipblasSsbmv_64`|6.2.0| | | | |`rocblas_ssbmv_64`|6.2.0| | | | | |`cublasSspmv`| | | | 
|`hipblasSspmv`|3.5.0| | | | |`rocblas_sspmv`|3.5.0| | | | | -|`cublasSspmv_64`|12.0| | | |`hipblasSspmv_64`|6.2.0| | | | | | | | | | | +|`cublasSspmv_64`|12.0| | | |`hipblasSspmv_64`|6.2.0| | | | |`rocblas_sspmv_64`|6.2.0| | | | | |`cublasSspmv_v2`| | | | |`hipblasSspmv`|3.5.0| | | | |`rocblas_sspmv`|3.5.0| | | | | -|`cublasSspmv_v2_64`|12.0| | | |`hipblasSspmv_64`|6.2.0| | | | | | | | | | | +|`cublasSspmv_v2_64`|12.0| | | |`hipblasSspmv_64`|6.2.0| | | | |`rocblas_sspmv_64`|6.2.0| | | | | |`cublasSspr`| | | | |`hipblasSspr`|3.5.0| | | | |`rocblas_sspr`|3.5.0| | | | | |`cublasSspr2`| | | | |`hipblasSspr2`|3.5.0| | | | |`rocblas_sspr2`|3.5.0| | | | | -|`cublasSspr2_64`|12.0| | | |`hipblasSspr2_64`|6.2.0| | | | | | | | | | | +|`cublasSspr2_64`|12.0| | | |`hipblasSspr2_64`|6.2.0| | | | |`rocblas_sspr2_64`|6.2.0| | | | | |`cublasSspr2_v2`| | | | |`hipblasSspr2`|3.5.0| | | | |`rocblas_sspr2`|3.5.0| | | | | -|`cublasSspr2_v2_64`|12.0| | | |`hipblasSspr2_64`|6.2.0| | | | | | | | | | | -|`cublasSspr_64`|12.0| | | |`hipblasSspr_64`|6.2.0| | | | | | | | | | | +|`cublasSspr2_v2_64`|12.0| | | |`hipblasSspr2_64`|6.2.0| | | | |`rocblas_sspr2_64`|6.2.0| | | | | +|`cublasSspr_64`|12.0| | | |`hipblasSspr_64`|6.2.0| | | | |`rocblas_sspr_64`|6.2.0| | | | | |`cublasSspr_v2`| | | | |`hipblasSspr`|3.5.0| | | | |`rocblas_sspr`|3.5.0| | | | | -|`cublasSspr_v2_64`|12.0| | | |`hipblasSspr_64`|6.2.0| | | | | | | | | | | +|`cublasSspr_v2_64`|12.0| | | |`hipblasSspr_64`|6.2.0| | | | |`rocblas_sspr_64`|6.2.0| | | | | |`cublasSsymv`| | | | |`hipblasSsymv`|3.5.0| | | | |`rocblas_ssymv`|1.5.0| | | | | -|`cublasSsymv_64`|12.0| | | |`hipblasSsymv_64`|6.2.0| | | | | | | | | | | +|`cublasSsymv_64`|12.0| | | |`hipblasSsymv_64`|6.2.0| | | | |`rocblas_ssymv_64`|6.2.0| | | | | |`cublasSsymv_v2`| | | | |`hipblasSsymv`|3.5.0| | | | |`rocblas_ssymv`|1.5.0| | | | | -|`cublasSsymv_v2_64`|12.0| | | |`hipblasSsymv_64`|6.2.0| | | | | | | | | | | +|`cublasSsymv_v2_64`|12.0| | | |`hipblasSsymv_64`|6.2.0| | | | 
|`rocblas_ssymv_64`|6.2.0| | | | | |`cublasSsyr`| | | | |`hipblasSsyr`|3.0.0| | | | |`rocblas_ssyr`|1.7.1| | | | | |`cublasSsyr2`| | | | |`hipblasSsyr2`|3.5.0| | | | |`rocblas_ssyr2`|3.5.0| | | | | -|`cublasSsyr2_64`|12.0| | | |`hipblasSsyr2_64`|6.2.0| | | | | | | | | | | +|`cublasSsyr2_64`|12.0| | | |`hipblasSsyr2_64`|6.2.0| | | | |`rocblas_ssyr2_64`|6.2.0| | | | | |`cublasSsyr2_v2`| | | | |`hipblasSsyr2`|3.5.0| | | | |`rocblas_ssyr2`|3.5.0| | | | | -|`cublasSsyr2_v2_64`|12.0| | | |`hipblasSsyr2_64`|6.2.0| | | | | | | | | | | -|`cublasSsyr_64`|12.0| | | |`hipblasSsyr_64`|6.2.0| | | | | | | | | | | +|`cublasSsyr2_v2_64`|12.0| | | |`hipblasSsyr2_64`|6.2.0| | | | |`rocblas_ssyr2_64`|6.2.0| | | | | +|`cublasSsyr_64`|12.0| | | |`hipblasSsyr_64`|6.2.0| | | | |`rocblas_ssyr_64`|6.2.0| | | | | |`cublasSsyr_v2`| | | | |`hipblasSsyr`|3.0.0| | | | |`rocblas_ssyr`|1.7.1| | | | | -|`cublasSsyr_v2_64`|12.0| | | |`hipblasSsyr_64`|6.2.0| | | | | | | | | | | +|`cublasSsyr_v2_64`|12.0| | | |`hipblasSsyr_64`|6.2.0| | | | |`rocblas_ssyr_64`|6.2.0| | | | | |`cublasStbmv`| | | | |`hipblasStbmv`|3.5.0| | | | |`rocblas_stbmv`|3.5.0| | | | | -|`cublasStbmv_64`|12.0| | | |`hipblasStbmv_64`|6.2.0| | | | | | | | | | | +|`cublasStbmv_64`|12.0| | | |`hipblasStbmv_64`|6.2.0| | | | |`rocblas_stbmv_64`|6.2.0| | | | | |`cublasStbmv_v2`| | | | |`hipblasStbmv`|3.5.0| | | | |`rocblas_stbmv`|3.5.0| | | | | -|`cublasStbmv_v2_64`|12.0| | | |`hipblasStbmv_64`|6.2.0| | | | | | | | | | | +|`cublasStbmv_v2_64`|12.0| | | |`hipblasStbmv_64`|6.2.0| | | | |`rocblas_stbmv_64`|6.2.0| | | | | |`cublasStbsv`| | | | |`hipblasStbsv`|3.6.0| | | | |`rocblas_stbsv`|3.5.0| | | | | -|`cublasStbsv_64`|12.0| | | |`hipblasStbsv_64`|6.2.0| | | | | | | | | | | +|`cublasStbsv_64`|12.0| | | |`hipblasStbsv_64`|6.2.0| | | | |`rocblas_stbsv_64`|6.2.0| | | | | |`cublasStbsv_v2`| | | | |`hipblasStbsv`|3.6.0| | | | |`rocblas_stbsv`|3.5.0| | | | | -|`cublasStbsv_v2_64`|12.0| | | |`hipblasStbsv_64`|6.2.0| | | | | | | | | | | 
+|`cublasStbsv_v2_64`|12.0| | | |`hipblasStbsv_64`|6.2.0| | | | |`rocblas_stbsv_64`|6.2.0| | | | | |`cublasStpmv`| | | | |`hipblasStpmv`|3.5.0| | | | |`rocblas_stpmv`|3.5.0| | | | | -|`cublasStpmv_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | | | | | | | | | +|`cublasStpmv_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | | |`rocblas_stpmv_64`|6.2.0| | | | | |`cublasStpmv_v2`| | | | |`hipblasStpmv`|3.5.0| | | | |`rocblas_stpmv`|3.5.0| | | | | -|`cublasStpmv_v2_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | | | | | | | | | +|`cublasStpmv_v2_64`|12.0| | | |`hipblasStpmv_64`|6.2.0| | | | |`rocblas_stpmv_64`|6.2.0| | | | | |`cublasStpsv`| | | | |`hipblasStpsv`|3.5.0| | | | |`rocblas_stpsv`|3.5.0| | | | | |`cublasStpsv_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | | | | | | | | | |`cublasStpsv_v2`| | | | |`hipblasStpsv`|3.5.0| | | | |`rocblas_stpsv`|3.5.0| | | | | |`cublasStpsv_v2_64`|12.0| | | |`hipblasStpsv_64`|6.2.0| | | | | | | | | | | |`cublasStrmv`| | | | |`hipblasStrmv`|3.5.0| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | | | | | | | | +|`cublasStrmv_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrmv_v2`| | | | |`hipblasStrmv`|3.5.0| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_v2_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | | | | | | | | +|`cublasStrmv_v2_64`|12.0| | | |`hipblasStrmv_64`|6.2.0| | | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrsv`| | | | |`hipblasStrsv`|3.0.0| | | | |`rocblas_strsv`|3.5.0| | | | | -|`cublasStrsv_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | | | | | | | | | +|`cublasStrsv_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | | |`rocblas_strsv_64`|6.2.0| | | | | |`cublasStrsv_v2`| | | | |`hipblasStrsv`|3.0.0| | | | |`rocblas_strsv`|3.5.0| | | | | -|`cublasStrsv_v2_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | | | | | | | | | +|`cublasStrsv_v2_64`|12.0| | | |`hipblasStrsv_64`|6.2.0| | | | |`rocblas_strsv_64`|6.2.0| | | | | |`cublasZgbmv`| | | | 
|`hipblasZgbmv_v2`|6.0.0| | | | |`rocblas_zgbmv`|3.5.0| | | | | |`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | | |`rocblas_zgbmv_64`|6.2.0| | | | | |`cublasZgbmv_v2`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | |`rocblas_zgbmv`|3.5.0| | | | | @@ -947,69 +947,69 @@ |`cublasZgeru_v2`| | | | |`hipblasZgeru_v2`|6.0.0| | | | |`rocblas_zgeru`|3.5.0| | | | | |`cublasZgeru_v2_64`|12.0| | | |`hipblasZgeru_v2_64`|6.2.0| | | | | | | | | | | |`cublasZhbmv`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | |`rocblas_zhbmv`|3.5.0| | | | | -|`cublasZhbmv_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhbmv_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | | |`rocblas_zhbmv_64`|6.2.0| | | | | |`cublasZhbmv_v2`| | | | |`hipblasZhbmv_v2`|6.0.0| | | | |`rocblas_zhbmv`|3.5.0| | | | | -|`cublasZhbmv_v2_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhbmv_v2_64`|12.0| | | |`hipblasZhbmv_v2_64`|6.2.0| | | | |`rocblas_zhbmv_64`|6.2.0| | | | | |`cublasZhemv`| | | | |`hipblasZhemv_v2`|6.0.0| | | | |`rocblas_zhemv`|1.5.0| | | | | -|`cublasZhemv_64`|12.0| | | |`hipblasZhemv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhemv_64`|12.0| | | |`hipblasZhemv_v2_64`|6.2.0| | | | |`rocblas_zhemv_64`|6.2.0| | | | | |`cublasZhemv_v2`| | | | |`hipblasZhemv_v2`|6.0.0| | | | |`rocblas_zhemv`|1.5.0| | | | | -|`cublasZhemv_v2_64`|12.0| | | |`hipblasZhemv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhemv_v2_64`|12.0| | | |`hipblasZhemv_v2_64`|6.2.0| | | | |`rocblas_zhemv_64`|6.2.0| | | | | |`cublasZher`| | | | |`hipblasZher_v2`|6.0.0| | | | |`rocblas_zher`|3.5.0| | | | | |`cublasZher2`| | | | |`hipblasZher2_v2`|6.0.0| | | | |`rocblas_zher2`|3.5.0| | | | | -|`cublasZher2_64`|12.0| | | |`hipblasZher2_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZher2_64`|12.0| | | |`hipblasZher2_v2_64`|6.2.0| | | | |`rocblas_zher2_64`|6.2.0| | | | | |`cublasZher2_v2`| | | | |`hipblasZher2_v2`|6.0.0| | | | |`rocblas_zher2`|3.5.0| | | | | -|`cublasZher2_v2_64`|12.0| | | 
|`hipblasZher2_v2_64`|6.2.0| | | | | | | | | | | -|`cublasZher_64`|12.0| | | |`hipblasZher_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZher2_v2_64`|12.0| | | |`hipblasZher2_v2_64`|6.2.0| | | | |`rocblas_zher2_64`|6.2.0| | | | | +|`cublasZher_64`|12.0| | | |`hipblasZher_v2_64`|6.2.0| | | | |`rocblas_zher_64`|6.2.0| | | | | |`cublasZher_v2`| | | | |`hipblasZher_v2`|6.0.0| | | | |`rocblas_zher`|3.5.0| | | | | -|`cublasZher_v2_64`|12.0| | | |`hipblasZher_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZher_v2_64`|12.0| | | |`hipblasZher_v2_64`|6.2.0| | | | |`rocblas_zher_64`|6.2.0| | | | | |`cublasZhpmv`| | | | |`hipblasZhpmv_v2`|6.0.0| | | | |`rocblas_zhpmv`|3.5.0| | | | | -|`cublasZhpmv_64`|12.0| | | |`hipblasZhpmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhpmv_64`|12.0| | | |`hipblasZhpmv_v2_64`|6.2.0| | | | |`rocblas_zhpmv_64`|6.2.0| | | | | |`cublasZhpmv_v2`| | | | |`hipblasZhpmv_v2`|6.0.0| | | | |`rocblas_zhpmv`|3.5.0| | | | | -|`cublasZhpmv_v2_64`|12.0| | | |`hipblasZhpmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhpmv_v2_64`|12.0| | | |`hipblasZhpmv_v2_64`|6.2.0| | | | |`rocblas_zhpmv_64`|6.2.0| | | | | |`cublasZhpr`| | | | |`hipblasZhpr_v2`|6.0.0| | | | |`rocblas_zhpr`|3.5.0| | | | | |`cublasZhpr2`| | | | |`hipblasZhpr2_v2`|6.0.0| | | | |`rocblas_zhpr2`|3.5.0| | | | | -|`cublasZhpr2_64`|12.0| | | |`hipblasZhpr2_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhpr2_64`|12.0| | | |`hipblasZhpr2_v2_64`|6.2.0| | | | |`rocblas_zhpr2_64`|6.2.0| | | | | |`cublasZhpr2_v2`| | | | |`hipblasZhpr2_v2`|6.0.0| | | | |`rocblas_zhpr2`|3.5.0| | | | | -|`cublasZhpr2_v2_64`|12.0| | | |`hipblasZhpr2_v2_64`|6.2.0| | | | | | | | | | | -|`cublasZhpr_64`|12.0| | | |`hipblasZhpr_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhpr2_v2_64`|12.0| | | |`hipblasZhpr2_v2_64`|6.2.0| | | | |`rocblas_zhpr2_64`|6.2.0| | | | | +|`cublasZhpr_64`|12.0| | | |`hipblasZhpr_v2_64`|6.2.0| | | | |`rocblas_zhpr_64`|6.2.0| | | | | |`cublasZhpr_v2`| | | | |`hipblasZhpr_v2`|6.0.0| | | | |`rocblas_zhpr`|3.5.0| | | | | 
-|`cublasZhpr_v2_64`|12.0| | | |`hipblasZhpr_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZhpr_v2_64`|12.0| | | |`hipblasZhpr_v2_64`|6.2.0| | | | |`rocblas_zhpr_64`|6.2.0| | | | | |`cublasZsymv`| | | | |`hipblasZsymv_v2`|6.0.0| | | | |`rocblas_zsymv`|3.5.0| | | | | -|`cublasZsymv_64`|12.0| | | |`hipblasZsymv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZsymv_64`|12.0| | | |`hipblasZsymv_v2_64`|6.2.0| | | | |`rocblas_zsymv_64`|6.2.0| | | | | |`cublasZsymv_v2`| | | | |`hipblasZsymv_v2`|6.0.0| | | | |`rocblas_zsymv`|3.5.0| | | | | -|`cublasZsymv_v2_64`|12.0| | | |`hipblasZsymv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZsymv_v2_64`|12.0| | | |`hipblasZsymv_v2_64`|6.2.0| | | | |`rocblas_zsymv_64`|6.2.0| | | | | |`cublasZsyr`| | | | |`hipblasZsyr_v2`|6.0.0| | | | |`rocblas_zsyr`|1.7.1| | | | | |`cublasZsyr2`| | | | |`hipblasZsyr2_v2`|6.0.0| | | | |`rocblas_zsyr2`|3.5.0| | | | | -|`cublasZsyr2_64`|12.0| | | |`hipblasZsyr2_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZsyr2_64`|12.0| | | |`hipblasZsyr2_v2_64`|6.2.0| | | | |`rocblas_zsyr2_64`|6.2.0| | | | | |`cublasZsyr2_v2`| | | | |`hipblasZsyr2_v2`|6.0.0| | | | |`rocblas_zsyr2`|3.5.0| | | | | -|`cublasZsyr2_v2_64`|12.0| | | |`hipblasZsyr2_v2_64`|6.2.0| | | | | | | | | | | -|`cublasZsyr_64`|12.0| | | |`hipblasZsyr_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZsyr2_v2_64`|12.0| | | |`hipblasZsyr2_v2_64`|6.2.0| | | | |`rocblas_zsyr2_64`|6.2.0| | | | | +|`cublasZsyr_64`|12.0| | | |`hipblasZsyr_v2_64`|6.2.0| | | | |`rocblas_zsyr_64`|6.2.0| | | | | |`cublasZsyr_v2`| | | | |`hipblasZsyr_v2`|6.0.0| | | | |`rocblas_zsyr`|1.7.1| | | | | -|`cublasZsyr_v2_64`|12.0| | | |`hipblasZsyr_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZsyr_v2_64`|12.0| | | |`hipblasZsyr_v2_64`|6.2.0| | | | |`rocblas_zsyr_64`|6.2.0| | | | | |`cublasZtbmv`| | | | |`hipblasZtbmv_v2`|6.0.0| | | | |`rocblas_ztbmv`|3.5.0| | | | | -|`cublasZtbmv_64`|12.0| | | |`hipblasZtbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtbmv_64`|12.0| | | |`hipblasZtbmv_v2_64`|6.2.0| | | | 
|`rocblas_ztbmv_64`|6.2.0| | | | | |`cublasZtbmv_v2`| | | | |`hipblasZtbmv_v2`|6.0.0| | | | |`rocblas_ztbmv`|3.5.0| | | | | -|`cublasZtbmv_v2_64`|12.0| | | |`hipblasZtbmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtbmv_v2_64`|12.0| | | |`hipblasZtbmv_v2_64`|6.2.0| | | | |`rocblas_ztbmv_64`|6.2.0| | | | | |`cublasZtbsv`| | | | |`hipblasZtbsv_v2`|6.0.0| | | | |`rocblas_ztbsv`|3.5.0| | | | | -|`cublasZtbsv_64`|12.0| | | |`hipblasZtbsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtbsv_64`|12.0| | | |`hipblasZtbsv_v2_64`|6.2.0| | | | |`rocblas_ztbsv_64`|6.2.0| | | | | |`cublasZtbsv_v2`| | | | |`hipblasZtbsv_v2`|6.0.0| | | | |`rocblas_ztbsv`|3.5.0| | | | | -|`cublasZtbsv_v2_64`|12.0| | | |`hipblasZtbsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtbsv_v2_64`|12.0| | | |`hipblasZtbsv_v2_64`|6.2.0| | | | |`rocblas_ztbsv_64`|6.2.0| | | | | |`cublasZtpmv`| | | | |`hipblasZtpmv_v2`|6.0.0| | | | |`rocblas_ztpmv`|3.5.0| | | | | -|`cublasZtpmv_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtpmv_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | | |`rocblas_ztpmv_64`|6.2.0| | | | | |`cublasZtpmv_v2`| | | | |`hipblasZtpmv_v2`|6.0.0| | | | |`rocblas_ztpmv`|3.5.0| | | | | -|`cublasZtpmv_v2_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtpmv_v2_64`|12.0| | | |`hipblasZtpmv_v2_64`|6.2.0| | | | |`rocblas_ztpmv_64`|6.2.0| | | | | |`cublasZtpsv`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | |`rocblas_ztpsv`|3.5.0| | | | | |`cublasZtpsv_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasZtpsv_v2`| | | | |`hipblasZtpsv_v2`|6.0.0| | | | |`rocblas_ztpsv`|3.5.0| | | | | |`cublasZtpsv_v2_64`|12.0| | | |`hipblasZtpsv_v2_64`|6.2.0| | | | | | | | | | | |`cublasZtrmv`| | | | |`hipblasZtrmv_v2`|6.0.0| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtrmv_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrmv_v2`| | | | 
|`hipblasZtrmv_v2`|6.0.0| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_v2_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtrmv_v2_64`|12.0| | | |`hipblasZtrmv_v2_64`|6.2.0| | | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrsv`| | | | |`hipblasZtrsv_v2`|6.0.0| | | | |`rocblas_ztrsv`|3.5.0| | | | | -|`cublasZtrsv_64`|12.0| | | |`hipblasZtrsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtrsv_64`|12.0| | | |`hipblasZtrsv_v2_64`|6.2.0| | | | |`rocblas_ztrsv_64`|6.2.0| | | | | |`cublasZtrsv_v2`| | | | |`hipblasZtrsv_v2`|6.0.0| | | | |`rocblas_ztrsv`|3.5.0| | | | | -|`cublasZtrsv_v2_64`|12.0| | | |`hipblasZtrsv_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZtrsv_v2_64`|12.0| | | |`hipblasZtrsv_v2_64`|6.2.0| | | | |`rocblas_ztrsv_64`|6.2.0| | | | | ## **7. CUBLAS Level-3 Function Reference** @@ -1034,7 +1034,7 @@ |`cublasCgemvBatched`|11.6| | | |`hipblasCgemvBatched_v2`|6.0.0| | | | |`rocblas_cgemv_batched`|3.5.0| | | | | |`cublasCgemvBatched_64`|12.0| | | |`hipblasCgemvBatched_v2_64`|6.2.0| | | | |`rocblas_cgemv_batched_64`|6.2.0| | | | | |`cublasCgemvStridedBatched`|11.6| | | |`hipblasCgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | | -|`cublasCgemvStridedBatched_64`|12.0| | | |`hipblasCgemvStridedBatched_v2_64`|6.2.0| | | | | | | | | | | +|`cublasCgemvStridedBatched_64`|12.0| | | |`hipblasCgemvStridedBatched_v2_64`|6.2.0| | | | |`rocblas_cgemv_strided_batched_64`|6.2.0| | | | | |`cublasChemm`| | | | |`hipblasChemm_v2`|6.0.0| | | | |`rocblas_chemm`|3.5.0| | | | | |`cublasChemm_64`|12.0| | | | | | | | | | | | | | | | |`cublasChemm_v2`| | | | |`hipblasChemm_v2`|6.0.0| | | | |`rocblas_chemm`|3.5.0| | | | | @@ -1083,8 +1083,8 @@ |`cublasDgemm_v2_64`|12.0| | | | | | | | | | | | | | | | |`cublasDgemvBatched`|11.6| | | |`hipblasDgemvBatched`|3.0.0| | | | |`rocblas_dgemv_batched`|3.5.0| | | | | |`cublasDgemvBatched_64`|12.0| | | |`hipblasDgemvBatched_64`|6.2.0| | | | |`rocblas_dgemv_batched_64`|6.2.0| | | | | 
-|`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | | | | | | | | -|`cublasDgemvStridedBatched_64`|12.0| | | |`hipblasDgemvStridedBatched_64`|6.2.0| | | | | | | | | | | +|`cublasDgemvStridedBatched`|11.6| | | |`hipblasDgemvStridedBatched`|3.0.0| | | | |`rocblas_dgemv_strided_batched`|3.5.0| | | | | +|`cublasDgemvStridedBatched_64`|12.0| | | |`hipblasDgemvStridedBatched_64`|6.2.0| | | | |`rocblas_dgemv_strided_batched_64`|6.2.0| | | | | |`cublasDsymm`| | | | |`hipblasDsymm`|3.6.0| | | | |`rocblas_dsymm`|3.5.0| | | | | |`cublasDsymm_64`|12.0| | | | | | | | | | | | | | | | |`cublasDsymm_v2`| | | | |`hipblasDsymm`|3.6.0| | | | |`rocblas_dsymm`|3.5.0| | | | | @@ -1112,11 +1112,11 @@ |`cublasHSHgemvBatched`|11.6| | | | | | | | | |`rocblas_hshgemv_batched`|6.0.0| | | | | |`cublasHSHgemvBatched_64`|12.0| | | | | | | | | |`rocblas_hshgemv_batched_64`|6.2.0| | | | | |`cublasHSHgemvStridedBatched`|11.6| | | | | | | | | |`rocblas_hshgemv_strided_batched`|6.0.0| | | | | -|`cublasHSHgemvStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasHSHgemvStridedBatched_64`|12.0| | | | | | | | | |`rocblas_hshgemv_strided_batched_64`|6.2.0| | | | | |`cublasHSSgemvBatched`|11.6| | | | | | | | | |`rocblas_hssgemv_batched`|6.0.0| | | | | |`cublasHSSgemvBatched_64`|12.0| | | | | | | | | |`rocblas_hssgemv_batched_64`|6.2.0| | | | | |`cublasHSSgemvStridedBatched`|11.6| | | | | | | | | |`rocblas_hssgemv_strided_batched`|6.0.0| | | | | -|`cublasHSSgemvStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasHSSgemvStridedBatched_64`|12.0| | | | | | | | | |`rocblas_hssgemv_strided_batched_64`|6.2.0| | | | | |`cublasHgemm`|7.5| | | |`hipblasHgemm`|1.8.2| | | | |`rocblas_hgemm`|1.5.0| | | | | |`cublasHgemmBatched`|9.0| | | |`hipblasHgemmBatched`|3.0.0| | | | |`rocblas_hgemm_batched`|3.5.0| | | | | |`cublasHgemmBatched_64`|12.0| | | | | | | | | | | | | | | | @@ -1135,8 +1135,8 @@ |`cublasSgemm_v2_64`|12.0| | | | | | | | | | | | | | | | 
|`cublasSgemvBatched`|11.6| | | |`hipblasSgemvBatched`|1.6.0| | | | |`rocblas_sgemv_batched`|3.5.0| | | | | |`cublasSgemvBatched_64`|12.0| | | |`hipblasSgemvBatched_64`|6.2.0| | | | |`rocblas_sgemv_batched_64`|6.2.0| | | | | -|`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | | | | | | | | -|`cublasSgemvStridedBatched_64`|12.0| | | |`hipblasSgemvStridedBatched_64`|6.2.0| | | | | | | | | | | +|`cublasSgemvStridedBatched`|11.6| | | |`hipblasSgemvStridedBatched`|3.0.0| | | | |`rocblas_sgemv_strided_batched`|3.5.0| | | | | +|`cublasSgemvStridedBatched_64`|12.0| | | |`hipblasSgemvStridedBatched_64`|6.2.0| | | | |`rocblas_sgemv_strided_batched_64`|6.2.0| | | | | |`cublasSsymm`| | | | |`hipblasSsymm`|3.6.0| | | | |`rocblas_ssymm`|3.5.0| | | | | |`cublasSsymm_64`|12.0| | | | | | | | | | | | | | | | |`cublasSsymm_v2`| | | | |`hipblasSsymm`|3.6.0| | | | |`rocblas_ssymm`|3.5.0| | | | | @@ -1162,11 +1162,11 @@ |`cublasTSSgemvBatched`|11.6| | | | | | | | | |`rocblas_tssgemv_batched`|6.0.0| | | | | |`cublasTSSgemvBatched_64`|12.0| | | | | | | | | |`rocblas_tssgemv_batched_64`|6.2.0| | | | | |`cublasTSSgemvStridedBatched`|11.6| | | | | | | | | |`rocblas_tssgemv_strided_batched`|6.0.0| | | | | -|`cublasTSSgemvStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasTSSgemvStridedBatched_64`|12.0| | | | | | | | | |`rocblas_tssgemv_strided_batched_64`|6.2.0| | | | | |`cublasTSTgemvBatched`|11.6| | | | | | | | | |`rocblas_tstgemv_batched`|6.0.0| | | | | |`cublasTSTgemvBatched_64`|12.0| | | | | | | | | |`rocblas_tstgemv_batched_64`|6.2.0| | | | | |`cublasTSTgemvStridedBatched`|11.6| | | | | | | | | |`rocblas_tstgemv_strided_batched`|6.0.0| | | | | -|`cublasTSTgemvStridedBatched_64`|12.0| | | | | | | | | | | | | | | | +|`cublasTSTgemvStridedBatched_64`|12.0| | | | | | | | | |`rocblas_tstgemv_strided_batched_64`|6.2.0| | | | | |`cublasZgemm`| | | | |`hipblasZgemm_v2`|6.0.0| | | | |`rocblas_zgemm`|1.5.0| | | | | |`cublasZgemm3m`|8.0| | | | | | | | | 
| | | | | | | |`cublasZgemm3m_64`|12.0| | | | | | | | | | | | | | | | @@ -1180,7 +1180,7 @@ |`cublasZgemvBatched`|11.6| | | |`hipblasZgemvBatched_v2`|6.0.0| | | | |`rocblas_zgemv_batched`|3.5.0| | | | | |`cublasZgemvBatched_64`|12.0| | | |`hipblasZgemvBatched_v2_64`|6.2.0| | | | |`rocblas_zgemv_batched_64`|6.2.0| | | | | |`cublasZgemvStridedBatched`|11.6| | | |`hipblasZgemvStridedBatched_v2`|6.0.0| | | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | -|`cublasZgemvStridedBatched_64`|12.0| | | |`hipblasZgemvStridedBatched_v2_64`|6.2.0| | | | | | | | | | | +|`cublasZgemvStridedBatched_64`|12.0| | | |`hipblasZgemvStridedBatched_v2_64`|6.2.0| | | | |`rocblas_zgemv_strided_batched_64`|6.2.0| | | | | |`cublasZhemm`| | | | |`hipblasZhemm_v2`|6.0.0| | | | |`rocblas_zhemm`|3.5.0| | | | | |`cublasZhemm_64`|12.0| | | | | | | | | | | | | | | | |`cublasZhemm_v2`| | | | |`hipblasZhemm_v2`|6.0.0| | | | |`rocblas_zhemm`|3.5.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_ROC.md b/docs/tables/CUBLAS_API_supported_by_ROC.md index 5c0fa35d..6266492b 100644 --- a/docs/tables/CUBLAS_API_supported_by_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_ROC.md @@ -739,69 +739,69 @@ |`cublasCgeru_v2`| | | | |`rocblas_cgeru`|3.5.0| | | | | |`cublasCgeru_v2_64`|12.0| | | | | | | | | | |`cublasChbmv`| | | | |`rocblas_chbmv`|3.5.0| | | | | -|`cublasChbmv_64`|12.0| | | | | | | | | | +|`cublasChbmv_64`|12.0| | | |`rocblas_chbmv_64`|6.2.0| | | | | |`cublasChbmv_v2`| | | | |`rocblas_chbmv`|3.5.0| | | | | -|`cublasChbmv_v2_64`|12.0| | | | | | | | | | +|`cublasChbmv_v2_64`|12.0| | | |`rocblas_chbmv_64`|6.2.0| | | | | |`cublasChemv`| | | | |`rocblas_chemv`|1.5.0| | | | | -|`cublasChemv_64`|12.0| | | | | | | | | | +|`cublasChemv_64`|12.0| | | |`rocblas_chemv_64`|6.2.0| | | | | |`cublasChemv_v2`| | | | |`rocblas_chemv`|1.5.0| | | | | -|`cublasChemv_v2_64`|12.0| | | | | | | | | | +|`cublasChemv_v2_64`|12.0| | | |`rocblas_chemv_64`|6.2.0| | | | | |`cublasCher`| | | | |`rocblas_cher`|3.5.0| | | 
| | |`cublasCher2`| | | | |`rocblas_cher2`|3.5.0| | | | | -|`cublasCher2_64`|12.0| | | | | | | | | | +|`cublasCher2_64`|12.0| | | |`rocblas_cher2_64`|6.2.0| | | | | |`cublasCher2_v2`| | | | |`rocblas_cher2`|3.5.0| | | | | -|`cublasCher2_v2_64`|12.0| | | | | | | | | | -|`cublasCher_64`|12.0| | | | | | | | | | +|`cublasCher2_v2_64`|12.0| | | |`rocblas_cher2_64`|6.2.0| | | | | +|`cublasCher_64`|12.0| | | |`rocblas_cher_64`|6.2.0| | | | | |`cublasCher_v2`| | | | |`rocblas_cher`|3.5.0| | | | | -|`cublasCher_v2_64`|12.0| | | | | | | | | | +|`cublasCher_v2_64`|12.0| | | |`rocblas_cher_64`|6.2.0| | | | | |`cublasChpmv`| | | | |`rocblas_chpmv`|3.5.0| | | | | -|`cublasChpmv_64`|12.0| | | | | | | | | | +|`cublasChpmv_64`|12.0| | | |`rocblas_chpmv_64`|6.2.0| | | | | |`cublasChpmv_v2`| | | | |`rocblas_chpmv`|3.5.0| | | | | -|`cublasChpmv_v2_64`|12.0| | | | | | | | | | +|`cublasChpmv_v2_64`|12.0| | | |`rocblas_chpmv_64`|6.2.0| | | | | |`cublasChpr`| | | | |`rocblas_chpr`|3.5.0| | | | | |`cublasChpr2`| | | | |`rocblas_chpr2`|3.5.0| | | | | -|`cublasChpr2_64`|12.0| | | | | | | | | | +|`cublasChpr2_64`|12.0| | | |`rocblas_chpr2_64`|6.2.0| | | | | |`cublasChpr2_v2`| | | | |`rocblas_chpr2`|3.5.0| | | | | -|`cublasChpr2_v2_64`|12.0| | | | | | | | | | -|`cublasChpr_64`|12.0| | | | | | | | | | +|`cublasChpr2_v2_64`|12.0| | | |`rocblas_chpr2_64`|6.2.0| | | | | +|`cublasChpr_64`|12.0| | | |`rocblas_chpr_64`|6.2.0| | | | | |`cublasChpr_v2`| | | | |`rocblas_chpr`|3.5.0| | | | | -|`cublasChpr_v2_64`|12.0| | | | | | | | | | +|`cublasChpr_v2_64`|12.0| | | |`rocblas_chpr_64`|6.2.0| | | | | |`cublasCsymv`| | | | |`rocblas_csymv`|3.5.0| | | | | -|`cublasCsymv_64`|12.0| | | | | | | | | | +|`cublasCsymv_64`|12.0| | | |`rocblas_csymv_64`|6.2.0| | | | | |`cublasCsymv_v2`| | | | |`rocblas_csymv`|3.5.0| | | | | -|`cublasCsymv_v2_64`|12.0| | | | | | | | | | +|`cublasCsymv_v2_64`|12.0| | | |`rocblas_csymv_64`|6.2.0| | | | | |`cublasCsyr`| | | | |`rocblas_csyr`|1.7.1| | | | | |`cublasCsyr2`| | | | 
|`rocblas_csyr2`|3.5.0| | | | | -|`cublasCsyr2_64`|12.0| | | | | | | | | | +|`cublasCsyr2_64`|12.0| | | |`rocblas_csyr2_64`|6.2.0| | | | | |`cublasCsyr2_v2`| | | | |`rocblas_csyr2`|3.5.0| | | | | -|`cublasCsyr2_v2_64`|12.0| | | | | | | | | | -|`cublasCsyr_64`|12.0| | | | | | | | | | +|`cublasCsyr2_v2_64`|12.0| | | |`rocblas_csyr2_64`|6.2.0| | | | | +|`cublasCsyr_64`|12.0| | | |`rocblas_csyr_64`|6.2.0| | | | | |`cublasCsyr_v2`| | | | |`rocblas_csyr`|1.7.1| | | | | -|`cublasCsyr_v2_64`|12.0| | | | | | | | | | +|`cublasCsyr_v2_64`|12.0| | | |`rocblas_csyr_64`|6.2.0| | | | | |`cublasCtbmv`| | | | |`rocblas_ctbmv`|3.5.0| | | | | -|`cublasCtbmv_64`|12.0| | | | | | | | | | +|`cublasCtbmv_64`|12.0| | | |`rocblas_ctbmv_64`|6.2.0| | | | | |`cublasCtbmv_v2`| | | | |`rocblas_ctbmv`|3.5.0| | | | | -|`cublasCtbmv_v2_64`|12.0| | | | | | | | | | +|`cublasCtbmv_v2_64`|12.0| | | |`rocblas_ctbmv_64`|6.2.0| | | | | |`cublasCtbsv`| | | | |`rocblas_ctbsv`|3.5.0| | | | | -|`cublasCtbsv_64`|12.0| | | | | | | | | | +|`cublasCtbsv_64`|12.0| | | |`rocblas_ctbsv_64`|6.2.0| | | | | |`cublasCtbsv_v2`| | | | |`rocblas_ctbsv`|3.5.0| | | | | -|`cublasCtbsv_v2_64`|12.0| | | | | | | | | | +|`cublasCtbsv_v2_64`|12.0| | | |`rocblas_ctbsv_64`|6.2.0| | | | | |`cublasCtpmv`| | | | |`rocblas_ctpmv`|3.5.0| | | | | -|`cublasCtpmv_64`|12.0| | | | | | | | | | +|`cublasCtpmv_64`|12.0| | | |`rocblas_ctpmv_64`|6.2.0| | | | | |`cublasCtpmv_v2`| | | | |`rocblas_ctpmv`|3.5.0| | | | | -|`cublasCtpmv_v2_64`|12.0| | | | | | | | | | +|`cublasCtpmv_v2_64`|12.0| | | |`rocblas_ctpmv_64`|6.2.0| | | | | |`cublasCtpsv`| | | | |`rocblas_ctpsv`|3.5.0| | | | | |`cublasCtpsv_64`|12.0| | | | | | | | | | |`cublasCtpsv_v2`| | | | |`rocblas_ctpsv`|3.5.0| | | | | |`cublasCtpsv_v2_64`|12.0| | | | | | | | | | |`cublasCtrmv`| | | | |`rocblas_ctrmv`|3.5.0| | | | | -|`cublasCtrmv_64`|12.0| | | | | | | | | | +|`cublasCtrmv_64`|12.0| | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrmv_v2`| | | | |`rocblas_ctrmv`|3.5.0| | | | | 
-|`cublasCtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasCtrmv_v2_64`|12.0| | | |`rocblas_ctrmv_64`|6.2.0| | | | | |`cublasCtrsv`| | | | |`rocblas_ctrsv`|3.5.0| | | | | -|`cublasCtrsv_64`|12.0| | | | | | | | | | +|`cublasCtrsv_64`|12.0| | | |`rocblas_ctrsv_64`|6.2.0| | | | | |`cublasCtrsv_v2`| | | | |`rocblas_ctrsv`|3.5.0| | | | | -|`cublasCtrsv_v2_64`|12.0| | | | | | | | | | +|`cublasCtrsv_v2_64`|12.0| | | |`rocblas_ctrsv_64`|6.2.0| | | | | |`cublasDgbmv`| | | | |`rocblas_dgbmv`|3.5.0| | | | | |`cublasDgbmv_64`|12.0| | | |`rocblas_dgbmv_64`|6.2.0| | | | | |`cublasDgbmv_v2`| | | | |`rocblas_dgbmv`|3.5.0| | | | | @@ -815,57 +815,57 @@ |`cublasDger_v2`| | | | |`rocblas_dger`|1.5.0| | | | | |`cublasDger_v2_64`|12.0| | | | | | | | | | |`cublasDsbmv`| | | | |`rocblas_dsbmv`|3.5.0| | | | | -|`cublasDsbmv_64`|12.0| | | | | | | | | | +|`cublasDsbmv_64`|12.0| | | |`rocblas_dsbmv_64`|6.2.0| | | | | |`cublasDsbmv_v2`| | | | |`rocblas_dsbmv`|3.5.0| | | | | -|`cublasDsbmv_v2_64`|12.0| | | | | | | | | | +|`cublasDsbmv_v2_64`|12.0| | | |`rocblas_dsbmv_64`|6.2.0| | | | | |`cublasDspmv`| | | | |`rocblas_dspmv`|3.5.0| | | | | -|`cublasDspmv_64`|12.0| | | | | | | | | | +|`cublasDspmv_64`|12.0| | | |`rocblas_dspmv_64`|6.2.0| | | | | |`cublasDspmv_v2`| | | | |`rocblas_dspmv`|3.5.0| | | | | -|`cublasDspmv_v2_64`|12.0| | | | | | | | | | +|`cublasDspmv_v2_64`|12.0| | | |`rocblas_dspmv_64`|6.2.0| | | | | |`cublasDspr`| | | | |`rocblas_dspr`|3.5.0| | | | | |`cublasDspr2`| | | | |`rocblas_dspr2`|3.5.0| | | | | -|`cublasDspr2_64`|12.0| | | | | | | | | | +|`cublasDspr2_64`|12.0| | | |`rocblas_dspr2_64`|6.2.0| | | | | |`cublasDspr2_v2`| | | | |`rocblas_dspr2`|3.5.0| | | | | -|`cublasDspr2_v2_64`|12.0| | | | | | | | | | -|`cublasDspr_64`|12.0| | | | | | | | | | +|`cublasDspr2_v2_64`|12.0| | | |`rocblas_dspr2_64`|6.2.0| | | | | +|`cublasDspr_64`|12.0| | | |`rocblas_dspr_64`|6.2.0| | | | | |`cublasDspr_v2`| | | | |`rocblas_dspr`|3.5.0| | | | | -|`cublasDspr_v2_64`|12.0| | | | | | | | | | 
+|`cublasDspr_v2_64`|12.0| | | |`rocblas_dspr_64`|6.2.0| | | | | |`cublasDsymv`| | | | |`rocblas_dsymv`|1.5.0| | | | | -|`cublasDsymv_64`|12.0| | | | | | | | | | +|`cublasDsymv_64`|12.0| | | |`rocblas_dsymv_64`|6.2.0| | | | | |`cublasDsymv_v2`| | | | |`rocblas_dsymv`|1.5.0| | | | | -|`cublasDsymv_v2_64`|12.0| | | | | | | | | | +|`cublasDsymv_v2_64`|12.0| | | |`rocblas_dsymv_64`|6.2.0| | | | | |`cublasDsyr`| | | | |`rocblas_dsyr`|1.7.1| | | | | |`cublasDsyr2`| | | | |`rocblas_dsyr2`|3.5.0| | | | | -|`cublasDsyr2_64`|12.0| | | | | | | | | | +|`cublasDsyr2_64`|12.0| | | |`rocblas_dsyr2_64`|6.2.0| | | | | |`cublasDsyr2_v2`| | | | |`rocblas_dsyr2`|3.5.0| | | | | -|`cublasDsyr2_v2_64`|12.0| | | | | | | | | | -|`cublasDsyr_64`|12.0| | | | | | | | | | +|`cublasDsyr2_v2_64`|12.0| | | |`rocblas_dsyr2_64`|6.2.0| | | | | +|`cublasDsyr_64`|12.0| | | |`rocblas_dsyr_64`|6.2.0| | | | | |`cublasDsyr_v2`| | | | |`rocblas_dsyr`|1.7.1| | | | | -|`cublasDsyr_v2_64`|12.0| | | | | | | | | | +|`cublasDsyr_v2_64`|12.0| | | |`rocblas_dsyr_64`|6.2.0| | | | | |`cublasDtbmv`| | | | |`rocblas_dtbmv`|3.5.0| | | | | -|`cublasDtbmv_64`|12.0| | | | | | | | | | +|`cublasDtbmv_64`|12.0| | | |`rocblas_dtbmv_64`|6.2.0| | | | | |`cublasDtbmv_v2`| | | | |`rocblas_dtbmv`|3.5.0| | | | | -|`cublasDtbmv_v2_64`|12.0| | | | | | | | | | +|`cublasDtbmv_v2_64`|12.0| | | |`rocblas_dtbmv_64`|6.2.0| | | | | |`cublasDtbsv`| | | | |`rocblas_dtbsv`|3.5.0| | | | | -|`cublasDtbsv_64`|12.0| | | | | | | | | | +|`cublasDtbsv_64`|12.0| | | |`rocblas_dtbsv_64`|6.2.0| | | | | |`cublasDtbsv_v2`| | | | |`rocblas_dtbsv`|3.5.0| | | | | -|`cublasDtbsv_v2_64`|12.0| | | | | | | | | | +|`cublasDtbsv_v2_64`|12.0| | | |`rocblas_dtbsv_64`|6.2.0| | | | | |`cublasDtpmv`| | | | |`rocblas_dtpmv`|3.5.0| | | | | -|`cublasDtpmv_64`|12.0| | | | | | | | | | +|`cublasDtpmv_64`|12.0| | | |`rocblas_dtpmv_64`|6.2.0| | | | | |`cublasDtpmv_v2`| | | | |`rocblas_dtpmv`|3.5.0| | | | | -|`cublasDtpmv_v2_64`|12.0| | | | | | | | | | 
+|`cublasDtpmv_v2_64`|12.0| | | |`rocblas_dtpmv_64`|6.2.0| | | | | |`cublasDtpsv`| | | | |`rocblas_dtpsv`|3.5.0| | | | | |`cublasDtpsv_64`|12.0| | | | | | | | | | |`cublasDtpsv_v2`| | | | |`rocblas_dtpsv`|3.5.0| | | | | |`cublasDtpsv_v2_64`|12.0| | | | | | | | | | |`cublasDtrmv`| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_64`|12.0| | | | | | | | | | +|`cublasDtrmv_64`|12.0| | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrmv_v2`| | | | |`rocblas_dtrmv`|3.5.0| | | | | -|`cublasDtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasDtrmv_v2_64`|12.0| | | |`rocblas_dtrmv_64`|6.2.0| | | | | |`cublasDtrsv`| | | | |`rocblas_dtrsv`|3.5.0| | | | | -|`cublasDtrsv_64`|12.0| | | | | | | | | | +|`cublasDtrsv_64`|12.0| | | |`rocblas_dtrsv_64`|6.2.0| | | | | |`cublasDtrsv_v2`| | | | |`rocblas_dtrsv`|3.5.0| | | | | -|`cublasDtrsv_v2_64`|12.0| | | | | | | | | | +|`cublasDtrsv_v2_64`|12.0| | | |`rocblas_dtrsv_64`|6.2.0| | | | | |`cublasSgbmv`| | | | |`rocblas_sgbmv`|3.5.0| | | | | |`cublasSgbmv_64`|12.0| | | |`rocblas_sgbmv_64`|6.2.0| | | | | |`cublasSgbmv_v2`| | | | |`rocblas_sgbmv`|3.5.0| | | | | @@ -879,57 +879,57 @@ |`cublasSger_v2`| | | | |`rocblas_sger`|1.5.0| | | | | |`cublasSger_v2_64`|12.0| | | | | | | | | | |`cublasSsbmv`| | | | |`rocblas_ssbmv`|3.5.0| | | | | -|`cublasSsbmv_64`|12.0| | | | | | | | | | +|`cublasSsbmv_64`|12.0| | | |`rocblas_ssbmv_64`|6.2.0| | | | | |`cublasSsbmv_v2`| | | | |`rocblas_ssbmv`|3.5.0| | | | | -|`cublasSsbmv_v2_64`|12.0| | | | | | | | | | +|`cublasSsbmv_v2_64`|12.0| | | |`rocblas_ssbmv_64`|6.2.0| | | | | |`cublasSspmv`| | | | |`rocblas_sspmv`|3.5.0| | | | | -|`cublasSspmv_64`|12.0| | | | | | | | | | +|`cublasSspmv_64`|12.0| | | |`rocblas_sspmv_64`|6.2.0| | | | | |`cublasSspmv_v2`| | | | |`rocblas_sspmv`|3.5.0| | | | | -|`cublasSspmv_v2_64`|12.0| | | | | | | | | | +|`cublasSspmv_v2_64`|12.0| | | |`rocblas_sspmv_64`|6.2.0| | | | | |`cublasSspr`| | | | |`rocblas_sspr`|3.5.0| | | | | |`cublasSspr2`| | | | |`rocblas_sspr2`|3.5.0| | | | | 
-|`cublasSspr2_64`|12.0| | | | | | | | | | +|`cublasSspr2_64`|12.0| | | |`rocblas_sspr2_64`|6.2.0| | | | | |`cublasSspr2_v2`| | | | |`rocblas_sspr2`|3.5.0| | | | | -|`cublasSspr2_v2_64`|12.0| | | | | | | | | | -|`cublasSspr_64`|12.0| | | | | | | | | | +|`cublasSspr2_v2_64`|12.0| | | |`rocblas_sspr2_64`|6.2.0| | | | | +|`cublasSspr_64`|12.0| | | |`rocblas_sspr_64`|6.2.0| | | | | |`cublasSspr_v2`| | | | |`rocblas_sspr`|3.5.0| | | | | -|`cublasSspr_v2_64`|12.0| | | | | | | | | | +|`cublasSspr_v2_64`|12.0| | | |`rocblas_sspr_64`|6.2.0| | | | | |`cublasSsymv`| | | | |`rocblas_ssymv`|1.5.0| | | | | -|`cublasSsymv_64`|12.0| | | | | | | | | | +|`cublasSsymv_64`|12.0| | | |`rocblas_ssymv_64`|6.2.0| | | | | |`cublasSsymv_v2`| | | | |`rocblas_ssymv`|1.5.0| | | | | -|`cublasSsymv_v2_64`|12.0| | | | | | | | | | +|`cublasSsymv_v2_64`|12.0| | | |`rocblas_ssymv_64`|6.2.0| | | | | |`cublasSsyr`| | | | |`rocblas_ssyr`|1.7.1| | | | | |`cublasSsyr2`| | | | |`rocblas_ssyr2`|3.5.0| | | | | -|`cublasSsyr2_64`|12.0| | | | | | | | | | +|`cublasSsyr2_64`|12.0| | | |`rocblas_ssyr2_64`|6.2.0| | | | | |`cublasSsyr2_v2`| | | | |`rocblas_ssyr2`|3.5.0| | | | | -|`cublasSsyr2_v2_64`|12.0| | | | | | | | | | -|`cublasSsyr_64`|12.0| | | | | | | | | | +|`cublasSsyr2_v2_64`|12.0| | | |`rocblas_ssyr2_64`|6.2.0| | | | | +|`cublasSsyr_64`|12.0| | | |`rocblas_ssyr_64`|6.2.0| | | | | |`cublasSsyr_v2`| | | | |`rocblas_ssyr`|1.7.1| | | | | -|`cublasSsyr_v2_64`|12.0| | | | | | | | | | +|`cublasSsyr_v2_64`|12.0| | | |`rocblas_ssyr_64`|6.2.0| | | | | |`cublasStbmv`| | | | |`rocblas_stbmv`|3.5.0| | | | | -|`cublasStbmv_64`|12.0| | | | | | | | | | +|`cublasStbmv_64`|12.0| | | |`rocblas_stbmv_64`|6.2.0| | | | | |`cublasStbmv_v2`| | | | |`rocblas_stbmv`|3.5.0| | | | | -|`cublasStbmv_v2_64`|12.0| | | | | | | | | | +|`cublasStbmv_v2_64`|12.0| | | |`rocblas_stbmv_64`|6.2.0| | | | | |`cublasStbsv`| | | | |`rocblas_stbsv`|3.5.0| | | | | -|`cublasStbsv_64`|12.0| | | | | | | | | | +|`cublasStbsv_64`|12.0| | | 
|`rocblas_stbsv_64`|6.2.0| | | | | |`cublasStbsv_v2`| | | | |`rocblas_stbsv`|3.5.0| | | | | -|`cublasStbsv_v2_64`|12.0| | | | | | | | | | +|`cublasStbsv_v2_64`|12.0| | | |`rocblas_stbsv_64`|6.2.0| | | | | |`cublasStpmv`| | | | |`rocblas_stpmv`|3.5.0| | | | | -|`cublasStpmv_64`|12.0| | | | | | | | | | +|`cublasStpmv_64`|12.0| | | |`rocblas_stpmv_64`|6.2.0| | | | | |`cublasStpmv_v2`| | | | |`rocblas_stpmv`|3.5.0| | | | | -|`cublasStpmv_v2_64`|12.0| | | | | | | | | | +|`cublasStpmv_v2_64`|12.0| | | |`rocblas_stpmv_64`|6.2.0| | | | | |`cublasStpsv`| | | | |`rocblas_stpsv`|3.5.0| | | | | |`cublasStpsv_64`|12.0| | | | | | | | | | |`cublasStpsv_v2`| | | | |`rocblas_stpsv`|3.5.0| | | | | |`cublasStpsv_v2_64`|12.0| | | | | | | | | | |`cublasStrmv`| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_64`|12.0| | | | | | | | | | +|`cublasStrmv_64`|12.0| | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrmv_v2`| | | | |`rocblas_strmv`|3.5.0| | | | | -|`cublasStrmv_v2_64`|12.0| | | | | | | | | | +|`cublasStrmv_v2_64`|12.0| | | |`rocblas_strmv_64`|6.2.0| | | | | |`cublasStrsv`| | | | |`rocblas_strsv`|3.5.0| | | | | -|`cublasStrsv_64`|12.0| | | | | | | | | | +|`cublasStrsv_64`|12.0| | | |`rocblas_strsv_64`|6.2.0| | | | | |`cublasStrsv_v2`| | | | |`rocblas_strsv`|3.5.0| | | | | -|`cublasStrsv_v2_64`|12.0| | | | | | | | | | +|`cublasStrsv_v2_64`|12.0| | | |`rocblas_strsv_64`|6.2.0| | | | | |`cublasZgbmv`| | | | |`rocblas_zgbmv`|3.5.0| | | | | |`cublasZgbmv_64`|12.0| | | |`rocblas_zgbmv_64`|6.2.0| | | | | |`cublasZgbmv_v2`| | | | |`rocblas_zgbmv`|3.5.0| | | | | @@ -947,69 +947,69 @@ |`cublasZgeru_v2`| | | | |`rocblas_zgeru`|3.5.0| | | | | |`cublasZgeru_v2_64`|12.0| | | | | | | | | | |`cublasZhbmv`| | | | |`rocblas_zhbmv`|3.5.0| | | | | -|`cublasZhbmv_64`|12.0| | | | | | | | | | +|`cublasZhbmv_64`|12.0| | | |`rocblas_zhbmv_64`|6.2.0| | | | | |`cublasZhbmv_v2`| | | | |`rocblas_zhbmv`|3.5.0| | | | | -|`cublasZhbmv_v2_64`|12.0| | | | | | | | | | +|`cublasZhbmv_v2_64`|12.0| | | 
|`rocblas_zhbmv_64`|6.2.0| | | | | |`cublasZhemv`| | | | |`rocblas_zhemv`|1.5.0| | | | | -|`cublasZhemv_64`|12.0| | | | | | | | | | +|`cublasZhemv_64`|12.0| | | |`rocblas_zhemv_64`|6.2.0| | | | | |`cublasZhemv_v2`| | | | |`rocblas_zhemv`|1.5.0| | | | | -|`cublasZhemv_v2_64`|12.0| | | | | | | | | | +|`cublasZhemv_v2_64`|12.0| | | |`rocblas_zhemv_64`|6.2.0| | | | | |`cublasZher`| | | | |`rocblas_zher`|3.5.0| | | | | |`cublasZher2`| | | | |`rocblas_zher2`|3.5.0| | | | | -|`cublasZher2_64`|12.0| | | | | | | | | | +|`cublasZher2_64`|12.0| | | |`rocblas_zher2_64`|6.2.0| | | | | |`cublasZher2_v2`| | | | |`rocblas_zher2`|3.5.0| | | | | -|`cublasZher2_v2_64`|12.0| | | | | | | | | | -|`cublasZher_64`|12.0| | | | | | | | | | +|`cublasZher2_v2_64`|12.0| | | |`rocblas_zher2_64`|6.2.0| | | | | +|`cublasZher_64`|12.0| | | |`rocblas_zher_64`|6.2.0| | | | | |`cublasZher_v2`| | | | |`rocblas_zher`|3.5.0| | | | | -|`cublasZher_v2_64`|12.0| | | | | | | | | | +|`cublasZher_v2_64`|12.0| | | |`rocblas_zher_64`|6.2.0| | | | | |`cublasZhpmv`| | | | |`rocblas_zhpmv`|3.5.0| | | | | -|`cublasZhpmv_64`|12.0| | | | | | | | | | +|`cublasZhpmv_64`|12.0| | | |`rocblas_zhpmv_64`|6.2.0| | | | | |`cublasZhpmv_v2`| | | | |`rocblas_zhpmv`|3.5.0| | | | | -|`cublasZhpmv_v2_64`|12.0| | | | | | | | | | +|`cublasZhpmv_v2_64`|12.0| | | |`rocblas_zhpmv_64`|6.2.0| | | | | |`cublasZhpr`| | | | |`rocblas_zhpr`|3.5.0| | | | | |`cublasZhpr2`| | | | |`rocblas_zhpr2`|3.5.0| | | | | -|`cublasZhpr2_64`|12.0| | | | | | | | | | +|`cublasZhpr2_64`|12.0| | | |`rocblas_zhpr2_64`|6.2.0| | | | | |`cublasZhpr2_v2`| | | | |`rocblas_zhpr2`|3.5.0| | | | | -|`cublasZhpr2_v2_64`|12.0| | | | | | | | | | -|`cublasZhpr_64`|12.0| | | | | | | | | | +|`cublasZhpr2_v2_64`|12.0| | | |`rocblas_zhpr2_64`|6.2.0| | | | | +|`cublasZhpr_64`|12.0| | | |`rocblas_zhpr_64`|6.2.0| | | | | |`cublasZhpr_v2`| | | | |`rocblas_zhpr`|3.5.0| | | | | -|`cublasZhpr_v2_64`|12.0| | | | | | | | | | +|`cublasZhpr_v2_64`|12.0| | | |`rocblas_zhpr_64`|6.2.0| | | | 
| |`cublasZsymv`| | | | |`rocblas_zsymv`|3.5.0| | | | | -|`cublasZsymv_64`|12.0| | | | | | | | | | +|`cublasZsymv_64`|12.0| | | |`rocblas_zsymv_64`|6.2.0| | | | | |`cublasZsymv_v2`| | | | |`rocblas_zsymv`|3.5.0| | | | | -|`cublasZsymv_v2_64`|12.0| | | | | | | | | | +|`cublasZsymv_v2_64`|12.0| | | |`rocblas_zsymv_64`|6.2.0| | | | | |`cublasZsyr`| | | | |`rocblas_zsyr`|1.7.1| | | | | |`cublasZsyr2`| | | | |`rocblas_zsyr2`|3.5.0| | | | | -|`cublasZsyr2_64`|12.0| | | | | | | | | | +|`cublasZsyr2_64`|12.0| | | |`rocblas_zsyr2_64`|6.2.0| | | | | |`cublasZsyr2_v2`| | | | |`rocblas_zsyr2`|3.5.0| | | | | -|`cublasZsyr2_v2_64`|12.0| | | | | | | | | | -|`cublasZsyr_64`|12.0| | | | | | | | | | +|`cublasZsyr2_v2_64`|12.0| | | |`rocblas_zsyr2_64`|6.2.0| | | | | +|`cublasZsyr_64`|12.0| | | |`rocblas_zsyr_64`|6.2.0| | | | | |`cublasZsyr_v2`| | | | |`rocblas_zsyr`|1.7.1| | | | | -|`cublasZsyr_v2_64`|12.0| | | | | | | | | | +|`cublasZsyr_v2_64`|12.0| | | |`rocblas_zsyr_64`|6.2.0| | | | | |`cublasZtbmv`| | | | |`rocblas_ztbmv`|3.5.0| | | | | -|`cublasZtbmv_64`|12.0| | | | | | | | | | +|`cublasZtbmv_64`|12.0| | | |`rocblas_ztbmv_64`|6.2.0| | | | | |`cublasZtbmv_v2`| | | | |`rocblas_ztbmv`|3.5.0| | | | | -|`cublasZtbmv_v2_64`|12.0| | | | | | | | | | +|`cublasZtbmv_v2_64`|12.0| | | |`rocblas_ztbmv_64`|6.2.0| | | | | |`cublasZtbsv`| | | | |`rocblas_ztbsv`|3.5.0| | | | | -|`cublasZtbsv_64`|12.0| | | | | | | | | | +|`cublasZtbsv_64`|12.0| | | |`rocblas_ztbsv_64`|6.2.0| | | | | |`cublasZtbsv_v2`| | | | |`rocblas_ztbsv`|3.5.0| | | | | -|`cublasZtbsv_v2_64`|12.0| | | | | | | | | | +|`cublasZtbsv_v2_64`|12.0| | | |`rocblas_ztbsv_64`|6.2.0| | | | | |`cublasZtpmv`| | | | |`rocblas_ztpmv`|3.5.0| | | | | -|`cublasZtpmv_64`|12.0| | | | | | | | | | +|`cublasZtpmv_64`|12.0| | | |`rocblas_ztpmv_64`|6.2.0| | | | | |`cublasZtpmv_v2`| | | | |`rocblas_ztpmv`|3.5.0| | | | | -|`cublasZtpmv_v2_64`|12.0| | | | | | | | | | +|`cublasZtpmv_v2_64`|12.0| | | |`rocblas_ztpmv_64`|6.2.0| | | | | |`cublasZtpsv`| | | | 
|`rocblas_ztpsv`|3.5.0| | | | | |`cublasZtpsv_64`|12.0| | | | | | | | | | |`cublasZtpsv_v2`| | | | |`rocblas_ztpsv`|3.5.0| | | | | |`cublasZtpsv_v2_64`|12.0| | | | | | | | | | |`cublasZtrmv`| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_64`|12.0| | | | | | | | | | +|`cublasZtrmv_64`|12.0| | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrmv_v2`| | | | |`rocblas_ztrmv`|3.5.0| | | | | -|`cublasZtrmv_v2_64`|12.0| | | | | | | | | | +|`cublasZtrmv_v2_64`|12.0| | | |`rocblas_ztrmv_64`|6.2.0| | | | | |`cublasZtrsv`| | | | |`rocblas_ztrsv`|3.5.0| | | | | -|`cublasZtrsv_64`|12.0| | | | | | | | | | +|`cublasZtrsv_64`|12.0| | | |`rocblas_ztrsv_64`|6.2.0| | | | | |`cublasZtrsv_v2`| | | | |`rocblas_ztrsv`|3.5.0| | | | | -|`cublasZtrsv_v2_64`|12.0| | | | | | | | | | +|`cublasZtrsv_v2_64`|12.0| | | |`rocblas_ztrsv_64`|6.2.0| | | | | ## **7. CUBLAS Level-3 Function Reference** @@ -1034,7 +1034,7 @@ |`cublasCgemvBatched`|11.6| | | |`rocblas_cgemv_batched`|3.5.0| | | | | |`cublasCgemvBatched_64`|12.0| | | |`rocblas_cgemv_batched_64`|6.2.0| | | | | |`cublasCgemvStridedBatched`|11.6| | | |`rocblas_cgemv_strided_batched`|3.5.0| | | | | -|`cublasCgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasCgemvStridedBatched_64`|12.0| | | |`rocblas_cgemv_strided_batched_64`|6.2.0| | | | | |`cublasChemm`| | | | |`rocblas_chemm`|3.5.0| | | | | |`cublasChemm_64`|12.0| | | | | | | | | | |`cublasChemm_v2`| | | | |`rocblas_chemm`|3.5.0| | | | | @@ -1083,8 +1083,8 @@ |`cublasDgemm_v2_64`|12.0| | | | | | | | | | |`cublasDgemvBatched`|11.6| | | |`rocblas_dgemv_batched`|3.5.0| | | | | |`cublasDgemvBatched_64`|12.0| | | |`rocblas_dgemv_batched_64`|6.2.0| | | | | -|`cublasDgemvStridedBatched`|11.6| | | | | | | | | | -|`cublasDgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasDgemvStridedBatched`|11.6| | | |`rocblas_dgemv_strided_batched`|3.5.0| | | | | +|`cublasDgemvStridedBatched_64`|12.0| | | |`rocblas_dgemv_strided_batched_64`|6.2.0| | | | | |`cublasDsymm`| | | | 
|`rocblas_dsymm`|3.5.0| | | | | |`cublasDsymm_64`|12.0| | | | | | | | | | |`cublasDsymm_v2`| | | | |`rocblas_dsymm`|3.5.0| | | | | @@ -1112,11 +1112,11 @@ |`cublasHSHgemvBatched`|11.6| | | |`rocblas_hshgemv_batched`|6.0.0| | | | | |`cublasHSHgemvBatched_64`|12.0| | | |`rocblas_hshgemv_batched_64`|6.2.0| | | | | |`cublasHSHgemvStridedBatched`|11.6| | | |`rocblas_hshgemv_strided_batched`|6.0.0| | | | | -|`cublasHSHgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasHSHgemvStridedBatched_64`|12.0| | | |`rocblas_hshgemv_strided_batched_64`|6.2.0| | | | | |`cublasHSSgemvBatched`|11.6| | | |`rocblas_hssgemv_batched`|6.0.0| | | | | |`cublasHSSgemvBatched_64`|12.0| | | |`rocblas_hssgemv_batched_64`|6.2.0| | | | | |`cublasHSSgemvStridedBatched`|11.6| | | |`rocblas_hssgemv_strided_batched`|6.0.0| | | | | -|`cublasHSSgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasHSSgemvStridedBatched_64`|12.0| | | |`rocblas_hssgemv_strided_batched_64`|6.2.0| | | | | |`cublasHgemm`|7.5| | | |`rocblas_hgemm`|1.5.0| | | | | |`cublasHgemmBatched`|9.0| | | |`rocblas_hgemm_batched`|3.5.0| | | | | |`cublasHgemmBatched_64`|12.0| | | | | | | | | | @@ -1135,8 +1135,8 @@ |`cublasSgemm_v2_64`|12.0| | | | | | | | | | |`cublasSgemvBatched`|11.6| | | |`rocblas_sgemv_batched`|3.5.0| | | | | |`cublasSgemvBatched_64`|12.0| | | |`rocblas_sgemv_batched_64`|6.2.0| | | | | -|`cublasSgemvStridedBatched`|11.6| | | | | | | | | | -|`cublasSgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasSgemvStridedBatched`|11.6| | | |`rocblas_sgemv_strided_batched`|3.5.0| | | | | +|`cublasSgemvStridedBatched_64`|12.0| | | |`rocblas_sgemv_strided_batched_64`|6.2.0| | | | | |`cublasSsymm`| | | | |`rocblas_ssymm`|3.5.0| | | | | |`cublasSsymm_64`|12.0| | | | | | | | | | |`cublasSsymm_v2`| | | | |`rocblas_ssymm`|3.5.0| | | | | @@ -1162,11 +1162,11 @@ |`cublasTSSgemvBatched`|11.6| | | |`rocblas_tssgemv_batched`|6.0.0| | | | | |`cublasTSSgemvBatched_64`|12.0| | | |`rocblas_tssgemv_batched_64`|6.2.0| | | | | 
|`cublasTSSgemvStridedBatched`|11.6| | | |`rocblas_tssgemv_strided_batched`|6.0.0| | | | | -|`cublasTSSgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasTSSgemvStridedBatched_64`|12.0| | | |`rocblas_tssgemv_strided_batched_64`|6.2.0| | | | | |`cublasTSTgemvBatched`|11.6| | | |`rocblas_tstgemv_batched`|6.0.0| | | | | |`cublasTSTgemvBatched_64`|12.0| | | |`rocblas_tstgemv_batched_64`|6.2.0| | | | | |`cublasTSTgemvStridedBatched`|11.6| | | |`rocblas_tstgemv_strided_batched`|6.0.0| | | | | -|`cublasTSTgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasTSTgemvStridedBatched_64`|12.0| | | |`rocblas_tstgemv_strided_batched_64`|6.2.0| | | | | |`cublasZgemm`| | | | |`rocblas_zgemm`|1.5.0| | | | | |`cublasZgemm3m`|8.0| | | | | | | | | | |`cublasZgemm3m_64`|12.0| | | | | | | | | | @@ -1180,7 +1180,7 @@ |`cublasZgemvBatched`|11.6| | | |`rocblas_zgemv_batched`|3.5.0| | | | | |`cublasZgemvBatched_64`|12.0| | | |`rocblas_zgemv_batched_64`|6.2.0| | | | | |`cublasZgemvStridedBatched`|11.6| | | |`rocblas_zgemv_strided_batched`|3.5.0| | | | | -|`cublasZgemvStridedBatched_64`|12.0| | | | | | | | | | +|`cublasZgemvStridedBatched_64`|12.0| | | |`rocblas_zgemv_strided_batched_64`|6.2.0| | | | | |`cublasZhemm`| | | | |`rocblas_zhemm`|3.5.0| | | | | |`cublasZhemm_64`|12.0| | | | | | | | | | |`cublasZhemm_v2`| | | | |`rocblas_zhemm`|3.5.0| | | | | diff --git a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md index 41098b8e..3040876b 100644 --- a/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Driver_API_functions_supported_by_HIP.md @@ -1901,7 +1901,7 @@ |`cuGraphAddHostNode`|10.0| | | |`hipGraphAddHostNode`|5.0.0| | | | | |`cuGraphAddKernelNode`|10.0| | | |`hipGraphAddKernelNode`|4.3.0| | | | | |`cuGraphAddMemAllocNode`|11.4| | | |`hipGraphAddMemAllocNode`|5.5.0| | | | | -|`cuGraphAddMemFreeNode`|11.4| | | | | | | | | | +|`cuGraphAddMemFreeNode`|11.4| | | 
|`hipDrvGraphAddMemFreeNode`|6.3.0| | | |6.3.0| |`cuGraphAddMemcpyNode`|10.0| | | |`hipDrvGraphAddMemcpyNode`|6.0.0| | | | | |`cuGraphAddMemsetNode`|10.0| | | |`hipDrvGraphAddMemsetNode`|6.1.0| | | | | |`cuGraphAddNode`|12.2| | | |`hipGraphAddNode`|6.2.0| | | | | @@ -1926,11 +1926,12 @@ |`cuGraphExecEventWaitNodeSetEvent`|11.1| | | |`hipGraphExecEventWaitNodeSetEvent`|5.0.0| | | | | |`cuGraphExecExternalSemaphoresSignalNodeSetParams`|11.2| | | |`hipGraphExecExternalSemaphoresSignalNodeSetParams`|5.7.0| | | | | |`cuGraphExecExternalSemaphoresWaitNodeSetParams`|11.2| | | |`hipGraphExecExternalSemaphoresWaitNodeSetParams`|5.7.0| | | | | -|`cuGraphExecGetFlags`|12.0| | | | | | | | | | +|`cuGraphExecGetFlags`|12.0| | | |`hipGraphExecGetFlags`|6.3.0| | | |6.3.0| |`cuGraphExecHostNodeSetParams`|10.2| | | |`hipGraphExecHostNodeSetParams`|5.0.0| | | | | |`cuGraphExecKernelNodeSetParams`|10.1| | | |`hipGraphExecKernelNodeSetParams`|4.5.0| | | | | -|`cuGraphExecMemcpyNodeSetParams`|10.2| | | | | | | | | | -|`cuGraphExecNodeSetParams`|12.2| | | | | | | | | | +|`cuGraphExecMemcpyNodeSetParams`|10.2| | | |`hipDrvGraphExecMemcpyNodeSetParams`|6.3.0| | | |6.3.0| +|`cuGraphExecMemsetNodeSetParams`|10.2| | | |`hipDrvGraphExecMemsetNodeSetParams`|6.3.0| | | |6.3.0| +|`cuGraphExecNodeSetParams`|12.2| | | |`hipGraphExecNodeSetParams`|6.3.0| | | |6.3.0| |`cuGraphExecUpdate`|10.2| | | |`hipGraphExecUpdate`|5.0.0| | | | | |`cuGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | |`hipGraphExternalSemaphoresSignalNodeGetParams`|5.7.0| | | | | |`cuGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | |`hipGraphExternalSemaphoresSignalNodeSetParams`|5.7.0| | | | | @@ -1954,8 +1955,8 @@ |`cuGraphLaunch`|10.0| | | |`hipGraphLaunch`|4.3.0| | | | | |`cuGraphMemAllocNodeGetParams`|11.4| | | |`hipGraphMemAllocNodeGetParams`|5.5.0| | | | | |`cuGraphMemFreeNodeGetParams`|11.4| | | |`hipGraphMemFreeNodeGetParams`|5.5.0| | | | | -|`cuGraphMemcpyNodeGetParams`|10.0| | | | | | | | | | 
-|`cuGraphMemcpyNodeSetParams`|10.0| | | | | | | | | | +|`cuGraphMemcpyNodeGetParams`|10.0| | | |`hipDrvGraphMemcpyNodeGetParams`|6.3.0| | | |6.3.0| +|`cuGraphMemcpyNodeSetParams`|10.0| | | |`hipDrvGraphMemcpyNodeSetParams`|6.3.0| | | |6.3.0| |`cuGraphMemsetNodeGetParams`|10.0| | | |`hipGraphMemsetNodeGetParams`|4.5.0| | | | | |`cuGraphMemsetNodeSetParams`|10.0| | | |`hipGraphMemsetNodeSetParams`|4.5.0| | | | | |`cuGraphNodeFindInClone`|10.0| | | |`hipGraphNodeFindInClone`|5.0.0| | | | | @@ -1966,7 +1967,7 @@ |`cuGraphNodeGetEnabled`|11.6| | | |`hipGraphNodeGetEnabled`|5.5.0| | | | | |`cuGraphNodeGetType`|10.0| | | |`hipGraphNodeGetType`|5.0.0| | | | | |`cuGraphNodeSetEnabled`|11.6| | | |`hipGraphNodeSetEnabled`|5.5.0| | | | | -|`cuGraphNodeSetParams`|12.2| | | | | | | | | | +|`cuGraphNodeSetParams`|12.2| | | |`hipGraphNodeSetParams`|6.3.0| | | |6.3.0| |`cuGraphReleaseUserObject`|11.3| | | |`hipGraphReleaseUserObject`|5.3.0| | | | | |`cuGraphRemoveDependencies`|10.0| | | |`hipGraphRemoveDependencies`|5.0.0| | | | | |`cuGraphRemoveDependencies_v2`|12.3| | | | | | | | | | diff --git a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md index 3968ea01..a681967b 100644 --- a/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/tables/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -464,7 +464,7 @@ |`cudaGraphExecEventWaitNodeSetEvent`|11.1| | | |`hipGraphExecEventWaitNodeSetEvent`|5.0.0| | | | | |`cudaGraphExecExternalSemaphoresSignalNodeSetParams`|11.2| | | |`hipGraphExecExternalSemaphoresSignalNodeSetParams`|5.7.0| | | | | |`cudaGraphExecExternalSemaphoresWaitNodeSetParams`|11.2| | | |`hipGraphExecExternalSemaphoresWaitNodeSetParams`|5.7.0| | | | | -|`cudaGraphExecGetFlags`|12.0| | | | | | | | | | +|`cudaGraphExecGetFlags`|12.0| | | |`hipGraphExecGetFlags`|6.3.0| | | |6.3.0| |`cudaGraphExecHostNodeSetParams`|11.0| | | |`hipGraphExecHostNodeSetParams`|5.0.0| | | | | 
|`cudaGraphExecKernelNodeSetParams`|11.0| | | |`hipGraphExecKernelNodeSetParams`|4.5.0| | | | | |`cudaGraphExecMemcpyNodeSetParams`|11.0| | | |`hipGraphExecMemcpyNodeSetParams`|5.0.0| | | | | @@ -472,7 +472,7 @@ |`cudaGraphExecMemcpyNodeSetParamsFromSymbol`|11.1| | | |`hipGraphExecMemcpyNodeSetParamsFromSymbol`|5.0.0| | | | | |`cudaGraphExecMemcpyNodeSetParamsToSymbol`|11.1| | | |`hipGraphExecMemcpyNodeSetParamsToSymbol`|5.0.0| | | | | |`cudaGraphExecMemsetNodeSetParams`|11.0| | | |`hipGraphExecMemsetNodeSetParams`|5.0.0| | | | | -|`cudaGraphExecNodeSetParams`|12.2| | | | | | | | | | +|`cudaGraphExecNodeSetParams`|12.2| | | |`hipGraphExecNodeSetParams`|6.3.0| | | |6.3.0| |`cudaGraphExecUpdate`|11.0| | | |`hipGraphExecUpdate`|5.0.0| | | | | |`cudaGraphExternalSemaphoresSignalNodeGetParams`|11.2| | | |`hipGraphExternalSemaphoresSignalNodeGetParams`|5.7.0| | | | | |`cudaGraphExternalSemaphoresSignalNodeSetParams`|11.2| | | |`hipGraphExternalSemaphoresSignalNodeSetParams`|5.7.0| | | | | @@ -510,7 +510,7 @@ |`cudaGraphNodeGetEnabled`|11.6| | | |`hipGraphNodeGetEnabled`|5.5.0| | | | | |`cudaGraphNodeGetType`|11.0| | | |`hipGraphNodeGetType`|5.0.0| | | | | |`cudaGraphNodeSetEnabled`|11.6| | | |`hipGraphNodeSetEnabled`|5.5.0| | | | | -|`cudaGraphNodeSetParams`|12.2| | | | | | | | | | +|`cudaGraphNodeSetParams`|12.2| | | |`hipGraphNodeSetParams`|6.3.0| | | |6.3.0| |`cudaGraphReleaseUserObject`|11.3| | | |`hipGraphReleaseUserObject`|5.3.0| | | | | |`cudaGraphRemoveDependencies`|11.0| | | |`hipGraphRemoveDependencies`|5.0.0| | | | | |`cudaGraphRemoveDependencies_v2`|12.3| | | | | | | | | | diff --git a/docs/tables/CUDNN_API_supported_by_HIP.md b/docs/tables/CUDNN_API_supported_by_HIP.md index 2bef5b3d..11608bf9 100644 --- a/docs/tables/CUDNN_API_supported_by_HIP.md +++ b/docs/tables/CUDNN_API_supported_by_HIP.md @@ -55,6 +55,7 @@ |`CUDNN_ATTR_INTERMEDIATE_INFO_SIZE`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID`|8.0.2| | | | | | | | | | 
|`CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED`|9.4.0| | | | | | | | | | +|`CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH`|9.5.0| | | | | | | | | | |`CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE`|8.0.1| | | | | | | | | | |`CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE`|8.0.1| | | | | | | | | | @@ -287,6 +288,7 @@ |`CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER`|8.3.0| | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER`|8.3.0| | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION`|8.2.0| | | | | | | | | | +|`CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API`|9.5.0| | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_TYPE_COUNT`|8.2.0| | | | | | | | | | |`CUDNN_BIDIRECTIONAL`|5.0.0| | | |`HIPDNN_BIDIRECTIONAL`| | | | | | |`CUDNN_BN_FINALIZE_STATISTICS_INFERENCE`|8.1.0| | | | | | | | | | @@ -620,8 +622,8 @@ |`CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED`|7.2.1| | | | | | | | | | |`CUDNN_RNN_DOUBLE_BIAS`|7.5.0| | | |`HIPDNN_RNN_WITH_BIAS`| | | | | | |`CUDNN_RNN_NO_BIAS`|7.5.0| | | |`HIPDNN_RNN_NO_BIAS`| | | | | | -|`CUDNN_RNN_PADDED_IO_DISABLED`|7.2.1| | | | | | | | | | -|`CUDNN_RNN_PADDED_IO_ENABLED`|7.2.1| | | | | | | | | | +|`CUDNN_RNN_PADDED_IO_DISABLED`|7.2.1|8.0.1| |9.0.0| | | | | | | +|`CUDNN_RNN_PADDED_IO_ENABLED`|7.2.1|8.0.1| |9.0.0| | | | | | | |`CUDNN_RNN_RELU`|5.0.0| | | |`HIPDNN_RNN_RELU`| | | | | | |`CUDNN_RNN_SINGLE_INP_BIAS`|7.5.0| | | |`HIPDNN_RNN_WITH_BIAS`| | | | | | |`CUDNN_RNN_SINGLE_REC_BIAS`|7.5.0| | | |`HIPDNN_RNN_WITH_BIAS`| | | | | | @@ -655,6 +657,7 @@ |`CUDNN_STATUS_ARCH_MISMATCH`|1.0.0|9.0.0| | |`HIPDNN_STATUS_ARCH_MISMATCH`| | | | | | |`CUDNN_STATUS_BAD_PARAM`|1.0.0| | | |`HIPDNN_STATUS_BAD_PARAM`| | | | | | |`CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE`|9.0.0| | | | | | | | | | +|`CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH`|9.5.0| | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER`|9.0.0| | | | | | | | | | 
|`CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED`|9.0.0| | | | | | | | | | @@ -685,6 +688,7 @@ |`CUDNN_STATUS_NOT_SUPPORTED`|1.0.0| | | |`HIPDNN_STATUS_NOT_SUPPORTED`| | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM`|9.0.0| | | | | | | | | | +|`CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API`|9.5.0| | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART`|9.0.0| | | | | | | | | | @@ -853,7 +857,7 @@ |`cudnnRNNDescriptor_t`|5.0.0| | | |`hipdnnRNNDescriptor_t`| | | | | | |`cudnnRNNInputMode_t`|5.0.0| | | |`hipdnnRNNInputMode_t`| | | | | | |`cudnnRNNMode_t`|5.0.0| | | |`hipdnnRNNMode_t`| | | | | | -|`cudnnRNNPaddingMode_t`|7.2.1| | | | | | | | | | +|`cudnnRNNPaddingMode_t`|7.2.1|8.0.1| |9.0.0| | | | | | | |`cudnnRNNStruct`|5.0.0| | | | | | | | | | |`cudnnReduceTensorDescriptor_t`|6.0.0|9.0.0| | |`hipdnnReduceTensorDescriptor_t`| | | | | | |`cudnnReduceTensorIndices_t`|6.0.0|9.0.0| | |`hipdnnReduceTensorIndices_t`| | | | | | @@ -899,7 +903,9 @@ |`cudnnBackendFinalize`|8.0.1| | | | | | | | | | |`cudnnBackendGetAttribute`|8.0.1| | | | | | | | | | |`cudnnBackendInitialize`|8.0.1|9.3.0| | | | | | | | | +|`cudnnBackendPopulateCudaGraph`|9.5.0| | | | | | | | | | |`cudnnBackendSetAttribute`|8.0.1| | | | | | | | | | +|`cudnnBackendUpdateCudaGraph`|9.5.0| | | | | | | | | | |`cudnnBatchNormalizationBackward`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationBackward`| | | | | | |`cudnnBatchNormalizationBackwardEx`|7.4.1|9.0.0| | | | | | | | | |`cudnnBatchNormalizationForwardInference`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationForwardInference`| | | | | | diff --git a/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md b/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md index 07144000..8b1f288e 100644 --- a/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md +++ 
b/docs/tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md @@ -12,7 +12,7 @@ |`CUDNN_ACTIVATION_ELU`|6.0.0|9.0.0| | |`HIPDNN_ACTIVATION_ELU`| | | | | |`miopenActivationELU`|2.1.0| | | | | |`CUDNN_ACTIVATION_IDENTITY`|7.1.3|9.0.0| | |`HIPDNN_ACTIVATION_PATHTRU`| | | | | |`miopenActivationPASTHRU`|2.1.0| | | | | |`CUDNN_ACTIVATION_RELU`|1.0.0|9.0.0| | |`HIPDNN_ACTIVATION_RELU`| | | | | |`miopenActivationRELU`|2.1.0| | | | | -|`CUDNN_ACTIVATION_SIGMOID`|1.0.0|9.0.0| | |`HIPDNN_ACTIVATION_SIGMOID`| | | | | | | | | | | | +|`CUDNN_ACTIVATION_SIGMOID`|1.0.0|9.0.0| | |`HIPDNN_ACTIVATION_SIGMOID`| | | | | |`miopenActivationLOGISTIC`|2.1.0| | | | | |`CUDNN_ACTIVATION_SWISH`|8.2.0|9.0.0| | |`HIPDNN_ACTIVATION_SWISH`| | | | | | | | | | | | |`CUDNN_ACTIVATION_TANH`|1.0.0|9.0.0| | |`HIPDNN_ACTIVATION_TANH`| | | | | |`miopenActivationTANH`|2.1.0| | | | | |`CUDNN_ATTN_DISABLE_PROJ_BIASES`|7.6.3| | | | | | | | | | | | | | | | @@ -55,6 +55,7 @@ |`CUDNN_ATTR_INTERMEDIATE_INFO_SIZE`|8.0.1| | | | | | | | | |`MIOPEN_ATTR_INTERMEDIATE_INFO_SIZE`|6.2.0| | | | | |`CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID`|8.0.2| | | | | | | | | |`MIOPEN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID`|6.2.0| | | | | |`CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED`|9.4.0| | | | | | | | | | | | | | | | +|`CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH`|9.5.0| | | | | | | | | | | | | | | | |`CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE`|8.0.1| | | | | | | | | |`MIOPEN_ATTR_KNOB_CHOICE_KNOB_TYPE`|6.2.0| | | | | |`CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE`|8.0.1| | | | | | | | | |`MIOPEN_ATTR_KNOB_CHOICE_KNOB_VALUE`|6.2.0| | | | | |`CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE`|8.0.1| | | | | | | | | |`MIOPEN_ATTR_KNOB_INFO_MAXIMUM_VALUE`|6.2.0| | | | | @@ -287,6 +288,7 @@ |`CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER`|8.3.0| | | | | | | | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER`|8.3.0| | | | | | | | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION`|8.2.0| | | | | | | | | | | | | | | | 
+|`CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API`|9.5.0| | | | | | | | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_TYPE_COUNT`|8.2.0| | | | | | | | | | | | | | | | |`CUDNN_BIDIRECTIONAL`|5.0.0| | | |`HIPDNN_BIDIRECTIONAL`| | | | | |`miopenRNNbidirection`|2.1.0| | | | | |`CUDNN_BN_FINALIZE_STATISTICS_INFERENCE`|8.1.0| | | | | | | | | | | | | | | | @@ -295,7 +297,7 @@ |`CUDNN_CONVOLUTION`|1.0.0|9.0.0| | |`HIPDNN_CONVOLUTION`| | | | | |`miopenConvolution`|2.1.0| | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_0`|3.0.0| | | |`HIPDNN_CONVOLUTION_BWD_DATA_ALGO_0`| | | | | |`miopenConvolutionBwdDataAlgoGEMM`|2.1.0| | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_1`|3.0.0| | | |`HIPDNN_CONVOLUTION_BWD_DATA_ALGO_1`| | | | | |`miopenConvolutionBwdDataAlgoDirect`|2.1.0| | | | | -|`CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT`|6.0.0| | | |`HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM`| | | | | | | | | | | | +|`CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT`|6.0.0| | | |`HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM`| | | | | |`miopenTransposeBwdDataAlgoGEMM`|2.1.0| | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT`|3.0.0| | | |`HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT`| | | | | |`miopenConvolutionBwdDataAlgoFFT`|2.1.0| | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING`|4.0.0| | | |`HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING`| | | | | | | | | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD`|5.0.0| | | |`HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD`| | | | | |`miopenConvolutionBwdDataAlgoWinograd`|2.1.0| | | | | @@ -336,8 +338,8 @@ |`CUDNN_DATA_DOUBLE`|1.0.0| | | |`HIPDNN_DATA_DOUBLE`| | | | | |`miopenDouble`|4.5.0| | | | | |`CUDNN_DATA_FAST_FLOAT_FOR_FP8`|8.7.0| | | | | | | | | | | | | | | | |`CUDNN_DATA_FLOAT`|1.0.0| | | |`HIPDNN_DATA_FLOAT`| | | | | |`miopenFloat`|2.1.0| | | | | -|`CUDNN_DATA_FP8_E4M3`|8.6.0| | | | | | | | | | | | | | | | -|`CUDNN_DATA_FP8_E5M2`|8.6.0| | | | | | | | | | | | | | | | +|`CUDNN_DATA_FP8_E4M3`|8.6.0| | | | | | | | | |`miopenFloat8`|6.0.0| | | | | +|`CUDNN_DATA_FP8_E5M2`|8.6.0| 
| | | | | | | | |`miopenBFloat8`|6.0.0| | | | | |`CUDNN_DATA_HALF`|3.0.0| | | |`HIPDNN_DATA_HALF`| | | | | |`miopenHalf`|2.1.0| | | | | |`CUDNN_DATA_INT32`|6.0.0| | | |`HIPDNN_DATA_INT32`| | | | | |`miopenInt32`|2.1.0| | | | | |`CUDNN_DATA_INT64`|8.1.0| | | | | | | | | |`miopenInt64`|6.2.0| | | | | @@ -351,7 +353,7 @@ |`CUDNN_DETERMINISTIC`|6.0.0| | | | | | | | | | | | | | | | |`CUDNN_DIM_MAX`|4.0.0| | | | | | | | | | | | | | | | |`CUDNN_DIVNORM_PRECOMPUTED_MEANS`|3.0.0| | | | | | | | | | | | | | | | -|`CUDNN_EDGE_VAL_PAD`|8.3.0| | | | | | | | | | | | | | | | +|`CUDNN_EDGE_VAL_PAD`|8.3.0| | | | | | | | | |`miopenPaddingValid`|2.1.0| | | | | |`CUDNN_ERRQUERY_BLOCKING`|7.0.5| | | | | | | | | | | | | | | | |`CUDNN_ERRQUERY_NONBLOCKING`|7.0.5| | | | | | | | | | | | | | | | |`CUDNN_ERRQUERY_RAWCODE`|7.0.5| | | | | | | | | | | | | | | | @@ -363,8 +365,8 @@ |`CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS`|7.6.0| | | | | | | | | | | | | | | | |`CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD`|7.6.0| | | | | | | | | | | | | | | | |`CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK`|7.6.0| | | | | | | | | | | | | | | | -|`CUDNN_FWD_MODE_INFERENCE`|8.0.1| | | | | | | | | | | | | | | | -|`CUDNN_FWD_MODE_TRAINING`|8.0.1| | | | | | | | | | | | | | | | +|`CUDNN_FWD_MODE_INFERENCE`|8.0.1| | | | | | | | | |`miopenRNNInference`|6.0.0| | | | | +|`CUDNN_FWD_MODE_TRAINING`|8.0.1| | | | | | | | | |`miopenRNNTraining`|6.0.0| | | | | |`CUDNN_GENSTATS_SUM_SQSUM`|8.0.1| | | | | | | | | | | | | | | | |`CUDNN_GROUP_NORM`|8.5.0| | | | | | | | | | | | | | | | |`CUDNN_GRU`|5.0.0| | | |`HIPDNN_GRU`| | | | | |`miopenGRU`|2.1.0| | | | | @@ -435,7 +437,7 @@ |`CUDNN_MH_ATTN_Q_WEIGHTS`|7.5.0| | | | | | | | | | | | | | | | |`CUDNN_MH_ATTN_V_BIASES`|7.6.3| | | | | | | | | | | | | | | | |`CUDNN_MH_ATTN_V_WEIGHTS`|7.5.0| | | | | | | | | | | | | | | | -|`CUDNN_NEG_INF_PAD`|8.3.0| | | | | | | | | | | | | | | | +|`CUDNN_NEG_INF_PAD`|8.3.0| | | | | | | | | |`miopenPaddingSame`|2.1.0| | | | | 
|`CUDNN_NON_DETERMINISTIC`|6.0.0| | | | | | | | | | | | | | | | |`CUDNN_NORM_ALGO_PERSIST`|8.0.1|9.0.0| | | | | | | | | | | | | | | |`CUDNN_NORM_ALGO_STANDARD`|8.0.1|9.0.0| | | | | | | | | | | | | | | @@ -555,8 +557,8 @@ |`CUDNN_POINTWISE_TAN`|8.3.0| | | | | | | | | |`MIOPEN_POINTWISE_TAN`|6.2.0| | | | | |`CUDNN_POINTWISE_TANH_BWD`|8.1.0| | | | | | | | | |`MIOPEN_POINTWISE_TANH_BWD`|6.2.0| | | | | |`CUDNN_POINTWISE_TANH_FWD`|8.0.1| | | | | | | | | |`MIOPEN_POINTWISE_TANH_FWD`|6.2.0| | | | | -|`CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`|2.0.0|9.0.0| | |`HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`| | | | | | | | | | | | -|`CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING`|2.0.0|9.0.0| | |`HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING`| | | | | | | | | | | | +|`CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`|2.0.0|9.0.0| | |`HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`| | | | | |`miopenPoolingAverage`|2.1.0| | | | | +|`CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING`|2.0.0|9.0.0| | |`HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING`| | | | | |`miopenPoolingAverageInclusive`|2.1.0| | | | | |`CUDNN_POOLING_MAX`|1.0.0|9.0.0| | |`HIPDNN_POOLING_MAX`| | | | | |`miopenPoolingMax`|2.1.0| | | | | |`CUDNN_POOLING_MAX_DETERMINISTIC`|6.0.0|9.0.0| | |`HIPDNN_POOLING_MAX_DETERMINISTIC`| | | | | | | | | | | | |`CUDNN_PROPAGATE_NAN`|4.0.0|9.0.0| | |`HIPDNN_PROPAGATE_NAN`| | | | | |`MIOPEN_PROPAGATE_NAN`|3.9.0| | | | | @@ -620,8 +622,8 @@ |`CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED`|7.2.1| | | | | | | | | | | | | | | | |`CUDNN_RNN_DOUBLE_BIAS`|7.5.0| | | |`HIPDNN_RNN_WITH_BIAS`| | | | | |`miopenRNNwithBias`|2.1.0| | | | | |`CUDNN_RNN_NO_BIAS`|7.5.0| | | |`HIPDNN_RNN_NO_BIAS`| | | | | |`miopenRNNNoBias`|2.1.0| | | | | -|`CUDNN_RNN_PADDED_IO_DISABLED`|7.2.1| | | | | | | | | | | | | | | | -|`CUDNN_RNN_PADDED_IO_ENABLED`|7.2.1| | | | | | | | | | | | | | | | +|`CUDNN_RNN_PADDED_IO_DISABLED`|7.2.1|8.0.1| |9.0.0| | | | | | |`miopenRNNIONotPadded`|6.0.0| | | | | 
+|`CUDNN_RNN_PADDED_IO_ENABLED`|7.2.1|8.0.1| |9.0.0| | | | | | |`miopenRNNIOWithPadding`|6.0.0| | | | | |`CUDNN_RNN_RELU`|5.0.0| | | |`HIPDNN_RNN_RELU`| | | | | |`miopenRNNRELU`|2.1.0| | | | | |`CUDNN_RNN_SINGLE_INP_BIAS`|7.5.0| | | |`HIPDNN_RNN_WITH_BIAS`| | | | | |`miopenRNNwithBias`|2.1.0| | | | | |`CUDNN_RNN_SINGLE_REC_BIAS`|7.5.0| | | |`HIPDNN_RNN_WITH_BIAS`| | | | | |`miopenRNNwithBias`|2.1.0| | | | | @@ -655,6 +657,7 @@ |`CUDNN_STATUS_ARCH_MISMATCH`|1.0.0|9.0.0| | |`HIPDNN_STATUS_ARCH_MISMATCH`| | | | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM`|1.0.0| | | |`HIPDNN_STATUS_BAD_PARAM`| | | | | |`miopenStatusBadParm`|2.1.0| | | | | |`CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE`|9.0.0| | | | | | | | | | | | | | | | +|`CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH`|9.5.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES`|9.0.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER`|9.0.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED`|9.0.0| | | | | | | | | | | | | | | | @@ -685,6 +688,7 @@ |`CUDNN_STATUS_NOT_SUPPORTED`|1.0.0| | | |`HIPDNN_STATUS_NOT_SUPPORTED`| | | | | |`miopenStatusUnsupportedOp`|2.1.0| | | | | |`CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH`|9.0.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM`|9.0.0| | | | | | | | | | | | | | | | +|`CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API`|9.5.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE`|9.0.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN`|9.0.0| | | | | | | | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART`|9.0.0| | | | | | | | | | | | | | | | @@ -714,40 +718,40 @@ |`CUDNN_TENSOR_REORDERING_NONE`|8.3.0| | | | | | | | | | | | | | | | |`CUDNN_TRANSFORM_FOLD`|7.5.0| | | | | | | | | | | | | | | | |`CUDNN_TRANSFORM_UNFOLD`|7.5.0| | | | | | | | | | | | | | | | -|`CUDNN_TYPE_ATTRIB_NAME`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_ATTRIB_NAME`| | | | | | 
-|`CUDNN_TYPE_BACKEND_DESCRIPTOR`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_BACKEND_DESCRIPTOR`| | | | | | -|`CUDNN_TYPE_BEHAVIOR_NOTE`|8.2.0| | | | | | | | | |`MIOPEN_TYPE_BEHAVIOR_NOTE`| | | | | | -|`CUDNN_TYPE_BN_FINALIZE_STATS_MODE`|8.1.0| | | | | | | | | |`MIOPEN_TYPE_BN_FINALIZE_STATS_MODE`| | | | | | -|`CUDNN_TYPE_BOOLEAN`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_BOOLEAN`| | | | | | -|`CUDNN_TYPE_CHAR`|8.4.0| | | | | | | | | |`MIOPEN_TYPE_CHAR`| | | | | | -|`CUDNN_TYPE_CONVOLUTION_MODE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_CONVOLUTION_MODE`| | | | | | -|`CUDNN_TYPE_DATA_TYPE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_DATA_TYPE`| | | | | | -|`CUDNN_TYPE_DOUBLE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_DOUBLE`| | | | | | -|`CUDNN_TYPE_FLOAT`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_FLOAT`| | | | | | -|`CUDNN_TYPE_FRACTION`|8.5.0| | | | | | | | | |`MIOPEN_TYPE_FRACTION`| | | | | | -|`CUDNN_TYPE_GENSTATS_MODE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_GENSTATS_MODE`| | | | | | -|`CUDNN_TYPE_HANDLE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_HANDLE`| | | | | | -|`CUDNN_TYPE_HEUR_MODE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_HEUR_MODE`| | | | | | -|`CUDNN_TYPE_INT32`|8.3.0| | | | | | | | | |`MIOPEN_TYPE_INT32`| | | | | | -|`CUDNN_TYPE_INT64`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_INT64`| | | | | | -|`CUDNN_TYPE_KNOB_TYPE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_KNOB_TYPE`| | | | | | -|`CUDNN_TYPE_LAYOUT_TYPE`|8.0.2| | | | | | | | | |`MIOPEN_TYPE_LAYOUT_TYPE`| | | | | | -|`CUDNN_TYPE_NAN_PROPOGATION`|8.0.1|9.0.0| | | | | | | | |`MIOPEN_TYPE_NAN_PROPOGATION`| | | | | | -|`CUDNN_TYPE_NORM_FWD_PHASE`|8.5.0| | | | | | | | | |`MIOPEN_TYPE_NORM_FWD_PHASE`| | | | | | -|`CUDNN_TYPE_NORM_MODE`|8.5.0| | | | | | | | | |`MIOPEN_TYPE_NORM_MODE`| | | | | | -|`CUDNN_TYPE_NUMERICAL_NOTE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_NUMERICAL_NOTE`| | | | | | -|`CUDNN_TYPE_PADDING_MODE`|8.3.0| | | | | | | | | |`MIOPEN_TYPE_PADDING_MODE`| | | | | | -|`CUDNN_TYPE_POINTWISE_MODE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_POINTWISE_MODE`| 
| | | | | -|`CUDNN_TYPE_REDUCTION_OPERATOR_TYPE`|8.1.0| | | | | | | | | |`MIOPEN_TYPE_REDUCTION_OPERATOR_TYPE`| | | | | | -|`CUDNN_TYPE_RESAMPLE_MODE`|8.3.0| | | | | | | | | |`MIOPEN_TYPE_RESAMPLE_MODE`| | | | | | -|`CUDNN_TYPE_RNG_DISTRIBUTION`|8.7.0| | | | | | | | | |`MIOPEN_TYPE_RNG_DISTRIBUTION`| | | | | | -|`CUDNN_TYPE_SIGNAL_MODE`|8.5.0| | | | | | | | | |`MIOPEN_TYPE_SIGNAL_MODE`| | | | | | -|`CUDNN_TYPE_TENSOR_REORDERING_MODE`|8.3.0| | | | | | | | | |`MIOPEN_TYPE_TENSOR_REORDERING_MODE`| | | | | | -|`CUDNN_TYPE_VOID_PTR`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_VOID_PTR`| | | | | | +|`CUDNN_TYPE_ATTRIB_NAME`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_ATTRIB_NAME`|6.2.0| | | | | +|`CUDNN_TYPE_BACKEND_DESCRIPTOR`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_BACKEND_DESCRIPTOR`|6.2.0| | | | | +|`CUDNN_TYPE_BEHAVIOR_NOTE`|8.2.0| | | | | | | | | |`MIOPEN_TYPE_BEHAVIOR_NOTE`|6.2.0| | | | | +|`CUDNN_TYPE_BN_FINALIZE_STATS_MODE`|8.1.0| | | | | | | | | |`MIOPEN_TYPE_BN_FINALIZE_STATS_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_BOOLEAN`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_BOOLEAN`|6.2.0| | | | | +|`CUDNN_TYPE_CHAR`|8.4.0| | | | | | | | | |`MIOPEN_TYPE_CHAR`|6.2.0| | | | | +|`CUDNN_TYPE_CONVOLUTION_MODE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_CONVOLUTION_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_DATA_TYPE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_DATA_TYPE`|6.2.0| | | | | +|`CUDNN_TYPE_DOUBLE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_DOUBLE`|6.2.0| | | | | +|`CUDNN_TYPE_FLOAT`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_FLOAT`|6.2.0| | | | | +|`CUDNN_TYPE_FRACTION`|8.5.0| | | | | | | | | |`MIOPEN_TYPE_FRACTION`|6.2.0| | | | | +|`CUDNN_TYPE_GENSTATS_MODE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_GENSTATS_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_HANDLE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_HANDLE`|6.2.0| | | | | +|`CUDNN_TYPE_HEUR_MODE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_HEUR_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_INT32`|8.3.0| | | | | | | | | |`MIOPEN_TYPE_INT32`|6.2.0| | | | | +|`CUDNN_TYPE_INT64`|8.0.1| | | | | | | | | 
|`MIOPEN_TYPE_INT64`|6.2.0| | | | | +|`CUDNN_TYPE_KNOB_TYPE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_KNOB_TYPE`|6.2.0| | | | | +|`CUDNN_TYPE_LAYOUT_TYPE`|8.0.2| | | | | | | | | |`MIOPEN_TYPE_LAYOUT_TYPE`|6.2.0| | | | | +|`CUDNN_TYPE_NAN_PROPOGATION`|8.0.1|9.0.0| | | | | | | | |`MIOPEN_TYPE_NAN_PROPOGATION`|6.2.0| | | | | +|`CUDNN_TYPE_NORM_FWD_PHASE`|8.5.0| | | | | | | | | |`MIOPEN_TYPE_NORM_FWD_PHASE`|6.2.0| | | | | +|`CUDNN_TYPE_NORM_MODE`|8.5.0| | | | | | | | | |`MIOPEN_TYPE_NORM_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_NUMERICAL_NOTE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_NUMERICAL_NOTE`|6.2.0| | | | | +|`CUDNN_TYPE_PADDING_MODE`|8.3.0| | | | | | | | | |`MIOPEN_TYPE_PADDING_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_POINTWISE_MODE`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_POINTWISE_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_REDUCTION_OPERATOR_TYPE`|8.1.0| | | | | | | | | |`MIOPEN_TYPE_REDUCTION_OPERATOR_TYPE`|6.2.0| | | | | +|`CUDNN_TYPE_RESAMPLE_MODE`|8.3.0| | | | | | | | | |`MIOPEN_TYPE_RESAMPLE_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_RNG_DISTRIBUTION`|8.7.0| | | | | | | | | |`MIOPEN_TYPE_RNG_DISTRIBUTION`|6.2.0| | | | | +|`CUDNN_TYPE_SIGNAL_MODE`|8.5.0| | | | | | | | | |`MIOPEN_TYPE_SIGNAL_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_TENSOR_REORDERING_MODE`|8.3.0| | | | | | | | | |`MIOPEN_TYPE_TENSOR_REORDERING_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_VOID_PTR`|8.0.1| | | | | | | | | |`MIOPEN_TYPE_VOID_PTR`|6.2.0| | | | | |`CUDNN_UNIDIRECTIONAL`|5.0.0| | | |`HIPDNN_UNIDIRECTIONAL`| | | | | |`miopenRNNunidirection`|2.1.0| | | | | |`CUDNN_WGRAD_MODE_ADD`|7.5.0| | | | | | | | | | | | | | | | |`CUDNN_WGRAD_MODE_SET`|7.5.0| | | | | | | | | | | | | | | | -|`CUDNN_ZERO_PAD`|8.3.0| | | | | | | | | | | | | | | | +|`CUDNN_ZERO_PAD`|8.3.0| | | | | | | | | |`miopenPaddingDefault`|2.1.0| | | | | |`cudnnActivationDescriptor_t`|4.0.0|9.0.0| | |`hipdnnActivationDescriptor_t`| | | | | |`miopenActivationDescriptor_t`|2.1.0| | | | | |`cudnnActivationMode_t`|1.0.0|9.0.0| | |`hipdnnActivationMode_t`| | | | | 
|`miopenActivationMode_t`|2.1.0| | | | | |`cudnnActivationStruct`|4.0.0|9.0.0| | | | | | | | | | | | | | | @@ -761,7 +765,7 @@ |`cudnnAttnQueryMap_t`|7.5.0| | |9.0.0| | | | | | | | | | | | | |`cudnnAttnStruct`|7.5.0| | | | | | | | | | | | | | | | |`cudnnBackendAttributeName_t`|8.0.1| | | | | | | | | |`miopenBackendAttributeName_t`|6.2.0| | | | | -|`cudnnBackendAttributeType_t`|8.0.1| | | | | | | | | |`miopenBackendAttributeType_t`| | | | | | +|`cudnnBackendAttributeType_t`|8.0.1| | | | | | | | | |`miopenBackendAttributeType_t`|6.2.0| | | | | |`cudnnBackendBehaviorNote_t`|8.2.0| | | | | | | | | | | | | | | | |`cudnnBackendDescriptorType_t`|8.0.1| | | | | | | | | |`miopenBackendDescriptorType_t`|6.2.0| | | | | |`cudnnBackendDescriptor_t`|8.0.1| | | | | | | | | |`miopenBackendDescriptor_t`|6.2.0| | | | | @@ -808,7 +812,7 @@ |`cudnnFilterDescriptor_t`|1.0.0| | | |`hipdnnFilterDescriptor_t`| | | | | |`miopenTensorDescriptor_t`|2.1.0| | | | | |`cudnnFilterStruct`|1.0.0|9.0.0| | | | | | | | | | | | | | | |`cudnnFoldingDirection_t`|7.5.0| | | | | | | | | | | | | | | | -|`cudnnForwardMode_t`|8.0.1| | | | | | | | | | | | | | | | +|`cudnnForwardMode_t`|8.0.1| | | | | | | | | |`miopenRNNFWDMode_t`|6.0.0| | | | | |`cudnnFractionStruct`|8.5.0| | | | | | | | | | | | | | | | |`cudnnFraction_t`|8.5.0| | | | | | | | | | | | | | | | |`cudnnFusedOpsConstParamLabel_t`|7.6.0|9.0.0| | | | | | | | | | | | | | | @@ -837,7 +841,7 @@ |`cudnnOpTensorDescriptor_t`|5.0.0|9.0.0| | |`hipdnnOpTensorDescriptor_t`| | | | | | | | | | | | |`cudnnOpTensorOp_t`|5.0.0| | | |`hipdnnOpTensorOp_t`| | | | | |`miopenTensorOp_t`|2.1.0| | | | | |`cudnnOpTensorStruct`|5.0.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnPaddingMode_t`|8.3.0| | | | | | | | | | | | | | | | +|`cudnnPaddingMode_t`|8.3.0| | | | | | | | | |`miopenPaddingMode_t`|2.1.0| | | | | |`cudnnPersistentRNNPlan`|6.0.0| | | | | | | | | | | | | | | | |`cudnnPersistentRNNPlan_t`|6.0.0| | | |`hipdnnPersistentRNNPlan_t`| | | | | | | | | | | | 
|`cudnnPointwiseMode_t`|8.0.1| | | | | | | | | |`miopenPointwiseMode_t`|6.2.0| | | | | @@ -853,7 +857,7 @@ |`cudnnRNNDescriptor_t`|5.0.0| | | |`hipdnnRNNDescriptor_t`| | | | | |`miopenRNNDescriptor_t`|2.1.0| | | | | |`cudnnRNNInputMode_t`|5.0.0| | | |`hipdnnRNNInputMode_t`| | | | | |`miopenRNNInputMode_t`|2.1.0| | | | | |`cudnnRNNMode_t`|5.0.0| | | |`hipdnnRNNMode_t`| | | | | |`miopenRNNMode_t`|2.1.0| | | | | -|`cudnnRNNPaddingMode_t`|7.2.1| | | | | | | | | | | | | | | | +|`cudnnRNNPaddingMode_t`|7.2.1|8.0.1| |9.0.0| | | | | | |`miopenRNNPaddingMode_t`|6.0.0| | | | | |`cudnnRNNStruct`|5.0.0| | | | | | | | | | | | | | | | |`cudnnReduceTensorDescriptor_t`|6.0.0|9.0.0| | |`hipdnnReduceTensorDescriptor_t`| | | | | |`miopenReduceTensorDescriptor_t`|3.9.0| | | | | |`cudnnReduceTensorIndices_t`|6.0.0|9.0.0| | |`hipdnnReduceTensorIndices_t`| | | | | |`miopenReduceTensorIndices_t`|3.9.0| | | | | @@ -887,26 +891,28 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|**MIOPEN**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| -|`cudnnActivationBackward`|1.0.0|9.0.0| | |`hipdnnActivationBackward`| | | | | |`miopenActivationBackward`| | | | | | -|`cudnnActivationForward`|1.0.0|9.0.0| | |`hipdnnActivationForward`| | | | | |`miopenActivationForward`| | | | | | +|`cudnnActivationBackward`|1.0.0|9.0.0| | |`hipdnnActivationBackward`| | | | | |`miopenActivationBackward`|2.1.0| | | | | +|`cudnnActivationForward`|1.0.0|9.0.0| | |`hipdnnActivationForward`| | | | | |`miopenActivationForward`|2.1.0| | | | | |`cudnnAddTensor`|2.0.0|9.0.0| | |`hipdnnAddTensor`| | | | | | | | | | | | |`cudnnAdvInferVersionCheck`|8.0.1| | |9.0.0| | | | | | | | | | | | | |`cudnnAdvTrainVersionCheck`|8.0.1| | |9.0.0| | | | | | | | | | | | | |`cudnnAdvVersionCheck`|9.0.0| | | | | | | | | | | | | | | | -|`cudnnBackendCreateDescriptor`|8.0.1| | | | | | | | | |`miopenBackendCreateDescriptor`| | | | | | 
-|`cudnnBackendDestroyDescriptor`|8.0.1| | | | | | | | | |`miopenBackendDestroyDescriptor`| | | | | | -|`cudnnBackendExecute`|8.0.1| | | | | | | | | |`miopenBackendExecute`| | | | | | -|`cudnnBackendFinalize`|8.0.1| | | | | | | | | |`miopenBackendFinalize`| | | | | | -|`cudnnBackendGetAttribute`|8.0.1| | | | | | | | | |`miopenBackendGetAttribute`| | | | | | +|`cudnnBackendCreateDescriptor`|8.0.1| | | | | | | | | |`miopenBackendCreateDescriptor`|6.2.0| | | | | +|`cudnnBackendDestroyDescriptor`|8.0.1| | | | | | | | | |`miopenBackendDestroyDescriptor`|6.2.0| | | | | +|`cudnnBackendExecute`|8.0.1| | | | | | | | | |`miopenBackendExecute`|6.2.0| | | | | +|`cudnnBackendFinalize`|8.0.1| | | | | | | | | |`miopenBackendFinalize`|6.2.0| | | | | +|`cudnnBackendGetAttribute`|8.0.1| | | | | | | | | |`miopenBackendGetAttribute`|6.2.0| | | | | |`cudnnBackendInitialize`|8.0.1|9.3.0| | | | | | | | | | | | | | | -|`cudnnBackendSetAttribute`|8.0.1| | | | | | | | | |`miopenBackendSetAttribute`| | | | | | -|`cudnnBatchNormalizationBackward`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationBackward`| | | | | |`miopenBatchNormalizationBackward`| | | | | | +|`cudnnBackendPopulateCudaGraph`|9.5.0| | | | | | | | | | | | | | | | +|`cudnnBackendSetAttribute`|8.0.1| | | | | | | | | |`miopenBackendSetAttribute`|6.2.0| | | | | +|`cudnnBackendUpdateCudaGraph`|9.5.0| | | | | | | | | | | | | | | | +|`cudnnBatchNormalizationBackward`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationBackward`| | | | | |`miopenBatchNormalizationBackward`|2.1.0| | | | | |`cudnnBatchNormalizationBackwardEx`|7.4.1|9.0.0| | | | | | | | | | | | | | | -|`cudnnBatchNormalizationForwardInference`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationForwardInference`| | | | | |`miopenBatchNormalizationForwardInference`| | | | | | -|`cudnnBatchNormalizationForwardTraining`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationForwardTraining`| | | | | |`miopenBatchNormalizationForwardTraining`| | | | | | +|`cudnnBatchNormalizationForwardInference`|4.0.0|9.0.0| | 
|`hipdnnBatchNormalizationForwardInference`| | | | | |`miopenBatchNormalizationForwardInference`|2.1.0| | | | | +|`cudnnBatchNormalizationForwardTraining`|4.0.0|9.0.0| | |`hipdnnBatchNormalizationForwardTraining`| | | | | |`miopenBatchNormalizationForwardTraining`|2.1.0| | | | | |`cudnnBatchNormalizationForwardTrainingEx`|7.4.1|9.0.0| | | | | | | | | | | | | | | |`cudnnBuildRNNDynamic`|8.0.1| | | | | | | | | | | | | | | | -|`cudnnCTCLoss`|7.0.5| | | | | | | | | |`miopenCTCLoss`| | | | | | +|`cudnnCTCLoss`|7.0.5| | | | | | | | | |`miopenCTCLoss`|2.6.0| | | | | |`cudnnCTCLoss_v8`|8.0.1| | | | | | | | | | | | | | | | |`cudnnCnnInferVersionCheck`|8.0.2| | | | | | | | | | | | | | | | |`cudnnCnnTrainVersionCheck`|8.0.2| | | | | | | | | | | | | | | | @@ -917,48 +923,48 @@ |`cudnnConvolutionForward`|1.0.0|9.0.0| | |`hipdnnConvolutionForward`| | | | | |`miopenConvolutionForward`|2.1.0| | | | | |`cudnnCopyAlgorithmDescriptor`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnCreate`|1.0.0| | | |`hipdnnCreate`| | | | | |`miopenCreate`|2.1.0| | | | | -|`cudnnCreateActivationDescriptor`|4.0.0|9.0.0| | |`hipdnnCreateActivationDescriptor`| | | | | |`miopenCreateActivationDescriptor`| | | | | | +|`cudnnCreateActivationDescriptor`|4.0.0|9.0.0| | |`hipdnnCreateActivationDescriptor`| | | | | |`miopenCreateActivationDescriptor`|2.1.0| | | | | |`cudnnCreateAlgorithmDescriptor`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnCreateAlgorithmPerformance`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnCreateAttnDescriptor`|7.5.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnCreateCTCLossDescriptor`|7.0.5| | | | | | | | | |`miopenCreateCTCLossDescriptor`| | | | | | +|`cudnnCreateCTCLossDescriptor`|7.0.5| | | | | | | | | |`miopenCreateCTCLossDescriptor`|2.6.0| | | | | |`cudnnCreateConvolutionDescriptor`|1.0.0|9.0.0| | |`hipdnnCreateConvolutionDescriptor`| | | | | |`miopenCreateConvolutionDescriptor`|2.1.0| | | | | -|`cudnnCreateDropoutDescriptor`|5.0.0| | | 
|`hipdnnCreateDropoutDescriptor`| | | | | |`miopenCreateDropoutDescriptor`| | | | | | +|`cudnnCreateDropoutDescriptor`|5.0.0| | | |`hipdnnCreateDropoutDescriptor`| | | | | |`miopenCreateDropoutDescriptor`|2.8.0| | | | | |`cudnnCreateFilterDescriptor`|1.0.0|9.0.0| | |`hipdnnCreateFilterDescriptor`| | | | | | | | | | | | |`cudnnCreateFusedOpsConstParamPack`|7.6.0|9.0.0| | | | | | | | | | | | | | | |`cudnnCreateFusedOpsPlan`|7.6.0|9.0.0| | | | | | | | | | | | | | | |`cudnnCreateFusedOpsVariantParamPack`|7.6.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnCreateLRNDescriptor`|3.0.0| | | |`hipdnnCreateLRNDescriptor`| | | | | |`miopenCreateLRNDescriptor`| | | | | | +|`cudnnCreateLRNDescriptor`|3.0.0| | | |`hipdnnCreateLRNDescriptor`| | | | | |`miopenCreateLRNDescriptor`|2.1.0| | | | | |`cudnnCreateOpTensorDescriptor`|5.0.0|9.0.0| | |`hipdnnCreateOpTensorDescriptor`| | | | | | | | | | | | |`cudnnCreatePersistentRNNPlan`|6.0.0|8.0.1| |9.0.0|`hipdnnCreatePersistentRNNPlan`| | | | | | | | | | | | |`cudnnCreatePoolingDescriptor`|1.0.0|9.0.0| | |`hipdnnCreatePoolingDescriptor`| | | | | |`miopenCreatePoolingDescriptor`|2.1.0| | | | | |`cudnnCreateRNNDataDescriptor`|7.2.1| | | | | | | | | | | | | | | | -|`cudnnCreateRNNDescriptor`|5.0.0| | | |`hipdnnCreateRNNDescriptor`| | | | | |`miopenCreateRNNDescriptor`| | | | | | +|`cudnnCreateRNNDescriptor`|5.0.0| | | |`hipdnnCreateRNNDescriptor`| | | | | |`miopenCreateRNNDescriptor`|2.1.0| | | | | |`cudnnCreateReduceTensorDescriptor`|6.0.0|9.0.0| | |`hipdnnCreateReduceTensorDescriptor`| | | | | |`miopenCreateReduceTensorDescriptor`|3.9.0| | | | | |`cudnnCreateSeqDataDescriptor`|7.5.0|9.0.0| | | | | | | | | | | | | | | |`cudnnCreateSpatialTransformerDescriptor`|5.0.0| | | | | | | | | | | | | | | | |`cudnnCreateTensorDescriptor`|2.0.0| | | |`hipdnnCreateTensorDescriptor`| | | | | |`miopenCreateTensorDescriptor`|2.1.0| | | | | |`cudnnCreateTensorTransformDescriptor`|7.5.0|9.0.0| | | | | | | | | | | | | | | 
-|`cudnnDeriveBNTensorDescriptor`|4.0.0|9.0.0| | |`hipdnnDeriveBNTensorDescriptor`| | | | | |`miopenDeriveBNTensorDescriptor`| | | | | | +|`cudnnDeriveBNTensorDescriptor`|4.0.0|9.0.0| | |`hipdnnDeriveBNTensorDescriptor`| | | | | |`miopenDeriveBNTensorDescriptor`|2.1.0| | | | | |`cudnnDeriveNormTensorDescriptor`|8.0.1|9.0.0| | | | | | | | | | | | | | | |`cudnnDestroy`|1.0.0| | | |`hipdnnDestroy`| | | | | |`miopenDestroy`|2.1.0| | | | | -|`cudnnDestroyActivationDescriptor`|4.0.0|9.0.0| | |`hipdnnDestroyActivationDescriptor`| | | | | |`miopenDestroyActivationDescriptor`| | | | | | +|`cudnnDestroyActivationDescriptor`|4.0.0|9.0.0| | |`hipdnnDestroyActivationDescriptor`| | | | | |`miopenDestroyActivationDescriptor`|2.1.0| | | | | |`cudnnDestroyAlgorithmDescriptor`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnDestroyAlgorithmPerformance`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnDestroyAttnDescriptor`|7.5.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnDestroyCTCLossDescriptor`|7.0.5| | | | | | | | | |`miopenDestroyCTCLossDescriptor`| | | | | | +|`cudnnDestroyCTCLossDescriptor`|7.0.5| | | | | | | | | |`miopenDestroyCTCLossDescriptor`|2.6.0| | | | | |`cudnnDestroyConvolutionDescriptor`|1.0.0|9.0.0| | |`hipdnnDestroyConvolutionDescriptor`| | | | | |`miopenDestroyConvolutionDescriptor`|2.1.0| | | | | -|`cudnnDestroyDropoutDescriptor`|5.0.0| | | |`hipdnnDestroyDropoutDescriptor`| | | | | |`miopenDestroyDropoutDescriptor`| | | | | | +|`cudnnDestroyDropoutDescriptor`|5.0.0| | | |`hipdnnDestroyDropoutDescriptor`| | | | | |`miopenDestroyDropoutDescriptor`|2.8.0| | | | | |`cudnnDestroyFilterDescriptor`|1.0.0|9.0.0| | |`hipdnnDestroyFilterDescriptor`| | | | | | | | | | | | |`cudnnDestroyFusedOpsConstParamPack`|7.6.0|9.0.0| | | | | | | | | | | | | | | |`cudnnDestroyFusedOpsPlan`|7.6.0|9.0.0| | | | | | | | | | | | | | | |`cudnnDestroyFusedOpsVariantParamPack`|7.6.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnDestroyLRNDescriptor`|3.0.0| | | 
|`hipdnnDestroyLRNDescriptor`| | | | | |`miopenDestroyLRNDescriptor`| | | | | | +|`cudnnDestroyLRNDescriptor`|3.0.0| | | |`hipdnnDestroyLRNDescriptor`| | | | | |`miopenDestroyLRNDescriptor`|2.1.0| | | | | |`cudnnDestroyOpTensorDescriptor`|5.0.0|9.0.0| | |`hipdnnDestroyOpTensorDescriptor`| | | | | | | | | | | | |`cudnnDestroyPersistentRNNPlan`|6.0.0|8.0.1| |9.0.0|`hipdnnDestroyPersistentRNNPlan`| | | | | | | | | | | | |`cudnnDestroyPoolingDescriptor`|1.0.0|9.0.0| | |`hipdnnDestroyPoolingDescriptor`| | | | | |`miopenDestroyPoolingDescriptor`|2.1.0| | | | | |`cudnnDestroyRNNDataDescriptor`|7.2.1| | | | | | | | | | | | | | | | -|`cudnnDestroyRNNDescriptor`|5.0.0| | | |`hipdnnDestroyRNNDescriptor`| | | | | |`miopenDestroyRNNDescriptor`| | | | | | +|`cudnnDestroyRNNDescriptor`|5.0.0| | | |`hipdnnDestroyRNNDescriptor`| | | | | |`miopenDestroyRNNDescriptor`|2.1.0| | | | | |`cudnnDestroyReduceTensorDescriptor`|6.0.0|9.0.0| | |`hipdnnDestroyReduceTensorDescriptor`| | | | | |`miopenDestroyReduceTensorDescriptor`|3.9.0| | | | | |`cudnnDestroySeqDataDescriptor`|7.5.0|9.0.0| | | | | | | | | | | | | | | |`cudnnDestroySpatialTransformerDescriptor`|5.0.0| | | | | | | | | | | | | | | | @@ -966,10 +972,10 @@ |`cudnnDestroyTensorTransformDescriptor`|7.5.0|9.0.0| | | | | | | | | | | | | | | |`cudnnDivisiveNormalizationBackward`|3.0.0| | | | | | | | | | | | | | | | |`cudnnDivisiveNormalizationForward`|3.0.0| | | | | | | | | | | | | | | | -|`cudnnDropoutBackward`|5.0.0| | | | | | | | | |`miopenDropoutBackward`| | | | | | -|`cudnnDropoutForward`|5.0.0| | | | | | | | | |`miopenDropoutForward`| | | | | | -|`cudnnDropoutGetReserveSpaceSize`|5.0.0| | | | | | | | | |`miopenDropoutGetReserveSpaceSize`| | | | | | -|`cudnnDropoutGetStatesSize`|5.0.0| | | |`hipdnnDropoutGetStatesSize`| | | | | |`miopenDropoutGetStatesSize`| | | | | | +|`cudnnDropoutBackward`|5.0.0| | | | | | | | | |`miopenDropoutBackward`|2.8.0| | | | | +|`cudnnDropoutForward`|5.0.0| | | | | | | | | |`miopenDropoutForward`|2.8.0| 
| | | | +|`cudnnDropoutGetReserveSpaceSize`|5.0.0| | | | | | | | | |`miopenDropoutGetReserveSpaceSize`|2.8.0| | | | | +|`cudnnDropoutGetStatesSize`|5.0.0| | | |`hipdnnDropoutGetStatesSize`| | | | | |`miopenDropoutGetStatesSize`|2.8.0| | | | | |`cudnnFindConvolutionBackwardDataAlgorithm`|3.0.0|9.0.0| | |`hipdnnFindConvolutionBackwardDataAlgorithm`| | | | | | | | | | | | |`cudnnFindConvolutionBackwardDataAlgorithmEx`|5.0.0|9.0.0| | |`hipdnnFindConvolutionBackwardDataAlgorithmEx`| | | | | | | | | | | | |`cudnnFindConvolutionBackwardFilterAlgorithm`|3.0.0|9.0.0| | |`hipdnnFindConvolutionBackwardFilterAlgorithm`| | | | | | | | | | | | @@ -990,11 +996,11 @@ |`cudnnGetBatchNormalizationBackwardExWorkspaceSize`|7.4.1|9.0.0| | | | | | | | | | | | | | | |`cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize`|7.4.1|9.0.0| | | | | | | | | | | | | | | |`cudnnGetBatchNormalizationTrainingExReserveSpaceSize`|7.4.1|9.0.0| | | | | | | | | | | | | | | -|`cudnnGetCTCLossDescriptor`|7.0.5|9.0.0| | | | | | | | |`miopenGetCTCLossDescriptor`| | | | | | +|`cudnnGetCTCLossDescriptor`|7.0.5|9.0.0| | | | | | | | |`miopenGetCTCLossDescriptor`|2.6.0| | | | | |`cudnnGetCTCLossDescriptorEx`|7.5.0|9.0.0| | | | | | | | | | | | | | | |`cudnnGetCTCLossDescriptor_v8`|8.0.1|9.0.0| | | | | | | | | | | | | | | |`cudnnGetCTCLossDescriptor_v9`|9.0.0| | | | | | | | | | | | | | | | -|`cudnnGetCTCLossWorkspaceSize`|7.0.5| | | | | | | | | |`miopenGetCTCLossWorkspaceSize`| | | | | | +|`cudnnGetCTCLossWorkspaceSize`|7.0.5| | | | | | | | | |`miopenGetCTCLossWorkspaceSize`|2.6.0| | | | | |`cudnnGetCTCLossWorkspaceSize_v8`|8.0.1| | | | | | | | | | | | | | | | |`cudnnGetCallback`|7.1.3| | | | | | | | | | | | | | | | |`cudnnGetConvolution2dDescriptor`|2.0.0|9.0.0| | |`hipdnnGetConvolution2dDescriptor`| | | | | | | | | | | | @@ -1017,7 +1023,7 @@ |`cudnnGetConvolutionNdForwardOutputDim`|2.0.0|9.0.0| | | | | | | | | | | | | | | |`cudnnGetConvolutionReorderType`|7.6.0|9.0.0| | | | | | | | | | | | | | | 
|`cudnnGetCudartVersion`|6.0.0| | | | | | | | | | | | | | | | -|`cudnnGetDropoutDescriptor`|7.0.5| | | | | | | | | |`miopenGetDropoutDescriptor`| | | | | | +|`cudnnGetDropoutDescriptor`|7.0.5| | | | | | | | | |`miopenGetDropoutDescriptor`|2.8.0| | | | | |`cudnnGetErrorString`|2.0.0| | | |`hipdnnGetErrorString`| | | | | |`miopenGetErrorString`|2.1.0| | | | | |`cudnnGetFilter4dDescriptor`|2.0.0|9.0.0| | |`hipdnnGetFilter4dDescriptor`| | | | | | | | | | | | |`cudnnGetFilterNdDescriptor`|2.0.0|9.0.0| | |`hipdnnGetFilterNdDescriptor`| | | | | | | | | | | | @@ -1025,7 +1031,7 @@ |`cudnnGetFoldedConvBackwardDataDescriptors`|7.6.0|9.0.0| | | | | | | | | | | | | | | |`cudnnGetFusedOpsConstParamPackAttribute`|7.6.0|9.0.0| | | | | | | | | | | | | | | |`cudnnGetFusedOpsVariantParamPackAttribute`|7.6.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnGetLRNDescriptor`|3.0.0| | | |`hipdnnGetLRNDescriptor`| | | | | |`miopenGetLRNDescriptor`| | | | | | +|`cudnnGetLRNDescriptor`|3.0.0| | | |`hipdnnGetLRNDescriptor`| | | | | |`miopenGetLRNDescriptor`|2.1.0| | | | | |`cudnnGetLastErrorString`|9.0.0| | | | | | | | | | | | | | | | |`cudnnGetMaxDeviceVersion`|8.6.0| | | | | | | | | | | | | | | | |`cudnnGetMultiHeadAttnBuffers`|7.5.0|9.0.0| | | | | | | | | | | | | | | @@ -1043,8 +1049,8 @@ |`cudnnGetRNNBackwardWeightsAlgorithmMaxCount`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnGetRNNBiasMode`|7.5.0|8.0.1| |9.0.0| | | | | | | | | | | | | |`cudnnGetRNNDataDescriptor`|7.2.1| | | | | | | | | | | | | | | | -|`cudnnGetRNNDescriptor`|7.0.5|7.6.5| |8.0.1|`hipdnnGetRNNDescriptor`| | | | | |`miopenGetRNNDescriptor_V2`| | | | | | -|`cudnnGetRNNDescriptor_v6`|8.0.1|8.0.1| |9.0.0| | | | | | |`miopenGetRNNDescriptor_V2`| | | | | | +|`cudnnGetRNNDescriptor`|7.0.5|7.6.5| |8.0.1|`hipdnnGetRNNDescriptor`| | | | | |`miopenGetRNNDescriptor_V2`|3.5.0| | | | | +|`cudnnGetRNNDescriptor_v6`|8.0.1|8.0.1| |9.0.0| | | | | | |`miopenGetRNNDescriptor_V2`|3.5.0| | | | | |`cudnnGetRNNDescriptor_v8`|8.0.1| | | | | 
| | | | | | | | | | | |`cudnnGetRNNForwardInferenceAlgorithmMaxCount`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnGetRNNForwardTrainingAlgorithmMaxCount`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | @@ -1052,13 +1058,13 @@ |`cudnnGetRNNLinLayerMatrixParams`|5.0.0|8.0.1| |9.0.0|`hipdnnGetRNNLinLayerMatrixParams`| | | | | | | | | | | | |`cudnnGetRNNMatrixMathType`|7.1.3|8.0.1| |9.0.0| | | | | | | | | | | | | |`cudnnGetRNNPaddingMode`|7.2.1|8.0.1| |9.0.0| | | | | | | | | | | | | -|`cudnnGetRNNParamsSize`|5.0.0|8.0.1| |9.0.0|`hipdnnGetRNNParamsSize`| | | | | |`miopenGetRNNParamsSize`| | | | | | +|`cudnnGetRNNParamsSize`|5.0.0|8.0.1| |9.0.0|`hipdnnGetRNNParamsSize`| | | | | |`miopenGetRNNParamsSize`|2.1.0| | | | | |`cudnnGetRNNProjectionLayers`|7.1.3|8.0.1| |9.0.0| | | | | | | | | | | | | |`cudnnGetRNNTempSpaceSizes`|8.0.1| | | | | | | | | | | | | | | | -|`cudnnGetRNNTrainingReserveSize`|5.0.0|8.0.1| |9.0.0|`hipdnnGetRNNTrainingReserveSize`| | | | | |`miopenGetRNNTrainingReserveSize`| | | | | | +|`cudnnGetRNNTrainingReserveSize`|5.0.0|8.0.1| |9.0.0|`hipdnnGetRNNTrainingReserveSize`| | | | | |`miopenGetRNNTrainingReserveSize`|2.1.0| | | | | |`cudnnGetRNNWeightParams`|8.0.1| | | | | | | | | | | | | | | | |`cudnnGetRNNWeightSpaceSize`|8.0.1| | | | | | | | | | | | | | | | -|`cudnnGetRNNWorkspaceSize`|5.0.0|8.0.1| |9.0.0|`hipdnnGetRNNWorkspaceSize`| | | | | |`miopenGetRNNWorkspaceSize`| | | | | | +|`cudnnGetRNNWorkspaceSize`|5.0.0|8.0.1| |9.0.0|`hipdnnGetRNNWorkspaceSize`| | | | | |`miopenGetRNNWorkspaceSize`|2.1.0| | | | | |`cudnnGetReduceTensorDescriptor`|6.0.0|9.0.0| | |`hipdnnGetReduceTensorDescriptor`| | | | | |`miopenGetReduceTensorDescriptor`|3.9.0| | | | | |`cudnnGetReductionIndicesSize`|6.0.0|9.0.0| | | | | | | | |`miopenGetReductionIndicesSize`|3.9.0| | | | | |`cudnnGetReductionWorkspaceSize`|6.0.0|9.0.0| | |`hipdnnGetReductionWorkspaceSize`| | | | | |`miopenGetReductionWorkspaceSize`|3.9.0| | | | | @@ -1088,16 +1094,16 @@ 
|`cudnnPoolingBackward`|1.0.0|9.0.0| | |`hipdnnPoolingBackward`| | | | | | | | | | | | |`cudnnPoolingForward`|1.0.0|9.0.0| | |`hipdnnPoolingForward`| | | | | | | | | | | | |`cudnnQueryRuntimeError`|7.0.5|9.0.0| | | | | | | | | | | | | | | -|`cudnnRNNBackwardData`|5.0.0|8.0.2| |9.0.0|`hipdnnRNNBackwardData`| | | | | |`miopenRNNBackwardData`| | | | | | +|`cudnnRNNBackwardData`|5.0.0|8.0.2| |9.0.0|`hipdnnRNNBackwardData`| | | | | |`miopenRNNBackwardData`|2.1.0| | | | | |`cudnnRNNBackwardDataEx`|7.2.1|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnRNNBackwardData_v8`|8.0.2| | | | | | | | | | | | | | | | -|`cudnnRNNBackwardWeights`|5.0.0|8.0.2| |9.0.0|`hipdnnRNNBackwardWeights`| | | | | |`miopenRNNBackwardWeights`| | | | | | +|`cudnnRNNBackwardWeights`|5.0.0|8.0.2| |9.0.0|`hipdnnRNNBackwardWeights`| | | | | |`miopenRNNBackwardWeights`|2.1.0| | | | | |`cudnnRNNBackwardWeightsEx`|7.2.1|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnRNNBackwardWeights_v8`|8.0.2| | | | | | | | | | | | | | | | |`cudnnRNNForward`|8.0.1| | | | | | | | | | | | | | | | -|`cudnnRNNForwardInference`|5.0.0|8.0.1| |9.0.0|`hipdnnRNNForwardInference`| | | | | |`miopenRNNForwardInference`| | | | | | +|`cudnnRNNForwardInference`|5.0.0|8.0.1| |9.0.0|`hipdnnRNNForwardInference`| | | | | |`miopenRNNForwardInference`|2.1.0| | | | | |`cudnnRNNForwardInferenceEx`|7.2.1|8.0.1| |9.0.0| | | | | | | | | | | | | -|`cudnnRNNForwardTraining`|5.0.0|8.0.1| |9.0.0|`hipdnnRNNForwardTraining`| | | | | |`miopenRNNForwardTraining`| | | | | | +|`cudnnRNNForwardTraining`|5.0.0|8.0.1| |9.0.0|`hipdnnRNNForwardTraining`| | | | | |`miopenRNNForwardTraining`|2.1.0| | | | | |`cudnnRNNForwardTrainingEx`|7.2.1|8.0.1| |9.0.0| | | | | | | | | | | | | |`cudnnRNNGetClip`|7.2.1|8.0.1| |9.0.0| | | | | | | | | | | | | |`cudnnRNNGetClip_v8`|8.0.1| | | | | | | | | | | | | | | | @@ -1108,7 +1114,7 @@ |`cudnnReduceTensor`|6.0.0|9.0.0| | |`hipdnnReduceTensor`| | | | | |`miopenReduceTensor`|3.9.0| | | | | |`cudnnReorderFilterAndBias`|7.6.0|9.0.0| 
| | | | | | | | | | | | | | |`cudnnRestoreAlgorithm`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | -|`cudnnRestoreDropoutDescriptor`|7.0.5| | | | | | | | | |`miopenRestoreDropoutDescriptor`| | | | | | +|`cudnnRestoreDropoutDescriptor`|7.0.5| | | | | | | | | |`miopenRestoreDropoutDescriptor`|2.8.0| | | | | |`cudnnSaveAlgorithm`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnScaleTensor`|2.0.0|9.0.0| | |`hipdnnScaleTensor`| | | | | |`miopenScaleTensor`|2.1.0| | | | | |`cudnnSetActivationDescriptor`|4.0.0|9.0.0| | |`hipdnnSetActivationDescriptor`| | | | | | | | | | | | @@ -1116,7 +1122,7 @@ |`cudnnSetAlgorithmDescriptor`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnSetAlgorithmPerformance`|7.1.3|8.0.2| |9.0.0| | | | | | | | | | | | | |`cudnnSetAttnDescriptor`|7.5.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnSetCTCLossDescriptor`|7.0.5|9.0.0| | | | | | | | |`miopenSetCTCLossDescriptor`| | | | | | +|`cudnnSetCTCLossDescriptor`|7.0.5|9.0.0| | | | | | | | |`miopenSetCTCLossDescriptor`|2.6.0| | | | | |`cudnnSetCTCLossDescriptorEx`|7.6.0|9.0.0| | | | | | | | | | | | | | | |`cudnnSetCTCLossDescriptor_v8`|8.0.1|9.0.0| | | | | | | | | | | | | | | |`cudnnSetCTCLossDescriptor_v9`|9.0.0| | | | | | | | | | | | | | | | @@ -1126,12 +1132,12 @@ |`cudnnSetConvolutionMathType`|7.0.5|9.0.0| | |`hipdnnSetConvolutionMathType`| | | | | | | | | | | | |`cudnnSetConvolutionNdDescriptor`|2.0.0|9.0.0| | |`hipdnnSetConvolutionNdDescriptor`| | | | | | | | | | | | |`cudnnSetConvolutionReorderType`|7.6.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnSetDropoutDescriptor`|5.0.0| | | |`hipdnnSetDropoutDescriptor`| | | | | |`miopenSetDropoutDescriptor`| | | | | | +|`cudnnSetDropoutDescriptor`|5.0.0| | | |`hipdnnSetDropoutDescriptor`| | | | | |`miopenSetDropoutDescriptor`|2.8.0| | | | | |`cudnnSetFilter4dDescriptor`|2.0.0|9.0.0| | |`hipdnnSetFilter4dDescriptor`| | | | | | | | | | | | |`cudnnSetFilterNdDescriptor`|2.0.0|9.0.0| | |`hipdnnSetFilterNdDescriptor`| | | | | | | | | | | | 
|`cudnnSetFusedOpsConstParamPackAttribute`|7.6.0|9.0.0| | | | | | | | | | | | | | | |`cudnnSetFusedOpsVariantParamPackAttribute`|7.6.0|9.0.0| | | | | | | | | | | | | | | -|`cudnnSetLRNDescriptor`|3.0.0| | | |`hipdnnSetLRNDescriptor`| | | | | |`miopenSetLRNDescriptor`| | | | | | +|`cudnnSetLRNDescriptor`|3.0.0| | | |`hipdnnSetLRNDescriptor`| | | | | |`miopenSetLRNDescriptor`|2.1.0| | | | | |`cudnnSetOpTensorDescriptor`|5.0.0|9.0.0| | |`hipdnnSetOpTensorDescriptor`| | | | | | | | | | | | |`cudnnSetPersistentRNNPlan`|6.0.0|8.0.1| |9.0.0|`hipdnnSetPersistentRNNPlan`| | | | | | | | | | | | |`cudnnSetPooling2dDescriptor`|2.0.0|9.0.0| | |`hipdnnSetPooling2dDescriptor`| | | | | |`miopenSet2dPoolingDescriptor`|2.1.0| | | | | @@ -1141,7 +1147,7 @@ |`cudnnSetRNNDataDescriptor`|7.2.1| | | | | | | | | | | | | | | | |`cudnnSetRNNDescriptor`|5.0.0|7.6.5| |8.0.1|`hipdnnSetRNNDescriptor`| | | | | | | | | | | | |`cudnnSetRNNDescriptor_v5`|7.0.5|7.6.5| |8.0.1|`hipdnnSetRNNDescriptor_v5`| | | | | | | | | | | | -|`cudnnSetRNNDescriptor_v6`|6.0.0|8.0.1| |9.0.0|`hipdnnSetRNNDescriptor_v6`| | | | | |`miopenSetRNNDescriptor_V2`| | | | | | +|`cudnnSetRNNDescriptor_v6`|6.0.0|8.0.1| |9.0.0|`hipdnnSetRNNDescriptor_v6`| | | | | |`miopenSetRNNDescriptor_V2`|3.5.0| | | | | |`cudnnSetRNNDescriptor_v8`|8.0.1| | | | | | | | | | | | | | | | |`cudnnSetRNNMatrixMathType`|7.0.5|8.0.1| |9.0.0| | | | | | | | | | | | | |`cudnnSetRNNPaddingMode`|7.2.1|8.0.1| |9.0.0| | | | | | | | | | | | | diff --git a/docs/tables/CUDNN_API_supported_by_MIOPEN.md b/docs/tables/CUDNN_API_supported_by_MIOPEN.md index 7af31076..f0a4172f 100644 --- a/docs/tables/CUDNN_API_supported_by_MIOPEN.md +++ b/docs/tables/CUDNN_API_supported_by_MIOPEN.md @@ -12,7 +12,7 @@ |`CUDNN_ACTIVATION_ELU`|6.0.0|9.0.0| | |`miopenActivationELU`|2.1.0| | | | | |`CUDNN_ACTIVATION_IDENTITY`|7.1.3|9.0.0| | |`miopenActivationPASTHRU`|2.1.0| | | | | |`CUDNN_ACTIVATION_RELU`|1.0.0|9.0.0| | |`miopenActivationRELU`|2.1.0| | | | | 
-|`CUDNN_ACTIVATION_SIGMOID`|1.0.0|9.0.0| | | | | | | | | +|`CUDNN_ACTIVATION_SIGMOID`|1.0.0|9.0.0| | |`miopenActivationLOGISTIC`|2.1.0| | | | | |`CUDNN_ACTIVATION_SWISH`|8.2.0|9.0.0| | | | | | | | | |`CUDNN_ACTIVATION_TANH`|1.0.0|9.0.0| | |`miopenActivationTANH`|2.1.0| | | | | |`CUDNN_ATTN_DISABLE_PROJ_BIASES`|7.6.3| | | | | | | | | | @@ -55,6 +55,7 @@ |`CUDNN_ATTR_INTERMEDIATE_INFO_SIZE`|8.0.1| | | |`MIOPEN_ATTR_INTERMEDIATE_INFO_SIZE`|6.2.0| | | | | |`CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID`|8.0.2| | | |`MIOPEN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID`|6.2.0| | | | | |`CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED`|9.4.0| | | | | | | | | | +|`CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH`|9.5.0| | | | | | | | | | |`CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE`|8.0.1| | | |`MIOPEN_ATTR_KNOB_CHOICE_KNOB_TYPE`|6.2.0| | | | | |`CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE`|8.0.1| | | |`MIOPEN_ATTR_KNOB_CHOICE_KNOB_VALUE`|6.2.0| | | | | |`CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE`|8.0.1| | | |`MIOPEN_ATTR_KNOB_INFO_MAXIMUM_VALUE`|6.2.0| | | | | @@ -287,6 +288,7 @@ |`CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER`|8.3.0| | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER`|8.3.0| | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION`|8.2.0| | | | | | | | | | +|`CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API`|9.5.0| | | | | | | | | | |`CUDNN_BEHAVIOR_NOTE_TYPE_COUNT`|8.2.0| | | | | | | | | | |`CUDNN_BIDIRECTIONAL`|5.0.0| | | |`miopenRNNbidirection`|2.1.0| | | | | |`CUDNN_BN_FINALIZE_STATISTICS_INFERENCE`|8.1.0| | | | | | | | | | @@ -295,7 +297,7 @@ |`CUDNN_CONVOLUTION`|1.0.0|9.0.0| | |`miopenConvolution`|2.1.0| | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_0`|3.0.0| | | |`miopenConvolutionBwdDataAlgoGEMM`|2.1.0| | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_1`|3.0.0| | | |`miopenConvolutionBwdDataAlgoDirect`|2.1.0| | | | | -|`CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT`|6.0.0| | | | | | | | | | +|`CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT`|6.0.0| | | |`miopenTransposeBwdDataAlgoGEMM`|2.1.0| | 
| | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT`|3.0.0| | | |`miopenConvolutionBwdDataAlgoFFT`|2.1.0| | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING`|4.0.0| | | | | | | | | | |`CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD`|5.0.0| | | |`miopenConvolutionBwdDataAlgoWinograd`|2.1.0| | | | | @@ -336,8 +338,8 @@ |`CUDNN_DATA_DOUBLE`|1.0.0| | | |`miopenDouble`|4.5.0| | | | | |`CUDNN_DATA_FAST_FLOAT_FOR_FP8`|8.7.0| | | | | | | | | | |`CUDNN_DATA_FLOAT`|1.0.0| | | |`miopenFloat`|2.1.0| | | | | -|`CUDNN_DATA_FP8_E4M3`|8.6.0| | | | | | | | | | -|`CUDNN_DATA_FP8_E5M2`|8.6.0| | | | | | | | | | +|`CUDNN_DATA_FP8_E4M3`|8.6.0| | | |`miopenFloat8`|6.0.0| | | | | +|`CUDNN_DATA_FP8_E5M2`|8.6.0| | | |`miopenBFloat8`|6.0.0| | | | | |`CUDNN_DATA_HALF`|3.0.0| | | |`miopenHalf`|2.1.0| | | | | |`CUDNN_DATA_INT32`|6.0.0| | | |`miopenInt32`|2.1.0| | | | | |`CUDNN_DATA_INT64`|8.1.0| | | |`miopenInt64`|6.2.0| | | | | @@ -351,7 +353,7 @@ |`CUDNN_DETERMINISTIC`|6.0.0| | | | | | | | | | |`CUDNN_DIM_MAX`|4.0.0| | | | | | | | | | |`CUDNN_DIVNORM_PRECOMPUTED_MEANS`|3.0.0| | | | | | | | | | -|`CUDNN_EDGE_VAL_PAD`|8.3.0| | | | | | | | | | +|`CUDNN_EDGE_VAL_PAD`|8.3.0| | | |`miopenPaddingValid`|2.1.0| | | | | |`CUDNN_ERRQUERY_BLOCKING`|7.0.5| | | | | | | | | | |`CUDNN_ERRQUERY_NONBLOCKING`|7.0.5| | | | | | | | | | |`CUDNN_ERRQUERY_RAWCODE`|7.0.5| | | | | | | | | | @@ -363,8 +365,8 @@ |`CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS`|7.6.0| | | | | | | | | | |`CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD`|7.6.0| | | | | | | | | | |`CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK`|7.6.0| | | | | | | | | | -|`CUDNN_FWD_MODE_INFERENCE`|8.0.1| | | | | | | | | | -|`CUDNN_FWD_MODE_TRAINING`|8.0.1| | | | | | | | | | +|`CUDNN_FWD_MODE_INFERENCE`|8.0.1| | | |`miopenRNNInference`|6.0.0| | | | | +|`CUDNN_FWD_MODE_TRAINING`|8.0.1| | | |`miopenRNNTraining`|6.0.0| | | | | |`CUDNN_GENSTATS_SUM_SQSUM`|8.0.1| | | | | | | | | | |`CUDNN_GROUP_NORM`|8.5.0| | | | | | | | | | |`CUDNN_GRU`|5.0.0| | | |`miopenGRU`|2.1.0| | | | | 
@@ -435,7 +437,7 @@ |`CUDNN_MH_ATTN_Q_WEIGHTS`|7.5.0| | | | | | | | | | |`CUDNN_MH_ATTN_V_BIASES`|7.6.3| | | | | | | | | | |`CUDNN_MH_ATTN_V_WEIGHTS`|7.5.0| | | | | | | | | | -|`CUDNN_NEG_INF_PAD`|8.3.0| | | | | | | | | | +|`CUDNN_NEG_INF_PAD`|8.3.0| | | |`miopenPaddingSame`|2.1.0| | | | | |`CUDNN_NON_DETERMINISTIC`|6.0.0| | | | | | | | | | |`CUDNN_NORM_ALGO_PERSIST`|8.0.1|9.0.0| | | | | | | | | |`CUDNN_NORM_ALGO_STANDARD`|8.0.1|9.0.0| | | | | | | | | @@ -555,8 +557,8 @@ |`CUDNN_POINTWISE_TAN`|8.3.0| | | |`MIOPEN_POINTWISE_TAN`|6.2.0| | | | | |`CUDNN_POINTWISE_TANH_BWD`|8.1.0| | | |`MIOPEN_POINTWISE_TANH_BWD`|6.2.0| | | | | |`CUDNN_POINTWISE_TANH_FWD`|8.0.1| | | |`MIOPEN_POINTWISE_TANH_FWD`|6.2.0| | | | | -|`CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`|2.0.0|9.0.0| | | | | | | | | -|`CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING`|2.0.0|9.0.0| | | | | | | | | +|`CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING`|2.0.0|9.0.0| | |`miopenPoolingAverage`|2.1.0| | | | | +|`CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING`|2.0.0|9.0.0| | |`miopenPoolingAverageInclusive`|2.1.0| | | | | |`CUDNN_POOLING_MAX`|1.0.0|9.0.0| | |`miopenPoolingMax`|2.1.0| | | | | |`CUDNN_POOLING_MAX_DETERMINISTIC`|6.0.0|9.0.0| | | | | | | | | |`CUDNN_PROPAGATE_NAN`|4.0.0|9.0.0| | |`MIOPEN_PROPAGATE_NAN`|3.9.0| | | | | @@ -620,8 +622,8 @@ |`CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED`|7.2.1| | | | | | | | | | |`CUDNN_RNN_DOUBLE_BIAS`|7.5.0| | | |`miopenRNNwithBias`|2.1.0| | | | | |`CUDNN_RNN_NO_BIAS`|7.5.0| | | |`miopenRNNNoBias`|2.1.0| | | | | -|`CUDNN_RNN_PADDED_IO_DISABLED`|7.2.1| | | | | | | | | | -|`CUDNN_RNN_PADDED_IO_ENABLED`|7.2.1| | | | | | | | | | +|`CUDNN_RNN_PADDED_IO_DISABLED`|7.2.1|8.0.1| |9.0.0|`miopenRNNIONotPadded`|6.0.0| | | | | +|`CUDNN_RNN_PADDED_IO_ENABLED`|7.2.1|8.0.1| |9.0.0|`miopenRNNIOWithPadding`|6.0.0| | | | | |`CUDNN_RNN_RELU`|5.0.0| | | |`miopenRNNRELU`|2.1.0| | | | | |`CUDNN_RNN_SINGLE_INP_BIAS`|7.5.0| | | |`miopenRNNwithBias`|2.1.0| | | | | |`CUDNN_RNN_SINGLE_REC_BIAS`|7.5.0| | | 
|`miopenRNNwithBias`|2.1.0| | | | | @@ -655,6 +657,7 @@ |`CUDNN_STATUS_ARCH_MISMATCH`|1.0.0|9.0.0| | | | | | | | | |`CUDNN_STATUS_BAD_PARAM`|1.0.0| | | |`miopenStatusBadParm`|2.1.0| | | | | |`CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE`|9.0.0| | | | | | | | | | +|`CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH`|9.5.0| | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED`|9.0.0| | | | | | | | | | @@ -685,6 +688,7 @@ |`CUDNN_STATUS_NOT_SUPPORTED`|1.0.0| | | |`miopenStatusUnsupportedOp`|2.1.0| | | | | |`CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM`|9.0.0| | | | | | | | | | +|`CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API`|9.5.0| | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN`|9.0.0| | | | | | | | | | |`CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART`|9.0.0| | | | | | | | | | @@ -714,40 +718,40 @@ |`CUDNN_TENSOR_REORDERING_NONE`|8.3.0| | | | | | | | | | |`CUDNN_TRANSFORM_FOLD`|7.5.0| | | | | | | | | | |`CUDNN_TRANSFORM_UNFOLD`|7.5.0| | | | | | | | | | -|`CUDNN_TYPE_ATTRIB_NAME`|8.0.1| | | |`MIOPEN_TYPE_ATTRIB_NAME`| | | | | | -|`CUDNN_TYPE_BACKEND_DESCRIPTOR`|8.0.1| | | |`MIOPEN_TYPE_BACKEND_DESCRIPTOR`| | | | | | -|`CUDNN_TYPE_BEHAVIOR_NOTE`|8.2.0| | | |`MIOPEN_TYPE_BEHAVIOR_NOTE`| | | | | | -|`CUDNN_TYPE_BN_FINALIZE_STATS_MODE`|8.1.0| | | |`MIOPEN_TYPE_BN_FINALIZE_STATS_MODE`| | | | | | -|`CUDNN_TYPE_BOOLEAN`|8.0.1| | | |`MIOPEN_TYPE_BOOLEAN`| | | | | | -|`CUDNN_TYPE_CHAR`|8.4.0| | | |`MIOPEN_TYPE_CHAR`| | | | | | -|`CUDNN_TYPE_CONVOLUTION_MODE`|8.0.1| | | |`MIOPEN_TYPE_CONVOLUTION_MODE`| | | | | | -|`CUDNN_TYPE_DATA_TYPE`|8.0.1| | | |`MIOPEN_TYPE_DATA_TYPE`| | | | | | -|`CUDNN_TYPE_DOUBLE`|8.0.1| | | |`MIOPEN_TYPE_DOUBLE`| | | | | | -|`CUDNN_TYPE_FLOAT`|8.0.1| | | |`MIOPEN_TYPE_FLOAT`| | 
| | | | -|`CUDNN_TYPE_FRACTION`|8.5.0| | | |`MIOPEN_TYPE_FRACTION`| | | | | | -|`CUDNN_TYPE_GENSTATS_MODE`|8.0.1| | | |`MIOPEN_TYPE_GENSTATS_MODE`| | | | | | -|`CUDNN_TYPE_HANDLE`|8.0.1| | | |`MIOPEN_TYPE_HANDLE`| | | | | | -|`CUDNN_TYPE_HEUR_MODE`|8.0.1| | | |`MIOPEN_TYPE_HEUR_MODE`| | | | | | -|`CUDNN_TYPE_INT32`|8.3.0| | | |`MIOPEN_TYPE_INT32`| | | | | | -|`CUDNN_TYPE_INT64`|8.0.1| | | |`MIOPEN_TYPE_INT64`| | | | | | -|`CUDNN_TYPE_KNOB_TYPE`|8.0.1| | | |`MIOPEN_TYPE_KNOB_TYPE`| | | | | | -|`CUDNN_TYPE_LAYOUT_TYPE`|8.0.2| | | |`MIOPEN_TYPE_LAYOUT_TYPE`| | | | | | -|`CUDNN_TYPE_NAN_PROPOGATION`|8.0.1|9.0.0| | |`MIOPEN_TYPE_NAN_PROPOGATION`| | | | | | -|`CUDNN_TYPE_NORM_FWD_PHASE`|8.5.0| | | |`MIOPEN_TYPE_NORM_FWD_PHASE`| | | | | | -|`CUDNN_TYPE_NORM_MODE`|8.5.0| | | |`MIOPEN_TYPE_NORM_MODE`| | | | | | -|`CUDNN_TYPE_NUMERICAL_NOTE`|8.0.1| | | |`MIOPEN_TYPE_NUMERICAL_NOTE`| | | | | | -|`CUDNN_TYPE_PADDING_MODE`|8.3.0| | | |`MIOPEN_TYPE_PADDING_MODE`| | | | | | -|`CUDNN_TYPE_POINTWISE_MODE`|8.0.1| | | |`MIOPEN_TYPE_POINTWISE_MODE`| | | | | | -|`CUDNN_TYPE_REDUCTION_OPERATOR_TYPE`|8.1.0| | | |`MIOPEN_TYPE_REDUCTION_OPERATOR_TYPE`| | | | | | -|`CUDNN_TYPE_RESAMPLE_MODE`|8.3.0| | | |`MIOPEN_TYPE_RESAMPLE_MODE`| | | | | | -|`CUDNN_TYPE_RNG_DISTRIBUTION`|8.7.0| | | |`MIOPEN_TYPE_RNG_DISTRIBUTION`| | | | | | -|`CUDNN_TYPE_SIGNAL_MODE`|8.5.0| | | |`MIOPEN_TYPE_SIGNAL_MODE`| | | | | | -|`CUDNN_TYPE_TENSOR_REORDERING_MODE`|8.3.0| | | |`MIOPEN_TYPE_TENSOR_REORDERING_MODE`| | | | | | -|`CUDNN_TYPE_VOID_PTR`|8.0.1| | | |`MIOPEN_TYPE_VOID_PTR`| | | | | | +|`CUDNN_TYPE_ATTRIB_NAME`|8.0.1| | | |`MIOPEN_TYPE_ATTRIB_NAME`|6.2.0| | | | | +|`CUDNN_TYPE_BACKEND_DESCRIPTOR`|8.0.1| | | |`MIOPEN_TYPE_BACKEND_DESCRIPTOR`|6.2.0| | | | | +|`CUDNN_TYPE_BEHAVIOR_NOTE`|8.2.0| | | |`MIOPEN_TYPE_BEHAVIOR_NOTE`|6.2.0| | | | | +|`CUDNN_TYPE_BN_FINALIZE_STATS_MODE`|8.1.0| | | |`MIOPEN_TYPE_BN_FINALIZE_STATS_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_BOOLEAN`|8.0.1| | | |`MIOPEN_TYPE_BOOLEAN`|6.2.0| | | | | 
+|`CUDNN_TYPE_CHAR`|8.4.0| | | |`MIOPEN_TYPE_CHAR`|6.2.0| | | | | +|`CUDNN_TYPE_CONVOLUTION_MODE`|8.0.1| | | |`MIOPEN_TYPE_CONVOLUTION_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_DATA_TYPE`|8.0.1| | | |`MIOPEN_TYPE_DATA_TYPE`|6.2.0| | | | | +|`CUDNN_TYPE_DOUBLE`|8.0.1| | | |`MIOPEN_TYPE_DOUBLE`|6.2.0| | | | | +|`CUDNN_TYPE_FLOAT`|8.0.1| | | |`MIOPEN_TYPE_FLOAT`|6.2.0| | | | | +|`CUDNN_TYPE_FRACTION`|8.5.0| | | |`MIOPEN_TYPE_FRACTION`|6.2.0| | | | | +|`CUDNN_TYPE_GENSTATS_MODE`|8.0.1| | | |`MIOPEN_TYPE_GENSTATS_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_HANDLE`|8.0.1| | | |`MIOPEN_TYPE_HANDLE`|6.2.0| | | | | +|`CUDNN_TYPE_HEUR_MODE`|8.0.1| | | |`MIOPEN_TYPE_HEUR_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_INT32`|8.3.0| | | |`MIOPEN_TYPE_INT32`|6.2.0| | | | | +|`CUDNN_TYPE_INT64`|8.0.1| | | |`MIOPEN_TYPE_INT64`|6.2.0| | | | | +|`CUDNN_TYPE_KNOB_TYPE`|8.0.1| | | |`MIOPEN_TYPE_KNOB_TYPE`|6.2.0| | | | | +|`CUDNN_TYPE_LAYOUT_TYPE`|8.0.2| | | |`MIOPEN_TYPE_LAYOUT_TYPE`|6.2.0| | | | | +|`CUDNN_TYPE_NAN_PROPOGATION`|8.0.1|9.0.0| | |`MIOPEN_TYPE_NAN_PROPOGATION`|6.2.0| | | | | +|`CUDNN_TYPE_NORM_FWD_PHASE`|8.5.0| | | |`MIOPEN_TYPE_NORM_FWD_PHASE`|6.2.0| | | | | +|`CUDNN_TYPE_NORM_MODE`|8.5.0| | | |`MIOPEN_TYPE_NORM_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_NUMERICAL_NOTE`|8.0.1| | | |`MIOPEN_TYPE_NUMERICAL_NOTE`|6.2.0| | | | | +|`CUDNN_TYPE_PADDING_MODE`|8.3.0| | | |`MIOPEN_TYPE_PADDING_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_POINTWISE_MODE`|8.0.1| | | |`MIOPEN_TYPE_POINTWISE_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_REDUCTION_OPERATOR_TYPE`|8.1.0| | | |`MIOPEN_TYPE_REDUCTION_OPERATOR_TYPE`|6.2.0| | | | | +|`CUDNN_TYPE_RESAMPLE_MODE`|8.3.0| | | |`MIOPEN_TYPE_RESAMPLE_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_RNG_DISTRIBUTION`|8.7.0| | | |`MIOPEN_TYPE_RNG_DISTRIBUTION`|6.2.0| | | | | +|`CUDNN_TYPE_SIGNAL_MODE`|8.5.0| | | |`MIOPEN_TYPE_SIGNAL_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_TENSOR_REORDERING_MODE`|8.3.0| | | |`MIOPEN_TYPE_TENSOR_REORDERING_MODE`|6.2.0| | | | | +|`CUDNN_TYPE_VOID_PTR`|8.0.1| | | |`MIOPEN_TYPE_VOID_PTR`|6.2.0| 
| | | | |`CUDNN_UNIDIRECTIONAL`|5.0.0| | | |`miopenRNNunidirection`|2.1.0| | | | | |`CUDNN_WGRAD_MODE_ADD`|7.5.0| | | | | | | | | | |`CUDNN_WGRAD_MODE_SET`|7.5.0| | | | | | | | | | -|`CUDNN_ZERO_PAD`|8.3.0| | | | | | | | | | +|`CUDNN_ZERO_PAD`|8.3.0| | | |`miopenPaddingDefault`|2.1.0| | | | | |`cudnnActivationDescriptor_t`|4.0.0|9.0.0| | |`miopenActivationDescriptor_t`|2.1.0| | | | | |`cudnnActivationMode_t`|1.0.0|9.0.0| | |`miopenActivationMode_t`|2.1.0| | | | | |`cudnnActivationStruct`|4.0.0|9.0.0| | | | | | | | | @@ -761,7 +765,7 @@ |`cudnnAttnQueryMap_t`|7.5.0| | |9.0.0| | | | | | | |`cudnnAttnStruct`|7.5.0| | | | | | | | | | |`cudnnBackendAttributeName_t`|8.0.1| | | |`miopenBackendAttributeName_t`|6.2.0| | | | | -|`cudnnBackendAttributeType_t`|8.0.1| | | |`miopenBackendAttributeType_t`| | | | | | +|`cudnnBackendAttributeType_t`|8.0.1| | | |`miopenBackendAttributeType_t`|6.2.0| | | | | |`cudnnBackendBehaviorNote_t`|8.2.0| | | | | | | | | | |`cudnnBackendDescriptorType_t`|8.0.1| | | |`miopenBackendDescriptorType_t`|6.2.0| | | | | |`cudnnBackendDescriptor_t`|8.0.1| | | |`miopenBackendDescriptor_t`|6.2.0| | | | | @@ -808,7 +812,7 @@ |`cudnnFilterDescriptor_t`|1.0.0| | | |`miopenTensorDescriptor_t`|2.1.0| | | | | |`cudnnFilterStruct`|1.0.0|9.0.0| | | | | | | | | |`cudnnFoldingDirection_t`|7.5.0| | | | | | | | | | -|`cudnnForwardMode_t`|8.0.1| | | | | | | | | | +|`cudnnForwardMode_t`|8.0.1| | | |`miopenRNNFWDMode_t`|6.0.0| | | | | |`cudnnFractionStruct`|8.5.0| | | | | | | | | | |`cudnnFraction_t`|8.5.0| | | | | | | | | | |`cudnnFusedOpsConstParamLabel_t`|7.6.0|9.0.0| | | | | | | | | @@ -837,7 +841,7 @@ |`cudnnOpTensorDescriptor_t`|5.0.0|9.0.0| | | | | | | | | |`cudnnOpTensorOp_t`|5.0.0| | | |`miopenTensorOp_t`|2.1.0| | | | | |`cudnnOpTensorStruct`|5.0.0|9.0.0| | | | | | | | | -|`cudnnPaddingMode_t`|8.3.0| | | | | | | | | | +|`cudnnPaddingMode_t`|8.3.0| | | |`miopenPaddingMode_t`|2.1.0| | | | | |`cudnnPersistentRNNPlan`|6.0.0| | | | | | | | | | 
|`cudnnPersistentRNNPlan_t`|6.0.0| | | | | | | | | | |`cudnnPointwiseMode_t`|8.0.1| | | |`miopenPointwiseMode_t`|6.2.0| | | | | @@ -853,7 +857,7 @@ |`cudnnRNNDescriptor_t`|5.0.0| | | |`miopenRNNDescriptor_t`|2.1.0| | | | | |`cudnnRNNInputMode_t`|5.0.0| | | |`miopenRNNInputMode_t`|2.1.0| | | | | |`cudnnRNNMode_t`|5.0.0| | | |`miopenRNNMode_t`|2.1.0| | | | | -|`cudnnRNNPaddingMode_t`|7.2.1| | | | | | | | | | +|`cudnnRNNPaddingMode_t`|7.2.1|8.0.1| |9.0.0|`miopenRNNPaddingMode_t`|6.0.0| | | | | |`cudnnRNNStruct`|5.0.0| | | | | | | | | | |`cudnnReduceTensorDescriptor_t`|6.0.0|9.0.0| | |`miopenReduceTensorDescriptor_t`|3.9.0| | | | | |`cudnnReduceTensorIndices_t`|6.0.0|9.0.0| | |`miopenReduceTensorIndices_t`|3.9.0| | | | | @@ -887,26 +891,28 @@ |**CUDA**|**A**|**D**|**C**|**R**|**MIOPEN**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| -|`cudnnActivationBackward`|1.0.0|9.0.0| | |`miopenActivationBackward`| | | | | | -|`cudnnActivationForward`|1.0.0|9.0.0| | |`miopenActivationForward`| | | | | | +|`cudnnActivationBackward`|1.0.0|9.0.0| | |`miopenActivationBackward`|2.1.0| | | | | +|`cudnnActivationForward`|1.0.0|9.0.0| | |`miopenActivationForward`|2.1.0| | | | | |`cudnnAddTensor`|2.0.0|9.0.0| | | | | | | | | |`cudnnAdvInferVersionCheck`|8.0.1| | |9.0.0| | | | | | | |`cudnnAdvTrainVersionCheck`|8.0.1| | |9.0.0| | | | | | | |`cudnnAdvVersionCheck`|9.0.0| | | | | | | | | | -|`cudnnBackendCreateDescriptor`|8.0.1| | | |`miopenBackendCreateDescriptor`| | | | | | -|`cudnnBackendDestroyDescriptor`|8.0.1| | | |`miopenBackendDestroyDescriptor`| | | | | | -|`cudnnBackendExecute`|8.0.1| | | |`miopenBackendExecute`| | | | | | -|`cudnnBackendFinalize`|8.0.1| | | |`miopenBackendFinalize`| | | | | | -|`cudnnBackendGetAttribute`|8.0.1| | | |`miopenBackendGetAttribute`| | | | | | +|`cudnnBackendCreateDescriptor`|8.0.1| | | |`miopenBackendCreateDescriptor`|6.2.0| | | | | +|`cudnnBackendDestroyDescriptor`|8.0.1| | | |`miopenBackendDestroyDescriptor`|6.2.0| | | | 
| +|`cudnnBackendExecute`|8.0.1| | | |`miopenBackendExecute`|6.2.0| | | | | +|`cudnnBackendFinalize`|8.0.1| | | |`miopenBackendFinalize`|6.2.0| | | | | +|`cudnnBackendGetAttribute`|8.0.1| | | |`miopenBackendGetAttribute`|6.2.0| | | | | |`cudnnBackendInitialize`|8.0.1|9.3.0| | | | | | | | | -|`cudnnBackendSetAttribute`|8.0.1| | | |`miopenBackendSetAttribute`| | | | | | -|`cudnnBatchNormalizationBackward`|4.0.0|9.0.0| | |`miopenBatchNormalizationBackward`| | | | | | +|`cudnnBackendPopulateCudaGraph`|9.5.0| | | | | | | | | | +|`cudnnBackendSetAttribute`|8.0.1| | | |`miopenBackendSetAttribute`|6.2.0| | | | | +|`cudnnBackendUpdateCudaGraph`|9.5.0| | | | | | | | | | +|`cudnnBatchNormalizationBackward`|4.0.0|9.0.0| | |`miopenBatchNormalizationBackward`|2.1.0| | | | | |`cudnnBatchNormalizationBackwardEx`|7.4.1|9.0.0| | | | | | | | | -|`cudnnBatchNormalizationForwardInference`|4.0.0|9.0.0| | |`miopenBatchNormalizationForwardInference`| | | | | | -|`cudnnBatchNormalizationForwardTraining`|4.0.0|9.0.0| | |`miopenBatchNormalizationForwardTraining`| | | | | | +|`cudnnBatchNormalizationForwardInference`|4.0.0|9.0.0| | |`miopenBatchNormalizationForwardInference`|2.1.0| | | | | +|`cudnnBatchNormalizationForwardTraining`|4.0.0|9.0.0| | |`miopenBatchNormalizationForwardTraining`|2.1.0| | | | | |`cudnnBatchNormalizationForwardTrainingEx`|7.4.1|9.0.0| | | | | | | | | |`cudnnBuildRNNDynamic`|8.0.1| | | | | | | | | | -|`cudnnCTCLoss`|7.0.5| | | |`miopenCTCLoss`| | | | | | +|`cudnnCTCLoss`|7.0.5| | | |`miopenCTCLoss`|2.6.0| | | | | |`cudnnCTCLoss_v8`|8.0.1| | | | | | | | | | |`cudnnCnnInferVersionCheck`|8.0.2| | | | | | | | | | |`cudnnCnnTrainVersionCheck`|8.0.2| | | | | | | | | | @@ -917,48 +923,48 @@ |`cudnnConvolutionForward`|1.0.0|9.0.0| | |`miopenConvolutionForward`|2.1.0| | | | | |`cudnnCopyAlgorithmDescriptor`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnCreate`|1.0.0| | | |`miopenCreate`|2.1.0| | | | | -|`cudnnCreateActivationDescriptor`|4.0.0|9.0.0| | 
|`miopenCreateActivationDescriptor`| | | | | | +|`cudnnCreateActivationDescriptor`|4.0.0|9.0.0| | |`miopenCreateActivationDescriptor`|2.1.0| | | | | |`cudnnCreateAlgorithmDescriptor`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnCreateAlgorithmPerformance`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnCreateAttnDescriptor`|7.5.0|9.0.0| | | | | | | | | -|`cudnnCreateCTCLossDescriptor`|7.0.5| | | |`miopenCreateCTCLossDescriptor`| | | | | | +|`cudnnCreateCTCLossDescriptor`|7.0.5| | | |`miopenCreateCTCLossDescriptor`|2.6.0| | | | | |`cudnnCreateConvolutionDescriptor`|1.0.0|9.0.0| | |`miopenCreateConvolutionDescriptor`|2.1.0| | | | | -|`cudnnCreateDropoutDescriptor`|5.0.0| | | |`miopenCreateDropoutDescriptor`| | | | | | +|`cudnnCreateDropoutDescriptor`|5.0.0| | | |`miopenCreateDropoutDescriptor`|2.8.0| | | | | |`cudnnCreateFilterDescriptor`|1.0.0|9.0.0| | | | | | | | | |`cudnnCreateFusedOpsConstParamPack`|7.6.0|9.0.0| | | | | | | | | |`cudnnCreateFusedOpsPlan`|7.6.0|9.0.0| | | | | | | | | |`cudnnCreateFusedOpsVariantParamPack`|7.6.0|9.0.0| | | | | | | | | -|`cudnnCreateLRNDescriptor`|3.0.0| | | |`miopenCreateLRNDescriptor`| | | | | | +|`cudnnCreateLRNDescriptor`|3.0.0| | | |`miopenCreateLRNDescriptor`|2.1.0| | | | | |`cudnnCreateOpTensorDescriptor`|5.0.0|9.0.0| | | | | | | | | |`cudnnCreatePersistentRNNPlan`|6.0.0|8.0.1| |9.0.0| | | | | | | |`cudnnCreatePoolingDescriptor`|1.0.0|9.0.0| | |`miopenCreatePoolingDescriptor`|2.1.0| | | | | |`cudnnCreateRNNDataDescriptor`|7.2.1| | | | | | | | | | -|`cudnnCreateRNNDescriptor`|5.0.0| | | |`miopenCreateRNNDescriptor`| | | | | | +|`cudnnCreateRNNDescriptor`|5.0.0| | | |`miopenCreateRNNDescriptor`|2.1.0| | | | | |`cudnnCreateReduceTensorDescriptor`|6.0.0|9.0.0| | |`miopenCreateReduceTensorDescriptor`|3.9.0| | | | | |`cudnnCreateSeqDataDescriptor`|7.5.0|9.0.0| | | | | | | | | |`cudnnCreateSpatialTransformerDescriptor`|5.0.0| | | | | | | | | | |`cudnnCreateTensorDescriptor`|2.0.0| | | |`miopenCreateTensorDescriptor`|2.1.0| | | | | 
|`cudnnCreateTensorTransformDescriptor`|7.5.0|9.0.0| | | | | | | | | -|`cudnnDeriveBNTensorDescriptor`|4.0.0|9.0.0| | |`miopenDeriveBNTensorDescriptor`| | | | | | +|`cudnnDeriveBNTensorDescriptor`|4.0.0|9.0.0| | |`miopenDeriveBNTensorDescriptor`|2.1.0| | | | | |`cudnnDeriveNormTensorDescriptor`|8.0.1|9.0.0| | | | | | | | | |`cudnnDestroy`|1.0.0| | | |`miopenDestroy`|2.1.0| | | | | -|`cudnnDestroyActivationDescriptor`|4.0.0|9.0.0| | |`miopenDestroyActivationDescriptor`| | | | | | +|`cudnnDestroyActivationDescriptor`|4.0.0|9.0.0| | |`miopenDestroyActivationDescriptor`|2.1.0| | | | | |`cudnnDestroyAlgorithmDescriptor`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnDestroyAlgorithmPerformance`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnDestroyAttnDescriptor`|7.5.0|9.0.0| | | | | | | | | -|`cudnnDestroyCTCLossDescriptor`|7.0.5| | | |`miopenDestroyCTCLossDescriptor`| | | | | | +|`cudnnDestroyCTCLossDescriptor`|7.0.5| | | |`miopenDestroyCTCLossDescriptor`|2.6.0| | | | | |`cudnnDestroyConvolutionDescriptor`|1.0.0|9.0.0| | |`miopenDestroyConvolutionDescriptor`|2.1.0| | | | | -|`cudnnDestroyDropoutDescriptor`|5.0.0| | | |`miopenDestroyDropoutDescriptor`| | | | | | +|`cudnnDestroyDropoutDescriptor`|5.0.0| | | |`miopenDestroyDropoutDescriptor`|2.8.0| | | | | |`cudnnDestroyFilterDescriptor`|1.0.0|9.0.0| | | | | | | | | |`cudnnDestroyFusedOpsConstParamPack`|7.6.0|9.0.0| | | | | | | | | |`cudnnDestroyFusedOpsPlan`|7.6.0|9.0.0| | | | | | | | | |`cudnnDestroyFusedOpsVariantParamPack`|7.6.0|9.0.0| | | | | | | | | -|`cudnnDestroyLRNDescriptor`|3.0.0| | | |`miopenDestroyLRNDescriptor`| | | | | | +|`cudnnDestroyLRNDescriptor`|3.0.0| | | |`miopenDestroyLRNDescriptor`|2.1.0| | | | | |`cudnnDestroyOpTensorDescriptor`|5.0.0|9.0.0| | | | | | | | | |`cudnnDestroyPersistentRNNPlan`|6.0.0|8.0.1| |9.0.0| | | | | | | |`cudnnDestroyPoolingDescriptor`|1.0.0|9.0.0| | |`miopenDestroyPoolingDescriptor`|2.1.0| | | | | |`cudnnDestroyRNNDataDescriptor`|7.2.1| | | | | | | | | | -|`cudnnDestroyRNNDescriptor`|5.0.0| 
| | |`miopenDestroyRNNDescriptor`| | | | | | +|`cudnnDestroyRNNDescriptor`|5.0.0| | | |`miopenDestroyRNNDescriptor`|2.1.0| | | | | |`cudnnDestroyReduceTensorDescriptor`|6.0.0|9.0.0| | |`miopenDestroyReduceTensorDescriptor`|3.9.0| | | | | |`cudnnDestroySeqDataDescriptor`|7.5.0|9.0.0| | | | | | | | | |`cudnnDestroySpatialTransformerDescriptor`|5.0.0| | | | | | | | | | @@ -966,10 +972,10 @@ |`cudnnDestroyTensorTransformDescriptor`|7.5.0|9.0.0| | | | | | | | | |`cudnnDivisiveNormalizationBackward`|3.0.0| | | | | | | | | | |`cudnnDivisiveNormalizationForward`|3.0.0| | | | | | | | | | -|`cudnnDropoutBackward`|5.0.0| | | |`miopenDropoutBackward`| | | | | | -|`cudnnDropoutForward`|5.0.0| | | |`miopenDropoutForward`| | | | | | -|`cudnnDropoutGetReserveSpaceSize`|5.0.0| | | |`miopenDropoutGetReserveSpaceSize`| | | | | | -|`cudnnDropoutGetStatesSize`|5.0.0| | | |`miopenDropoutGetStatesSize`| | | | | | +|`cudnnDropoutBackward`|5.0.0| | | |`miopenDropoutBackward`|2.8.0| | | | | +|`cudnnDropoutForward`|5.0.0| | | |`miopenDropoutForward`|2.8.0| | | | | +|`cudnnDropoutGetReserveSpaceSize`|5.0.0| | | |`miopenDropoutGetReserveSpaceSize`|2.8.0| | | | | +|`cudnnDropoutGetStatesSize`|5.0.0| | | |`miopenDropoutGetStatesSize`|2.8.0| | | | | |`cudnnFindConvolutionBackwardDataAlgorithm`|3.0.0|9.0.0| | | | | | | | | |`cudnnFindConvolutionBackwardDataAlgorithmEx`|5.0.0|9.0.0| | | | | | | | | |`cudnnFindConvolutionBackwardFilterAlgorithm`|3.0.0|9.0.0| | | | | | | | | @@ -990,11 +996,11 @@ |`cudnnGetBatchNormalizationBackwardExWorkspaceSize`|7.4.1|9.0.0| | | | | | | | | |`cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize`|7.4.1|9.0.0| | | | | | | | | |`cudnnGetBatchNormalizationTrainingExReserveSpaceSize`|7.4.1|9.0.0| | | | | | | | | -|`cudnnGetCTCLossDescriptor`|7.0.5|9.0.0| | |`miopenGetCTCLossDescriptor`| | | | | | +|`cudnnGetCTCLossDescriptor`|7.0.5|9.0.0| | |`miopenGetCTCLossDescriptor`|2.6.0| | | | | |`cudnnGetCTCLossDescriptorEx`|7.5.0|9.0.0| | | | | | | | | 
|`cudnnGetCTCLossDescriptor_v8`|8.0.1|9.0.0| | | | | | | | | |`cudnnGetCTCLossDescriptor_v9`|9.0.0| | | | | | | | | | -|`cudnnGetCTCLossWorkspaceSize`|7.0.5| | | |`miopenGetCTCLossWorkspaceSize`| | | | | | +|`cudnnGetCTCLossWorkspaceSize`|7.0.5| | | |`miopenGetCTCLossWorkspaceSize`|2.6.0| | | | | |`cudnnGetCTCLossWorkspaceSize_v8`|8.0.1| | | | | | | | | | |`cudnnGetCallback`|7.1.3| | | | | | | | | | |`cudnnGetConvolution2dDescriptor`|2.0.0|9.0.0| | | | | | | | | @@ -1017,7 +1023,7 @@ |`cudnnGetConvolutionNdForwardOutputDim`|2.0.0|9.0.0| | | | | | | | | |`cudnnGetConvolutionReorderType`|7.6.0|9.0.0| | | | | | | | | |`cudnnGetCudartVersion`|6.0.0| | | | | | | | | | -|`cudnnGetDropoutDescriptor`|7.0.5| | | |`miopenGetDropoutDescriptor`| | | | | | +|`cudnnGetDropoutDescriptor`|7.0.5| | | |`miopenGetDropoutDescriptor`|2.8.0| | | | | |`cudnnGetErrorString`|2.0.0| | | |`miopenGetErrorString`|2.1.0| | | | | |`cudnnGetFilter4dDescriptor`|2.0.0|9.0.0| | | | | | | | | |`cudnnGetFilterNdDescriptor`|2.0.0|9.0.0| | | | | | | | | @@ -1025,7 +1031,7 @@ |`cudnnGetFoldedConvBackwardDataDescriptors`|7.6.0|9.0.0| | | | | | | | | |`cudnnGetFusedOpsConstParamPackAttribute`|7.6.0|9.0.0| | | | | | | | | |`cudnnGetFusedOpsVariantParamPackAttribute`|7.6.0|9.0.0| | | | | | | | | -|`cudnnGetLRNDescriptor`|3.0.0| | | |`miopenGetLRNDescriptor`| | | | | | +|`cudnnGetLRNDescriptor`|3.0.0| | | |`miopenGetLRNDescriptor`|2.1.0| | | | | |`cudnnGetLastErrorString`|9.0.0| | | | | | | | | | |`cudnnGetMaxDeviceVersion`|8.6.0| | | | | | | | | | |`cudnnGetMultiHeadAttnBuffers`|7.5.0|9.0.0| | | | | | | | | @@ -1043,8 +1049,8 @@ |`cudnnGetRNNBackwardWeightsAlgorithmMaxCount`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnGetRNNBiasMode`|7.5.0|8.0.1| |9.0.0| | | | | | | |`cudnnGetRNNDataDescriptor`|7.2.1| | | | | | | | | | -|`cudnnGetRNNDescriptor`|7.0.5|7.6.5| |8.0.1|`miopenGetRNNDescriptor_V2`| | | | | | -|`cudnnGetRNNDescriptor_v6`|8.0.1|8.0.1| |9.0.0|`miopenGetRNNDescriptor_V2`| | | | | | 
+|`cudnnGetRNNDescriptor`|7.0.5|7.6.5| |8.0.1|`miopenGetRNNDescriptor_V2`|3.5.0| | | | | +|`cudnnGetRNNDescriptor_v6`|8.0.1|8.0.1| |9.0.0|`miopenGetRNNDescriptor_V2`|3.5.0| | | | | |`cudnnGetRNNDescriptor_v8`|8.0.1| | | | | | | | | | |`cudnnGetRNNForwardInferenceAlgorithmMaxCount`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnGetRNNForwardTrainingAlgorithmMaxCount`|7.1.3|8.0.2| |9.0.0| | | | | | | @@ -1052,13 +1058,13 @@ |`cudnnGetRNNLinLayerMatrixParams`|5.0.0|8.0.1| |9.0.0| | | | | | | |`cudnnGetRNNMatrixMathType`|7.1.3|8.0.1| |9.0.0| | | | | | | |`cudnnGetRNNPaddingMode`|7.2.1|8.0.1| |9.0.0| | | | | | | -|`cudnnGetRNNParamsSize`|5.0.0|8.0.1| |9.0.0|`miopenGetRNNParamsSize`| | | | | | +|`cudnnGetRNNParamsSize`|5.0.0|8.0.1| |9.0.0|`miopenGetRNNParamsSize`|2.1.0| | | | | |`cudnnGetRNNProjectionLayers`|7.1.3|8.0.1| |9.0.0| | | | | | | |`cudnnGetRNNTempSpaceSizes`|8.0.1| | | | | | | | | | -|`cudnnGetRNNTrainingReserveSize`|5.0.0|8.0.1| |9.0.0|`miopenGetRNNTrainingReserveSize`| | | | | | +|`cudnnGetRNNTrainingReserveSize`|5.0.0|8.0.1| |9.0.0|`miopenGetRNNTrainingReserveSize`|2.1.0| | | | | |`cudnnGetRNNWeightParams`|8.0.1| | | | | | | | | | |`cudnnGetRNNWeightSpaceSize`|8.0.1| | | | | | | | | | -|`cudnnGetRNNWorkspaceSize`|5.0.0|8.0.1| |9.0.0|`miopenGetRNNWorkspaceSize`| | | | | | +|`cudnnGetRNNWorkspaceSize`|5.0.0|8.0.1| |9.0.0|`miopenGetRNNWorkspaceSize`|2.1.0| | | | | |`cudnnGetReduceTensorDescriptor`|6.0.0|9.0.0| | |`miopenGetReduceTensorDescriptor`|3.9.0| | | | | |`cudnnGetReductionIndicesSize`|6.0.0|9.0.0| | |`miopenGetReductionIndicesSize`|3.9.0| | | | | |`cudnnGetReductionWorkspaceSize`|6.0.0|9.0.0| | |`miopenGetReductionWorkspaceSize`|3.9.0| | | | | @@ -1088,16 +1094,16 @@ |`cudnnPoolingBackward`|1.0.0|9.0.0| | | | | | | | | |`cudnnPoolingForward`|1.0.0|9.0.0| | | | | | | | | |`cudnnQueryRuntimeError`|7.0.5|9.0.0| | | | | | | | | -|`cudnnRNNBackwardData`|5.0.0|8.0.2| |9.0.0|`miopenRNNBackwardData`| | | | | | +|`cudnnRNNBackwardData`|5.0.0|8.0.2| 
|9.0.0|`miopenRNNBackwardData`|2.1.0| | | | | |`cudnnRNNBackwardDataEx`|7.2.1|8.0.2| |9.0.0| | | | | | | |`cudnnRNNBackwardData_v8`|8.0.2| | | | | | | | | | -|`cudnnRNNBackwardWeights`|5.0.0|8.0.2| |9.0.0|`miopenRNNBackwardWeights`| | | | | | +|`cudnnRNNBackwardWeights`|5.0.0|8.0.2| |9.0.0|`miopenRNNBackwardWeights`|2.1.0| | | | | |`cudnnRNNBackwardWeightsEx`|7.2.1|8.0.2| |9.0.0| | | | | | | |`cudnnRNNBackwardWeights_v8`|8.0.2| | | | | | | | | | |`cudnnRNNForward`|8.0.1| | | | | | | | | | -|`cudnnRNNForwardInference`|5.0.0|8.0.1| |9.0.0|`miopenRNNForwardInference`| | | | | | +|`cudnnRNNForwardInference`|5.0.0|8.0.1| |9.0.0|`miopenRNNForwardInference`|2.1.0| | | | | |`cudnnRNNForwardInferenceEx`|7.2.1|8.0.1| |9.0.0| | | | | | | -|`cudnnRNNForwardTraining`|5.0.0|8.0.1| |9.0.0|`miopenRNNForwardTraining`| | | | | | +|`cudnnRNNForwardTraining`|5.0.0|8.0.1| |9.0.0|`miopenRNNForwardTraining`|2.1.0| | | | | |`cudnnRNNForwardTrainingEx`|7.2.1|8.0.1| |9.0.0| | | | | | | |`cudnnRNNGetClip`|7.2.1|8.0.1| |9.0.0| | | | | | | |`cudnnRNNGetClip_v8`|8.0.1| | | | | | | | | | @@ -1108,7 +1114,7 @@ |`cudnnReduceTensor`|6.0.0|9.0.0| | |`miopenReduceTensor`|3.9.0| | | | | |`cudnnReorderFilterAndBias`|7.6.0|9.0.0| | | | | | | | | |`cudnnRestoreAlgorithm`|7.1.3|8.0.2| |9.0.0| | | | | | | -|`cudnnRestoreDropoutDescriptor`|7.0.5| | | |`miopenRestoreDropoutDescriptor`| | | | | | +|`cudnnRestoreDropoutDescriptor`|7.0.5| | | |`miopenRestoreDropoutDescriptor`|2.8.0| | | | | |`cudnnSaveAlgorithm`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnScaleTensor`|2.0.0|9.0.0| | |`miopenScaleTensor`|2.1.0| | | | | |`cudnnSetActivationDescriptor`|4.0.0|9.0.0| | | | | | | | | @@ -1116,7 +1122,7 @@ |`cudnnSetAlgorithmDescriptor`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnSetAlgorithmPerformance`|7.1.3|8.0.2| |9.0.0| | | | | | | |`cudnnSetAttnDescriptor`|7.5.0|9.0.0| | | | | | | | | -|`cudnnSetCTCLossDescriptor`|7.0.5|9.0.0| | |`miopenSetCTCLossDescriptor`| | | | | | +|`cudnnSetCTCLossDescriptor`|7.0.5|9.0.0| | 
|`miopenSetCTCLossDescriptor`|2.6.0| | | | | |`cudnnSetCTCLossDescriptorEx`|7.6.0|9.0.0| | | | | | | | | |`cudnnSetCTCLossDescriptor_v8`|8.0.1|9.0.0| | | | | | | | | |`cudnnSetCTCLossDescriptor_v9`|9.0.0| | | | | | | | | | @@ -1126,12 +1132,12 @@ |`cudnnSetConvolutionMathType`|7.0.5|9.0.0| | | | | | | | | |`cudnnSetConvolutionNdDescriptor`|2.0.0|9.0.0| | | | | | | | | |`cudnnSetConvolutionReorderType`|7.6.0|9.0.0| | | | | | | | | -|`cudnnSetDropoutDescriptor`|5.0.0| | | |`miopenSetDropoutDescriptor`| | | | | | +|`cudnnSetDropoutDescriptor`|5.0.0| | | |`miopenSetDropoutDescriptor`|2.8.0| | | | | |`cudnnSetFilter4dDescriptor`|2.0.0|9.0.0| | | | | | | | | |`cudnnSetFilterNdDescriptor`|2.0.0|9.0.0| | | | | | | | | |`cudnnSetFusedOpsConstParamPackAttribute`|7.6.0|9.0.0| | | | | | | | | |`cudnnSetFusedOpsVariantParamPackAttribute`|7.6.0|9.0.0| | | | | | | | | -|`cudnnSetLRNDescriptor`|3.0.0| | | |`miopenSetLRNDescriptor`| | | | | | +|`cudnnSetLRNDescriptor`|3.0.0| | | |`miopenSetLRNDescriptor`|2.1.0| | | | | |`cudnnSetOpTensorDescriptor`|5.0.0|9.0.0| | | | | | | | | |`cudnnSetPersistentRNNPlan`|6.0.0|8.0.1| |9.0.0| | | | | | | |`cudnnSetPooling2dDescriptor`|2.0.0|9.0.0| | |`miopenSet2dPoolingDescriptor`|2.1.0| | | | | @@ -1141,7 +1147,7 @@ |`cudnnSetRNNDataDescriptor`|7.2.1| | | | | | | | | | |`cudnnSetRNNDescriptor`|5.0.0|7.6.5| |8.0.1| | | | | | | |`cudnnSetRNNDescriptor_v5`|7.0.5|7.6.5| |8.0.1| | | | | | | -|`cudnnSetRNNDescriptor_v6`|6.0.0|8.0.1| |9.0.0|`miopenSetRNNDescriptor_V2`| | | | | | +|`cudnnSetRNNDescriptor_v6`|6.0.0|8.0.1| |9.0.0|`miopenSetRNNDescriptor_V2`|3.5.0| | | | | |`cudnnSetRNNDescriptor_v8`|8.0.1| | | | | | | | | | |`cudnnSetRNNMatrixMathType`|7.0.5|8.0.1| |9.0.0| | | | | | | |`cudnnSetRNNPaddingMode`|7.2.1|8.0.1| |9.0.0| | | | | | | diff --git a/docs/tables/CURAND_API_supported_by_HIP_and_ROC.md b/docs/tables/CURAND_API_supported_by_HIP_and_ROC.md index d580bd6b..9eca775c 100644 --- a/docs/tables/CURAND_API_supported_by_HIP_and_ROC.md +++ 
b/docs/tables/CURAND_API_supported_by_HIP_and_ROC.md @@ -57,8 +57,8 @@ |`curandDirectionVectorSet_t`| | | | |`hiprandDirectionVectorSet_t`|6.0.0| | | | |`rocrand_direction_vector_set`|6.0.0| | | | | |`curandDirectionVectors32_t`| | | | |`hiprandDirectionVectors32_t`|1.5.0| | | | | | | | | | | |`curandDirectionVectors64_t`| | | | |`hiprandDirectionVectors64_t`|6.0.0| | | | | | | | | | | -|`curandDiscreteDistribution_st`| | | | |`hiprandDiscreteDistribution_st`|1.5.0| | | | | | | | | | | -|`curandDiscreteDistribution_t`| | | | |`hiprandDiscreteDistribution_t`|1.5.0| | | | | | | | | | | +|`curandDiscreteDistribution_st`| | | | |`hiprandDiscreteDistribution_st`|1.5.0| | | | |`rocrand_discrete_distribution_st`|1.5.0| | | | | +|`curandDiscreteDistribution_t`| | | | |`hiprandDiscreteDistribution_t`|1.5.0| | | | |`rocrand_discrete_distribution`|1.5.0| | | | | |`curandDistributionM2Shift_st`| | | | | | | | | | | | | | | | | |`curandDistributionM2Shift_t`| | | | | | | | | | | | | | | | | |`curandDistributionShift_st`| | | | | | | | | | | | | | | | | @@ -80,20 +80,20 @@ |`curandRngType`| | | | |`hiprandRngType_t`|1.5.0| | | | |`rocrand_rng_type`|1.5.0| | | | | |`curandRngType_t`| | | | |`hiprandRngType_t`|1.5.0| | | | |`rocrand_rng_type`|1.5.0| | | | | |`curandState`| | | | |`hiprandState`|1.8.0| | | | | | | | | | | -|`curandStateMRG32k3a`| | | | |`hiprandStateMRG32k3a`|1.8.0| | | | | | | | | | | -|`curandStateMRG32k3a_t`| | | | |`hiprandStateMRG32k3a_t`|1.5.0| | | | | | | | | | | -|`curandStateMtgp32`| | | | |`hiprandStateMtgp32`|1.8.0| | | | | | | | | | | -|`curandStateMtgp32_t`| | | | |`hiprandStateMtgp32_t`|1.5.0| | | | | | | | | | | -|`curandStatePhilox4_32_10`| | | | |`hiprandStatePhilox4_32_10`|1.8.0| | | | | | | | | | | -|`curandStatePhilox4_32_10_t`| | | | |`hiprandStatePhilox4_32_10_t`|1.8.0| | | | | | | | | | | -|`curandStateScrambledSobol32`| | | | |`hiprandStateScrambledSobol32`|6.2.0| | | | | | | | | | | -|`curandStateScrambledSobol32_t`| | | | 
|`hiprandStateScrambledSobol32_t`|6.2.0| | | | | | | | | | | -|`curandStateScrambledSobol64`| | | | |`hiprandStateScrambledSobol64`|6.2.0| | | | | | | | | | | -|`curandStateScrambledSobol64_t`| | | | |`hiprandStateScrambledSobol64_t`|6.2.0| | | | | | | | | | | -|`curandStateSobol32`| | | | |`hiprandStateSobol32`|1.8.0| | | | | | | | | | | -|`curandStateSobol32_t`| | | | |`hiprandStateSobol32_t`|1.5.0| | | | | | | | | | | -|`curandStateSobol64`| | | | |`hiprandStateSobol64`|6.2.0| | | | | | | | | | | -|`curandStateSobol64_t`| | | | |`hiprandStateSobol64_t`|6.2.0| | | | | | | | | | | +|`curandStateMRG32k3a`| | | | |`hiprandStateMRG32k3a`|1.8.0| | | | |`rocrand_device::mrg32k3a_engine`|1.5.0| | | | | +|`curandStateMRG32k3a_t`| | | | |`hiprandStateMRG32k3a_t`|1.5.0| | | | |`rocrand_state_mrg32k3a`|1.5.0| | | | | +|`curandStateMtgp32`| | | | |`hiprandStateMtgp32`|1.8.0| | | | |`rocrand_device::mtgp32_engine`|1.5.0| | | | | +|`curandStateMtgp32_t`| | | | |`hiprandStateMtgp32_t`|1.5.0| | | | |`rocrand_state_mtgp32`|1.5.0| | | | | +|`curandStatePhilox4_32_10`| | | | |`hiprandStatePhilox4_32_10`|1.8.0| | | | |`rocrand_device::philox4x32_10_engine`|1.5.0| | | | | +|`curandStatePhilox4_32_10_t`| | | | |`hiprandStatePhilox4_32_10_t`|1.8.0| | | | |`rocrand_state_philox4x32_10`|1.5.0| | | | | +|`curandStateScrambledSobol32`| | | | |`hiprandStateScrambledSobol32`|6.2.0| | | | |`rocrand_device::scrambled_sobol32_engine`|5.4.0| | | | | +|`curandStateScrambledSobol32_t`| | | | |`hiprandStateScrambledSobol32_t`|6.2.0| | | | |`rocrand_state_scrambled_sobol32`|5.4.0| | | | | +|`curandStateScrambledSobol64`| | | | |`hiprandStateScrambledSobol64`|6.2.0| | | | |`rocrand_device::scrambled_sobol64_engine`|5.4.0| | | | | +|`curandStateScrambledSobol64_t`| | | | |`hiprandStateScrambledSobol64_t`|6.2.0| | | | |`rocrand_state_scrambled_sobol64`|5.4.0| | | | | +|`curandStateSobol32`| | | | |`hiprandStateSobol32`|1.8.0| | | | |`rocrand_device::sobol32_engine`|1.5.0| | | | | 
+|`curandStateSobol32_t`| | | | |`hiprandStateSobol32_t`|1.5.0| | | | |`rocrand_state_sobol32`|1.5.0| | | | | +|`curandStateSobol64`| | | | |`hiprandStateSobol64`|6.2.0| | | | |`rocrand_device::sobol64_engine`|4.5.0| | | | | +|`curandStateSobol64_t`| | | | |`hiprandStateSobol64_t`|6.2.0| | | | |`rocrand_state_sobol64`|4.5.0| | | | | |`curandStateXORWOW`| | | | |`hiprandStateXORWOW`|1.8.0| | | | | | | | | | | |`curandStateXORWOW_t`| | | | |`hiprandStateXORWOW_t`|1.5.0| | | | | | | | | | | |`curandState_t`| | | | |`hiprandState_t`|1.5.0| | | | | | | | | | | @@ -106,8 +106,8 @@ |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`curandCreateGenerator`| | | | |`hiprandCreateGenerator`|1.5.0| | | | |`rocrand_create_generator`|1.5.0| | | | | |`curandCreateGeneratorHost`| | | | |`hiprandCreateGeneratorHost`|1.5.0| | | | |`rocrand_create_generator_host_blocking`|6.2.0| | | | | -|`curandCreatePoissonDistribution`| | | | |`hiprandCreatePoissonDistribution`|1.5.0| | | | | | | | | | | -|`curandDestroyDistribution`| | | | |`hiprandDestroyDistribution`|1.5.0| | | | | | | | | | | +|`curandCreatePoissonDistribution`| | | | |`hiprandCreatePoissonDistribution`|1.5.0| | | | |`rocrand_create_poisson_distribution`|1.5.0| | | | | +|`curandDestroyDistribution`| | | | |`hiprandDestroyDistribution`|1.5.0| | | | |`rocrand_destroy_discrete_distribution`|1.5.0| | | | | |`curandDestroyGenerator`| | | | |`hiprandDestroyGenerator`|1.5.0| | | | |`rocrand_destroy_generator`|1.5.0| | | | | |`curandGenerate`| | | | |`hiprandGenerate`|1.5.0| | | | |`rocrand_generate`|1.5.0| | | | | |`curandGenerateLogNormal`| | | | |`hiprandGenerateLogNormal`|1.5.0| | | | |`rocrand_generate_log_normal`|1.5.0| | | | | @@ -119,18 +119,18 @@ |`curandGenerateSeeds`| | | | |`hiprandGenerateSeeds`|1.5.0| | | | |`rocrand_initialize_generator`|1.5.0| | | | | |`curandGenerateUniform`| | | | |`hiprandGenerateUniform`|1.5.0| | | | |`rocrand_generate_uniform`|1.5.0| | | | | |`curandGenerateUniformDouble`| | | 
| |`hiprandGenerateUniformDouble`|1.5.0| | | | |`rocrand_generate_uniform_double`|1.5.0| | | | | -|`curandGetDirectionVectors32`| | | | |`hiprandGetDirectionVectors32`|6.0.0| | | | | | | | | | | -|`curandGetDirectionVectors64`| | | | |`hiprandGetDirectionVectors64`|6.0.0| | | | | | | | | | | +|`curandGetDirectionVectors32`| | | | |`hiprandGetDirectionVectors32`|6.0.0| | | | |`rocrand_get_direction_vectors32`|6.0.0| | | | | +|`curandGetDirectionVectors64`| | | | |`hiprandGetDirectionVectors64`|6.0.0| | | | |`rocrand_get_direction_vectors64`|6.0.0| | | | | |`curandGetProperty`|8.0| | | | | | | | | | | | | | | | -|`curandGetScrambleConstants32`| | | | |`hiprandGetScrambleConstants32`|6.0.0| | | | | | | | | | | -|`curandGetScrambleConstants64`| | | | |`hiprandGetScrambleConstants64`|6.0.0| | | | | | | | | | | -|`curandGetVersion`| | | | |`hiprandGetVersion`|1.5.0| | | | | | | | | | | -|`curandMakeMTGP32Constants`| | | | |`hiprandMakeMTGP32Constants`|1.5.0| | | | | | | | | | | -|`curandMakeMTGP32KernelState`| | | | |`hiprandMakeMTGP32KernelState`|1.5.0| | | | | | | | | | | +|`curandGetScrambleConstants32`| | | | |`hiprandGetScrambleConstants32`|6.0.0| | | | |`rocrand_get_scramble_constants32`|6.0.0| | | | | +|`curandGetScrambleConstants64`| | | | |`hiprandGetScrambleConstants64`|6.0.0| | | | |`rocrand_get_scramble_constants64`|6.0.0| | | | | +|`curandGetVersion`| | | | |`hiprandGetVersion`|1.5.0| | | | |`rocrand_get_version`|1.5.0| | | | | +|`curandMakeMTGP32Constants`| | | | |`hiprandMakeMTGP32Constants`|1.5.0| | | | |`rocrand_make_constant`|1.5.0| | | | | +|`curandMakeMTGP32KernelState`| | | | |`hiprandMakeMTGP32KernelState`|1.5.0| | | | |`rocrand_make_state_mtgp32`|1.5.0| | | | | |`curandSetGeneratorOffset`| | | | |`hiprandSetGeneratorOffset`|1.5.0| | | | |`rocrand_set_offset`|1.5.0| | | | | -|`curandSetGeneratorOrdering`| | | | |`hiprandSetGeneratorOrdering`|6.2.0| | | | | | | | | | | +|`curandSetGeneratorOrdering`| | | | |`hiprandSetGeneratorOrdering`|6.2.0| | | | 
|`rocrand_set_ordering`|5.5.0| | | | | |`curandSetPseudoRandomGeneratorSeed`| | | | |`hiprandSetPseudoRandomGeneratorSeed`|1.5.0| | | | |`rocrand_set_seed`|1.5.0| | | | | -|`curandSetQuasiRandomGeneratorDimensions`| | | | |`hiprandSetQuasiRandomGeneratorDimensions`|1.5.0| | | | | | | | | | | +|`curandSetQuasiRandomGeneratorDimensions`| | | | |`hiprandSetQuasiRandomGeneratorDimensions`|1.5.0| | | | |`rocrand_set_quasi_random_generator_dimensions`|1.5.0| | | | | |`curandSetStream`| | | | |`hiprandSetStream`|1.5.0| | | | |`rocrand_set_stream`|1.5.0| | | | | ## **3. Device API Functions** @@ -138,33 +138,33 @@ |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|**ROC**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`__curand_umul`|11.5| | | | | | | | | | | | | | | | -|`curand`| | | | |`hiprand`|1.5.0| | | | | | | | | | | +|`curand`| | | | |`hiprand`|1.5.0| | | | |`rocrand`|1.5.0| | | | | |`curand_Philox4x32_10`| | | | | | | | | | | | | | | | | -|`curand_discrete`| | | | |`hiprand_discrete`|1.5.0| | | | | | | | | | | -|`curand_discrete4`| | | | |`hiprand_discrete4`|1.5.0| | | | | | | | | | | -|`curand_init`| | | | |`hiprand_init`|1.5.0| | | | | | | | | | | -|`curand_log_normal`| | | | |`hiprand_log_normal`|1.5.0| | | | | | | | | | | -|`curand_log_normal2`| | | | |`hiprand_log_normal2`|1.5.0| | | | | | | | | | | -|`curand_log_normal2_double`| | | | |`hiprand_log_normal2_double`|1.5.0| | | | | | | | | | | -|`curand_log_normal4`| | | | |`hiprand_log_normal4`|1.5.0| | | | | | | | | | | -|`curand_log_normal4_double`| | | | |`hiprand_log_normal4_double`|1.5.0| | | | | | | | | | | -|`curand_log_normal_double`| | | | |`hiprand_log_normal_double`|1.5.0| | | | | | | | | | | +|`curand_discrete`| | | | |`hiprand_discrete`|1.5.0| | | | |`rocrand_discrete`|1.5.0| | | | | +|`curand_discrete4`| | | | |`hiprand_discrete4`|1.5.0| | | | |`rocrand_discrete4`|1.5.0| | | | | +|`curand_init`| | | | 
|`hiprand_init`|1.5.0| | | | |`rocrand_init`|1.5.0| | | | | +|`curand_log_normal`| | | | |`hiprand_log_normal`|1.5.0| | | | |`rocrand_log_normal`|1.5.0| | | | | +|`curand_log_normal2`| | | | |`hiprand_log_normal2`|1.5.0| | | | |`rocrand_log_normal2`|1.5.0| | | | | +|`curand_log_normal2_double`| | | | |`hiprand_log_normal2_double`|1.5.0| | | | |`rocrand_log_normal_double2`|1.5.0| | | | | +|`curand_log_normal4`| | | | |`hiprand_log_normal4`|1.5.0| | | | |`rocrand_log_normal4`|1.5.0| | | | | +|`curand_log_normal4_double`| | | | |`hiprand_log_normal4_double`|1.5.0| | | | |`rocrand_log_normal_double4`|1.5.0| | | | | +|`curand_log_normal_double`| | | | |`hiprand_log_normal_double`|1.5.0| | | | |`rocrand_log_normal_double`|1.5.0| | | | | |`curand_mtgp32_single`| | | | | | | | | | | | | | | | | |`curand_mtgp32_single_specific`| | | | | | | | | | | | | | | | | |`curand_mtgp32_specific`| | | | | | | | | | | | | | | | | -|`curand_normal`| | | | |`hiprand_normal`|1.5.0| | | | | | | | | | | -|`curand_normal2`| | | | |`hiprand_normal2`|1.5.0| | | | | | | | | | | -|`curand_normal2_double`| | | | |`hiprand_normal2_double`|1.5.0| | | | | | | | | | | -|`curand_normal4`| | | | |`hiprand_normal4`|1.5.0| | | | | | | | | | | -|`curand_normal4_double`| | | | |`hiprand_normal4_double`|1.5.0| | | | | | | | | | | -|`curand_normal_double`| | | | |`hiprand_normal_double`|1.5.0| | | | | | | | | | | -|`curand_poisson`| | | | |`hiprand_poisson`|1.5.0| | | | | | | | | | | -|`curand_poisson4`| | | | |`hiprand_poisson4`|1.5.0| | | | | | | | | | | -|`curand_uniform`| | | | |`hiprand_uniform`|1.5.0| | | | | | | | | | | -|`curand_uniform2_double`| | | | |`hiprand_uniform2_double`|1.5.0| | | | | | | | | | | -|`curand_uniform4`| | | | |`hiprand_uniform4`|1.5.0| | | | | | | | | | | -|`curand_uniform4_double`| | | | |`hiprand_uniform4_double`|1.5.0| | | | | | | | | | | -|`curand_uniform_double`| | | | |`hiprand_uniform_double`|1.5.0| | | | | | | | | | | +|`curand_normal`| | | | |`hiprand_normal`|1.5.0| | 
| | |`rocrand_normal`|1.5.0| | | | | +|`curand_normal2`| | | | |`hiprand_normal2`|1.5.0| | | | |`rocrand_normal2`|1.5.0| | | | | +|`curand_normal2_double`| | | | |`hiprand_normal2_double`|1.5.0| | | | |`rocrand_normal_double2`|1.5.0| | | | | +|`curand_normal4`| | | | |`hiprand_normal4`|1.5.0| | | | |`rocrand_normal4`|1.5.0| | | | | +|`curand_normal4_double`| | | | |`hiprand_normal4_double`|1.5.0| | | | |`rocrand_normal_double4`|1.5.0| | | | | +|`curand_normal_double`| | | | |`hiprand_normal_double`|1.5.0| | | | |`rocrand_normal_double`|1.5.0| | | | | +|`curand_poisson`| | | | |`hiprand_poisson`|1.5.0| | | | |`rocrand_poisson`|1.5.0| | | | | +|`curand_poisson4`| | | | |`hiprand_poisson4`|1.5.0| | | | |`rocrand_poisson4`|1.5.0| | | | | +|`curand_uniform`| | | | |`hiprand_uniform`|1.5.0| | | | |`rocrand_uniform`|1.5.0| | | | | +|`curand_uniform2_double`| | | | |`hiprand_uniform2_double`|1.5.0| | | | |`rocrand_uniform_double2`|1.5.0| | | | | +|`curand_uniform4`| | | | |`hiprand_uniform4`|1.5.0| | | | |`rocrand_uniform4`|1.5.0| | | | | +|`curand_uniform4_double`| | | | |`hiprand_uniform4_double`|1.5.0| | | | |`rocrand_uniform_double4`|1.5.0| | | | | +|`curand_uniform_double`| | | | |`hiprand_uniform_double`|1.5.0| | | | |`rocrand_uniform_double`|1.5.0| | | | | \*A - Added; D - Deprecated; C - Changed; R - Removed; E - Experimental \ No newline at end of file diff --git a/docs/tables/CURAND_API_supported_by_ROC.md b/docs/tables/CURAND_API_supported_by_ROC.md index c129ed89..13612cca 100644 --- a/docs/tables/CURAND_API_supported_by_ROC.md +++ b/docs/tables/CURAND_API_supported_by_ROC.md @@ -57,8 +57,8 @@ |`curandDirectionVectorSet_t`| | | | |`rocrand_direction_vector_set`|6.0.0| | | | | |`curandDirectionVectors32_t`| | | | | | | | | | | |`curandDirectionVectors64_t`| | | | | | | | | | | -|`curandDiscreteDistribution_st`| | | | | | | | | | | -|`curandDiscreteDistribution_t`| | | | | | | | | | | +|`curandDiscreteDistribution_st`| | | | 
|`rocrand_discrete_distribution_st`|1.5.0| | | | | +|`curandDiscreteDistribution_t`| | | | |`rocrand_discrete_distribution`|1.5.0| | | | | |`curandDistributionM2Shift_st`| | | | | | | | | | | |`curandDistributionM2Shift_t`| | | | | | | | | | | |`curandDistributionShift_st`| | | | | | | | | | | @@ -80,20 +80,20 @@ |`curandRngType`| | | | |`rocrand_rng_type`|1.5.0| | | | | |`curandRngType_t`| | | | |`rocrand_rng_type`|1.5.0| | | | | |`curandState`| | | | | | | | | | | -|`curandStateMRG32k3a`| | | | | | | | | | | -|`curandStateMRG32k3a_t`| | | | | | | | | | | -|`curandStateMtgp32`| | | | | | | | | | | -|`curandStateMtgp32_t`| | | | | | | | | | | -|`curandStatePhilox4_32_10`| | | | | | | | | | | -|`curandStatePhilox4_32_10_t`| | | | | | | | | | | -|`curandStateScrambledSobol32`| | | | | | | | | | | -|`curandStateScrambledSobol32_t`| | | | | | | | | | | -|`curandStateScrambledSobol64`| | | | | | | | | | | -|`curandStateScrambledSobol64_t`| | | | | | | | | | | -|`curandStateSobol32`| | | | | | | | | | | -|`curandStateSobol32_t`| | | | | | | | | | | -|`curandStateSobol64`| | | | | | | | | | | -|`curandStateSobol64_t`| | | | | | | | | | | +|`curandStateMRG32k3a`| | | | |`rocrand_device::mrg32k3a_engine`|1.5.0| | | | | +|`curandStateMRG32k3a_t`| | | | |`rocrand_state_mrg32k3a`|1.5.0| | | | | +|`curandStateMtgp32`| | | | |`rocrand_device::mtgp32_engine`|1.5.0| | | | | +|`curandStateMtgp32_t`| | | | |`rocrand_state_mtgp32`|1.5.0| | | | | +|`curandStatePhilox4_32_10`| | | | |`rocrand_device::philox4x32_10_engine`|1.5.0| | | | | +|`curandStatePhilox4_32_10_t`| | | | |`rocrand_state_philox4x32_10`|1.5.0| | | | | +|`curandStateScrambledSobol32`| | | | |`rocrand_device::scrambled_sobol32_engine`|5.4.0| | | | | +|`curandStateScrambledSobol32_t`| | | | |`rocrand_state_scrambled_sobol32`|5.4.0| | | | | +|`curandStateScrambledSobol64`| | | | |`rocrand_device::scrambled_sobol64_engine`|5.4.0| | | | | +|`curandStateScrambledSobol64_t`| | | | |`rocrand_state_scrambled_sobol64`|5.4.0| | | 
| | +|`curandStateSobol32`| | | | |`rocrand_device::sobol32_engine`|1.5.0| | | | | +|`curandStateSobol32_t`| | | | |`rocrand_state_sobol32`|1.5.0| | | | | +|`curandStateSobol64`| | | | |`rocrand_device::sobol64_engine`|4.5.0| | | | | +|`curandStateSobol64_t`| | | | |`rocrand_state_sobol64`|4.5.0| | | | | |`curandStateXORWOW`| | | | | | | | | | | |`curandStateXORWOW_t`| | | | | | | | | | | |`curandState_t`| | | | | | | | | | | @@ -106,8 +106,8 @@ |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`curandCreateGenerator`| | | | |`rocrand_create_generator`|1.5.0| | | | | |`curandCreateGeneratorHost`| | | | |`rocrand_create_generator_host_blocking`|6.2.0| | | | | -|`curandCreatePoissonDistribution`| | | | | | | | | | | -|`curandDestroyDistribution`| | | | | | | | | | | +|`curandCreatePoissonDistribution`| | | | |`rocrand_create_poisson_distribution`|1.5.0| | | | | +|`curandDestroyDistribution`| | | | |`rocrand_destroy_discrete_distribution`|1.5.0| | | | | |`curandDestroyGenerator`| | | | |`rocrand_destroy_generator`|1.5.0| | | | | |`curandGenerate`| | | | |`rocrand_generate`|1.5.0| | | | | |`curandGenerateLogNormal`| | | | |`rocrand_generate_log_normal`|1.5.0| | | | | @@ -119,18 +119,18 @@ |`curandGenerateSeeds`| | | | |`rocrand_initialize_generator`|1.5.0| | | | | |`curandGenerateUniform`| | | | |`rocrand_generate_uniform`|1.5.0| | | | | |`curandGenerateUniformDouble`| | | | |`rocrand_generate_uniform_double`|1.5.0| | | | | -|`curandGetDirectionVectors32`| | | | | | | | | | | -|`curandGetDirectionVectors64`| | | | | | | | | | | +|`curandGetDirectionVectors32`| | | | |`rocrand_get_direction_vectors32`|6.0.0| | | | | +|`curandGetDirectionVectors64`| | | | |`rocrand_get_direction_vectors64`|6.0.0| | | | | |`curandGetProperty`|8.0| | | | | | | | | | -|`curandGetScrambleConstants32`| | | | | | | | | | | -|`curandGetScrambleConstants64`| | | | | | | | | | | -|`curandGetVersion`| | | | | | | | | | | -|`curandMakeMTGP32Constants`| | | | | | | | | | | 
-|`curandMakeMTGP32KernelState`| | | | | | | | | | | +|`curandGetScrambleConstants32`| | | | |`rocrand_get_scramble_constants32`|6.0.0| | | | | +|`curandGetScrambleConstants64`| | | | |`rocrand_get_scramble_constants64`|6.0.0| | | | | +|`curandGetVersion`| | | | |`rocrand_get_version`|1.5.0| | | | | +|`curandMakeMTGP32Constants`| | | | |`rocrand_make_constant`|1.5.0| | | | | +|`curandMakeMTGP32KernelState`| | | | |`rocrand_make_state_mtgp32`|1.5.0| | | | | |`curandSetGeneratorOffset`| | | | |`rocrand_set_offset`|1.5.0| | | | | -|`curandSetGeneratorOrdering`| | | | | | | | | | | +|`curandSetGeneratorOrdering`| | | | |`rocrand_set_ordering`|5.5.0| | | | | |`curandSetPseudoRandomGeneratorSeed`| | | | |`rocrand_set_seed`|1.5.0| | | | | -|`curandSetQuasiRandomGeneratorDimensions`| | | | | | | | | | | +|`curandSetQuasiRandomGeneratorDimensions`| | | | |`rocrand_set_quasi_random_generator_dimensions`|1.5.0| | | | | |`curandSetStream`| | | | |`rocrand_set_stream`|1.5.0| | | | | ## **3. Device API Functions** @@ -138,33 +138,33 @@ |**CUDA**|**A**|**D**|**C**|**R**|**ROC**|**A**|**D**|**C**|**R**|**E**| |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| |`__curand_umul`|11.5| | | | | | | | | | -|`curand`| | | | | | | | | | | +|`curand`| | | | |`rocrand`|1.5.0| | | | | |`curand_Philox4x32_10`| | | | | | | | | | | -|`curand_discrete`| | | | | | | | | | | -|`curand_discrete4`| | | | | | | | | | | -|`curand_init`| | | | | | | | | | | -|`curand_log_normal`| | | | | | | | | | | -|`curand_log_normal2`| | | | | | | | | | | -|`curand_log_normal2_double`| | | | | | | | | | | -|`curand_log_normal4`| | | | | | | | | | | -|`curand_log_normal4_double`| | | | | | | | | | | -|`curand_log_normal_double`| | | | | | | | | | | +|`curand_discrete`| | | | |`rocrand_discrete`|1.5.0| | | | | +|`curand_discrete4`| | | | |`rocrand_discrete4`|1.5.0| | | | | +|`curand_init`| | | | |`rocrand_init`|1.5.0| | | | | +|`curand_log_normal`| | | | |`rocrand_log_normal`|1.5.0| | | | | +|`curand_log_normal2`| | | | 
|`rocrand_log_normal2`|1.5.0| | | | | +|`curand_log_normal2_double`| | | | |`rocrand_log_normal_double2`|1.5.0| | | | | +|`curand_log_normal4`| | | | |`rocrand_log_normal4`|1.5.0| | | | | +|`curand_log_normal4_double`| | | | |`rocrand_log_normal_double4`|1.5.0| | | | | +|`curand_log_normal_double`| | | | |`rocrand_log_normal_double`|1.5.0| | | | | |`curand_mtgp32_single`| | | | | | | | | | | |`curand_mtgp32_single_specific`| | | | | | | | | | | |`curand_mtgp32_specific`| | | | | | | | | | | -|`curand_normal`| | | | | | | | | | | -|`curand_normal2`| | | | | | | | | | | -|`curand_normal2_double`| | | | | | | | | | | -|`curand_normal4`| | | | | | | | | | | -|`curand_normal4_double`| | | | | | | | | | | -|`curand_normal_double`| | | | | | | | | | | -|`curand_poisson`| | | | | | | | | | | -|`curand_poisson4`| | | | | | | | | | | -|`curand_uniform`| | | | | | | | | | | -|`curand_uniform2_double`| | | | | | | | | | | -|`curand_uniform4`| | | | | | | | | | | -|`curand_uniform4_double`| | | | | | | | | | | -|`curand_uniform_double`| | | | | | | | | | | +|`curand_normal`| | | | |`rocrand_normal`|1.5.0| | | | | +|`curand_normal2`| | | | |`rocrand_normal2`|1.5.0| | | | | +|`curand_normal2_double`| | | | |`rocrand_normal_double2`|1.5.0| | | | | +|`curand_normal4`| | | | |`rocrand_normal4`|1.5.0| | | | | +|`curand_normal4_double`| | | | |`rocrand_normal_double4`|1.5.0| | | | | +|`curand_normal_double`| | | | |`rocrand_normal_double`|1.5.0| | | | | +|`curand_poisson`| | | | |`rocrand_poisson`|1.5.0| | | | | +|`curand_poisson4`| | | | |`rocrand_poisson4`|1.5.0| | | | | +|`curand_uniform`| | | | |`rocrand_uniform`|1.5.0| | | | | +|`curand_uniform2_double`| | | | |`rocrand_uniform_double2`|1.5.0| | | | | +|`curand_uniform4`| | | | |`rocrand_uniform4`|1.5.0| | | | | +|`curand_uniform4_double`| | | | |`rocrand_uniform_double4`|1.5.0| | | | | +|`curand_uniform_double`| | | | |`rocrand_uniform_double`|1.5.0| | | | | \*A - Added; D - Deprecated; C - Changed; R - Removed; E - Experimental 
\ No newline at end of file diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP.md b/docs/tables/CUSOLVER_API_supported_by_HIP.md index 06eee6a8..d5f054e0 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP.md @@ -31,7 +31,7 @@ |`CUSOLVER_ALG_0`|11.0| | | |`HIPSOLVER_ALG_0`|6.2.0| | | | | |`CUSOLVER_ALG_1`|11.0| | | |`HIPSOLVER_ALG_1`|6.2.0| | | | | |`CUSOLVER_ALG_2`|11.5| | | | | | | | | | -|`CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS`|12.2| | | | | | | | | | +|`CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS`|12.2| | | |`HIPSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS`|6.3.0| | | | | |`CUSOLVER_C_16BF`|11.0| | | | | | | | | | |`CUSOLVER_C_16F`|11.0| | | | | | | | | | |`CUSOLVER_C_32F`|11.0| | | | | | | | | | @@ -40,7 +40,7 @@ |`CUSOLVER_C_8U`|11.0| | | | | | | | | | |`CUSOLVER_C_AP`|11.0| | | | | | | | | | |`CUSOLVER_C_TF32`|11.0| | | | | | | | | | -|`CUSOLVER_DETERMINISTIC_RESULTS`|12.2| | | | | | | | | | +|`CUSOLVER_DETERMINISTIC_RESULTS`|12.2| | | |`HIPSOLVER_DETERMINISTIC_RESULTS`|6.3.0| | | | | |`CUSOLVER_EIG_MODE_NOVECTOR`|8.0| | | |`HIPSOLVER_EIG_MODE_NOVECTOR`|4.5.0| | | | | |`CUSOLVER_EIG_MODE_VECTOR`|8.0| | | |`HIPSOLVER_EIG_MODE_VECTOR`|4.5.0| | | | | |`CUSOLVER_EIG_RANGE_ALL`|10.1| | | |`HIPSOLVER_EIG_RANGE_ALL`|5.3.0| | | | | @@ -109,7 +109,7 @@ |`cudaLibMgGrid_t`|10.1| | | | | | | | | | |`cudaLibMgMatrixDesc_t`|10.1| | | | | | | | | | |`cusolverAlgMode_t`|11.0| | | |`hipsolverAlgMode_t`|6.2.0| | | | | -|`cusolverDeterministicMode_t`|12.2| | | | | | | | | | +|`cusolverDeterministicMode_t`|12.2| | | |`hipsolverDeterministicMode_t`|6.3.0| | | | | |`cusolverDirectMode_t`|11.0| | | | | | | | | | |`cusolverDnContext`| | | | | | | | | | | |`cusolverDnFunction_t`|11.0| | | |`hipsolverDnFunction_t`|6.2.0| | | | | @@ -310,7 +310,7 @@ |`cusolverDnGeqrf_bufferSize`|11.0|11.1| | | | | | | | | |`cusolverDnGesvd`|11.0|11.1| | | | | | | | | |`cusolverDnGesvd_bufferSize`|11.0|11.1| | | | | | | | | 
-|`cusolverDnGetDeterministicMode`|12.2| | | | | | | | | | +|`cusolverDnGetDeterministicMode`|12.2| | | |`hipsolverDnGetDeterministicMode`|6.3.0| | | |6.3.0| |`cusolverDnGetStream`| | | | |`hipsolverGetStream`|4.5.0| | | | | |`cusolverDnGetrf`|11.0|11.1| | | | | | | | | |`cusolverDnGetrf_bufferSize`|11.0|11.1| | | | | | | | | @@ -365,7 +365,7 @@ |`cusolverDnSXgesv`|11.0| | | | | | | | | | |`cusolverDnSXgesv_bufferSize`|11.0| | | | | | | | | | |`cusolverDnSetAdvOptions`|11.0| | | |`hipsolverDnSetAdvOptions`|6.2.0| | | | | -|`cusolverDnSetDeterministicMode`|12.2| | | | | | | | | | +|`cusolverDnSetDeterministicMode`|12.2| | | |`hipsolverDnSetDeterministicMode`|6.3.0| | | |6.3.0| |`cusolverDnSetStream`| | | | |`hipsolverSetStream`|4.5.0| | | | | |`cusolverDnSgebrd`| | | | |`hipsolverDnSgebrd`|5.1.0| | | | | |`cusolverDnSgebrd_bufferSize`| | | | |`hipsolverDnSgebrd_bufferSize`|5.1.0| | | | | @@ -426,8 +426,8 @@ |`cusolverDnSyevd_bufferSize`|11.0|11.1| | | | | | | | | |`cusolverDnSyevdx`|11.0|11.1| | | | | | | | | |`cusolverDnSyevdx_bufferSize`|11.0|11.1| | | | | | | | | -|`cusolverDnXgeqrf`|11.1| | | | | | | | | | -|`cusolverDnXgeqrf_bufferSize`|11.1| | | | | | | | | | +|`cusolverDnXgeqrf`|11.1| | | |`hipsolverDnXgeqrf`|6.3.0| | | |6.3.0| +|`cusolverDnXgeqrf_bufferSize`|11.1| | | |`hipsolverDnXgeqrf_bufferSize`|6.3.0| | | |6.3.0| |`cusolverDnXgesvd`|11.1| | | | | | | | | | |`cusolverDnXgesvd_bufferSize`|11.1| | | | | | | | | | |`cusolverDnXgesvdjGetResidual`|9.0| | | |`hipsolverDnXgesvdjGetResidual`|5.1.0| | | | | @@ -444,9 +444,9 @@ |`cusolverDnXgetrs`|11.1| | | |`hipsolverDnXgetrs`|6.2.0| | | | | |`cusolverDnXlarft`|12.4| | | | | | | | | | |`cusolverDnXlarft_bufferSize`|12.4| | | | | | | | | | -|`cusolverDnXpotrf`|11.1| | | | | | | | | | -|`cusolverDnXpotrf_bufferSize`|11.1| | | | | | | | | | -|`cusolverDnXpotrs`|11.1| | | | | | | | | | +|`cusolverDnXpotrf`|11.1| | | |`hipsolverDnXpotrf`|6.3.0| | | |6.3.0| +|`cusolverDnXpotrf_bufferSize`|11.1| | | 
|`hipsolverDnXpotrf_bufferSize`|6.3.0| | | |6.3.0| +|`cusolverDnXpotrs`|11.1| | | |`hipsolverDnXpotrs`|6.3.0| | | |6.3.0| |`cusolverDnXsyevd`|11.1| | | | | | | | | | |`cusolverDnXsyevd_bufferSize`|11.1| | | | | | | | | | |`cusolverDnXsyevdx`|11.1| | | | | | | | | | diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md index 1b491d6e..44ca42b8 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md @@ -31,7 +31,7 @@ |`CUSOLVER_ALG_0`|11.0| | | |`HIPSOLVER_ALG_0`|6.2.0| | | | | | | | | | | |`CUSOLVER_ALG_1`|11.0| | | |`HIPSOLVER_ALG_1`|6.2.0| | | | | | | | | | | |`CUSOLVER_ALG_2`|11.5| | | | | | | | | | | | | | | | -|`CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS`|12.2| | | | | | | | | | | | | | | | +|`CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS`|12.2| | | |`HIPSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS`|6.3.0| | | | | | | | | | | |`CUSOLVER_C_16BF`|11.0| | | | | | | | | | | | | | | | |`CUSOLVER_C_16F`|11.0| | | | | | | | | | | | | | | | |`CUSOLVER_C_32F`|11.0| | | | | | | | | | | | | | | | @@ -40,7 +40,7 @@ |`CUSOLVER_C_8U`|11.0| | | | | | | | | | | | | | | | |`CUSOLVER_C_AP`|11.0| | | | | | | | | | | | | | | | |`CUSOLVER_C_TF32`|11.0| | | | | | | | | | | | | | | | -|`CUSOLVER_DETERMINISTIC_RESULTS`|12.2| | | | | | | | | | | | | | | | +|`CUSOLVER_DETERMINISTIC_RESULTS`|12.2| | | |`HIPSOLVER_DETERMINISTIC_RESULTS`|6.3.0| | | | | | | | | | | |`CUSOLVER_EIG_MODE_NOVECTOR`|8.0| | | |`HIPSOLVER_EIG_MODE_NOVECTOR`|4.5.0| | | | |`rocblas_evect_none`|4.1.0| | | | | |`CUSOLVER_EIG_MODE_VECTOR`|8.0| | | |`HIPSOLVER_EIG_MODE_VECTOR`|4.5.0| | | | |`rocblas_evect_original`|4.1.0| | | | | |`CUSOLVER_EIG_RANGE_ALL`|10.1| | | |`HIPSOLVER_EIG_RANGE_ALL`|5.3.0| | | | |`rocblas_erange_all`|5.2.0| | | | | @@ -109,7 +109,7 @@ |`cudaLibMgGrid_t`|10.1| | | | | | | | | | | | | | | | |`cudaLibMgMatrixDesc_t`|10.1| | | | | | | | | | | | | | | | 
|`cusolverAlgMode_t`|11.0| | | |`hipsolverAlgMode_t`|6.2.0| | | | | | | | | | | -|`cusolverDeterministicMode_t`|12.2| | | | | | | | | | | | | | | | +|`cusolverDeterministicMode_t`|12.2| | | |`hipsolverDeterministicMode_t`|6.3.0| | | | | | | | | | | |`cusolverDirectMode_t`|11.0| | | | | | | | | | | | | | | | |`cusolverDnContext`| | | | | | | | | | | | | | | | | |`cusolverDnFunction_t`|11.0| | | |`hipsolverDnFunction_t`|6.2.0| | | | | | | | | | | @@ -310,7 +310,7 @@ |`cusolverDnGeqrf_bufferSize`|11.0|11.1| | | | | | | | | | | | | | | |`cusolverDnGesvd`|11.0|11.1| | | | | | | | | | | | | | | |`cusolverDnGesvd_bufferSize`|11.0|11.1| | | | | | | | | | | | | | | -|`cusolverDnGetDeterministicMode`|12.2| | | | | | | | | | | | | | | | +|`cusolverDnGetDeterministicMode`|12.2| | | |`hipsolverDnGetDeterministicMode`|6.3.0| | | |6.3.0| | | | | | | |`cusolverDnGetStream`| | | | |`hipsolverGetStream`|4.5.0| | | | |`rocblas_get_stream`| | | | | | |`cusolverDnGetrf`|11.0|11.1| | | | | | | | | | | | | | | |`cusolverDnGetrf_bufferSize`|11.0|11.1| | | | | | | | | | | | | | | @@ -365,7 +365,7 @@ |`cusolverDnSXgesv`|11.0| | | | | | | | | | | | | | | | |`cusolverDnSXgesv_bufferSize`|11.0| | | | | | | | | | | | | | | | |`cusolverDnSetAdvOptions`|11.0| | | |`hipsolverDnSetAdvOptions`|6.2.0| | | | | | | | | | | -|`cusolverDnSetDeterministicMode`|12.2| | | | | | | | | | | | | | | | +|`cusolverDnSetDeterministicMode`|12.2| | | |`hipsolverDnSetDeterministicMode`|6.3.0| | | |6.3.0| | | | | | | |`cusolverDnSetStream`| | | | |`hipsolverSetStream`|4.5.0| | | | |`rocblas_set_stream`| | | | | | |`cusolverDnSgebrd`| | | | |`hipsolverDnSgebrd`|5.1.0| | | | | | | | | | | |`cusolverDnSgebrd_bufferSize`| | | | |`hipsolverDnSgebrd_bufferSize`|5.1.0| | | | | | | | | | | @@ -426,8 +426,8 @@ |`cusolverDnSyevd_bufferSize`|11.0|11.1| | | | | | | | | | | | | | | |`cusolverDnSyevdx`|11.0|11.1| | | | | | | | | | | | | | | |`cusolverDnSyevdx_bufferSize`|11.0|11.1| | | | | | | | | | | | | | | 
-|`cusolverDnXgeqrf`|11.1| | | | | | | | | | | | | | | | -|`cusolverDnXgeqrf_bufferSize`|11.1| | | | | | | | | | | | | | | | +|`cusolverDnXgeqrf`|11.1| | | |`hipsolverDnXgeqrf`|6.3.0| | | |6.3.0| | | | | | | +|`cusolverDnXgeqrf_bufferSize`|11.1| | | |`hipsolverDnXgeqrf_bufferSize`|6.3.0| | | |6.3.0| | | | | | | |`cusolverDnXgesvd`|11.1| | | | | | | | | | | | | | | | |`cusolverDnXgesvd_bufferSize`|11.1| | | | | | | | | | | | | | | | |`cusolverDnXgesvdjGetResidual`|9.0| | | |`hipsolverDnXgesvdjGetResidual`|5.1.0| | | | | | | | | | | @@ -444,9 +444,9 @@ |`cusolverDnXgetrs`|11.1| | | |`hipsolverDnXgetrs`|6.2.0| | | | | | | | | | | |`cusolverDnXlarft`|12.4| | | | | | | | | | | | | | | | |`cusolverDnXlarft_bufferSize`|12.4| | | | | | | | | | | | | | | | -|`cusolverDnXpotrf`|11.1| | | | | | | | | | | | | | | | -|`cusolverDnXpotrf_bufferSize`|11.1| | | | | | | | | | | | | | | | -|`cusolverDnXpotrs`|11.1| | | | | | | | | | | | | | | | +|`cusolverDnXpotrf`|11.1| | | |`hipsolverDnXpotrf`|6.3.0| | | |6.3.0| | | | | | | +|`cusolverDnXpotrf_bufferSize`|11.1| | | |`hipsolverDnXpotrf_bufferSize`|6.3.0| | | |6.3.0| | | | | | | +|`cusolverDnXpotrs`|11.1| | | |`hipsolverDnXpotrs`|6.3.0| | | |6.3.0| | | | | | | |`cusolverDnXsyevd`|11.1| | | | | | | | | | | | | | | | |`cusolverDnXsyevd_bufferSize`|11.1| | | | | | | | | | | | | | | | |`cusolverDnXsyevdx`|11.1| | | | | | | | | | | | | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 84ebe0bd..1729e718 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -242,43 +242,43 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMV {"cublasStrmv", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrmv_64", {"hipblasStrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStrmv_64", {"hipblasStrmv_64", "rocblas_strmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, 
{"cublasDtrmv", {"hipblasDtrmv", "rocblas_dtrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrmv_64", {"hipblasDtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtrmv_64", {"hipblasDtrmv_64", "rocblas_dtrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtrmv", {"hipblasCtrmv_v2", "rocblas_ctrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrmv_64", {"hipblasCtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtrmv_64", {"hipblasCtrmv_v2_64", "rocblas_ctrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtrmv", {"hipblasZtrmv_v2", "rocblas_ztrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrmv_64", {"hipblasZtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtrmv_64", {"hipblasZtrmv_v2_64", "rocblas_ztrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TBMV {"cublasStbmv", {"hipblasStbmv", "rocblas_stbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStbmv_64", {"hipblasStbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStbmv_64", {"hipblasStbmv_64", "rocblas_stbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtbmv", {"hipblasDtbmv", "rocblas_dtbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtbmv_64", {"hipblasDtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtbmv_64", {"hipblasDtbmv_64", "rocblas_dtbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtbmv", {"hipblasCtbmv_v2", "rocblas_ctbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtbmv_64", {"hipblasCtbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtbmv_64", {"hipblasCtbmv_v2_64", "rocblas_ctbmv_64", 
CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtbmv", {"hipblasZtbmv_v2", "rocblas_ztbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtbmv_64", {"hipblasZtbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtbmv_64", {"hipblasZtbmv_v2_64", "rocblas_ztbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TPMV {"cublasStpmv", {"hipblasStpmv", "rocblas_stpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStpmv_64", {"hipblasStpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStpmv_64", {"hipblasStpmv_64", "rocblas_stpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtpmv", {"hipblasDtpmv", "rocblas_dtpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtpmv_64", {"hipblasDtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtpmv_64", {"hipblasDtpmv_64", "rocblas_dtpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtpmv", {"hipblasCtpmv_v2", "rocblas_ctpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtpmv_64", {"hipblasCtpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtpmv_64", {"hipblasCtpmv_v2_64", "rocblas_ctpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtpmv", {"hipblasZtpmv_v2", "rocblas_ztpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtpmv_64", {"hipblasZtpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtpmv_64", {"hipblasZtpmv_v2_64", "rocblas_ztpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TRSV {"cublasStrsv", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStrsv_64", {"hipblasStrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + 
{"cublasStrsv_64", {"hipblasStrsv_64", "rocblas_strsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtrsv", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtrsv_64", {"hipblasDtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtrsv_64", {"hipblasDtrsv_64", "rocblas_dtrsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtrsv", {"hipblasCtrsv_v2", "rocblas_ctrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtrsv_64", {"hipblasCtrsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtrsv_64", {"hipblasCtrsv_v2_64", "rocblas_ctrsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtrsv", {"hipblasZtrsv_v2", "rocblas_ztrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtrsv_64", {"hipblasZtrsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtrsv_64", {"hipblasZtrsv_v2_64", "rocblas_ztrsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TPSV {"cublasStpsv", {"hipblasStpsv", "rocblas_stpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, @@ -292,47 +292,47 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TBSV {"cublasStbsv", {"hipblasStbsv", "rocblas_stbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasStbsv_64", {"hipblasStbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStbsv_64", {"hipblasStbsv_64", "rocblas_stbsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtbsv", {"hipblasDtbsv", "rocblas_dtbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDtbsv_64", {"hipblasDtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtbsv_64", {"hipblasDtbsv_64", "rocblas_dtbsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, 
{"cublasCtbsv", {"hipblasCtbsv_v2", "rocblas_ctbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCtbsv_64", {"hipblasCtbsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtbsv_64", {"hipblasCtbsv_v2_64", "rocblas_ctbsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtbsv", {"hipblasZtbsv_v2", "rocblas_ztbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZtbsv_64", {"hipblasZtbsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtbsv_64", {"hipblasZtbsv_v2_64", "rocblas_ztbsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SYMV/HEMV {"cublasSsymv", {"hipblasSsymv", "rocblas_ssymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsymv_64", {"hipblasSsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSsymv_64", {"hipblasSsymv_64", "rocblas_ssymv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDsymv", {"hipblasDsymv", "rocblas_dsymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsymv_64", {"hipblasDsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDsymv_64", {"hipblasDsymv_64", "rocblas_dsymv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCsymv", {"hipblasCsymv_v2", "rocblas_csymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsymv_64", {"hipblasCsymv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCsymv_64", {"hipblasCsymv_v2_64", "rocblas_csymv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZsymv", {"hipblasZsymv_v2", "rocblas_zsymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsymv_64", {"hipblasZsymv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZsymv_64", {"hipblasZsymv_v2_64", 
"rocblas_zsymv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChemv", {"hipblasChemv_v2", "rocblas_chemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChemv_64", {"hipblasChemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChemv_64", {"hipblasChemv_v2_64", "rocblas_chemv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhemv", {"hipblasZhemv_v2", "rocblas_zhemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhemv_64", {"hipblasZhemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhemv_64", {"hipblasZhemv_v2_64", "rocblas_zhemv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SBMV/HBMV {"cublasSsbmv", {"hipblasSsbmv", "rocblas_ssbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsbmv_64", {"hipblasSsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSsbmv_64", {"hipblasSsbmv_64", "rocblas_ssbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDsbmv", {"hipblasDsbmv", "rocblas_dsbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsbmv_64", {"hipblasDsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDsbmv_64", {"hipblasDsbmv_64", "rocblas_dsbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChbmv", {"hipblasChbmv_v2", "rocblas_chbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChbmv_64", {"hipblasChbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChbmv_64", {"hipblasChbmv_v2_64", "rocblas_chbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhbmv", {"hipblasZhbmv_v2", "rocblas_zhbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhbmv_64", {"hipblasZhbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, 
ROC_UNSUPPORTED}}, + {"cublasZhbmv_64", {"hipblasZhbmv_v2_64", "rocblas_zhbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SPMV/HPMV {"cublasSspmv", {"hipblasSspmv", "rocblas_sspmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSspmv_64", {"hipblasSspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSspmv_64", {"hipblasSspmv_64", "rocblas_sspmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDspmv", {"hipblasDspmv", "rocblas_dspmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDspmv_64", {"hipblasDspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDspmv_64", {"hipblasDspmv_64", "rocblas_dspmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChpmv", {"hipblasChpmv_v2", "rocblas_chpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChpmv_64", {"hipblasChpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChpmv_64", {"hipblasChpmv_v2_64", "rocblas_chpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhpmv", {"hipblasZhpmv_v2", "rocblas_zhpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhpmv_64", {"hipblasZhpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhpmv_64", {"hipblasZhpmv_v2_64", "rocblas_zhpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // GER {"cublasSger", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, @@ -350,51 +350,51 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SYR/HER {"cublasSsyr", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsyr_64", {"hipblasSsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSsyr_64", {"hipblasSsyr_64", "rocblas_ssyr_64", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2}}, {"cublasDsyr", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsyr_64", {"hipblasDsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDsyr_64", {"hipblasDsyr_64", "rocblas_dsyr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCsyr", {"hipblasCsyr_v2", "rocblas_csyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsyr_64", {"hipblasCsyr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCsyr_64", {"hipblasCsyr_v2_64", "rocblas_csyr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZsyr", {"hipblasZsyr_v2", "rocblas_zsyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsyr_64", {"hipblasZsyr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZsyr_64", {"hipblasZsyr_v2_64", "rocblas_zsyr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCher", {"hipblasCher_v2", "rocblas_cher", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCher_64", {"hipblasCher_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCher_64", {"hipblasCher_v2_64", "rocblas_cher_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZher", {"hipblasZher_v2", "rocblas_zher", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZher_64", {"hipblasZher_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZher_64", {"hipblasZher_v2_64", "rocblas_zher_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SPR/HPR {"cublasSspr", {"hipblasSspr", "rocblas_sspr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSspr_64", {"hipblasSspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSspr_64", {"hipblasSspr_64", "rocblas_sspr_64", CONV_LIB_FUNC, 
API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDspr", {"hipblasDspr", "rocblas_dspr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDspr_64", {"hipblasDspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDspr_64", {"hipblasDspr_64", "rocblas_dspr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChpr", {"hipblasChpr_v2", "rocblas_chpr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChpr_64", {"hipblasChpr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChpr_64", {"hipblasChpr_v2_64", "rocblas_chpr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhpr", {"hipblasZhpr_v2", "rocblas_zhpr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhpr_64", {"hipblasZhpr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhpr_64", {"hipblasZhpr_v2_64", "rocblas_zhpr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SYR2/HER2 {"cublasSsyr2", {"hipblasSsyr2", "rocblas_ssyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSsyr2_64", {"hipblasSsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSsyr2_64", {"hipblasSsyr2_64", "rocblas_ssyr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDsyr2", {"hipblasDsyr2", "rocblas_dsyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDsyr2_64", {"hipblasDsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDsyr2_64", {"hipblasDsyr2_64", "rocblas_dsyr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCsyr2", {"hipblasCsyr2_v2", "rocblas_csyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCsyr2_64", {"hipblasCsyr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCsyr2_64", {"hipblasCsyr2_v2_64", 
"rocblas_csyr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZsyr2", {"hipblasZsyr2_v2", "rocblas_zsyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZsyr2_64", {"hipblasZsyr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZsyr2_64", {"hipblasZsyr2_v2_64", "rocblas_zsyr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCher2", {"hipblasCher2_v2", "rocblas_cher2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCher2_64", {"hipblasCher2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCher2_64", {"hipblasCher2_v2_64", "rocblas_cher2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZher2", {"hipblasZher2_v2", "rocblas_zher2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZher2_64", {"hipblasZher2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZher2_64", {"hipblasZher2_v2_64", "rocblas_zher2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SPR2/HPR2 {"cublasSspr2", {"hipblasSspr2", "rocblas_sspr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSspr2_64", {"hipblasSspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSspr2_64", {"hipblasSspr2_64", "rocblas_sspr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDspr2", {"hipblasDspr2", "rocblas_dspr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDspr2_64", {"hipblasDspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDspr2_64", {"hipblasDspr2_64", "rocblas_dspr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChpr2", {"hipblasChpr2_v2", "rocblas_chpr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasChpr2_64", {"hipblasChpr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, 
ROC_UNSUPPORTED}}, + {"cublasChpr2_64", {"hipblasChpr2_v2_64", "rocblas_chpr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhpr2", {"hipblasZhpr2_v2", "rocblas_zhpr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZhpr2_64", {"hipblasZhpr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhpr2_64", {"hipblasZhpr2_v2_64", "rocblas_zhpr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // Blas3 (v1) Routines // GEMM @@ -458,22 +458,22 @@ const std::map CUDA_BLAS_FUNCTION_MAP { {"cublasTSTgemvBatched_64", {"hipblasTSTgemvBatched_64", "rocblas_tstgemv_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasTSSgemvBatched", {"hipblasTSSgemvBatched", "rocblas_tssgemv_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasTSSgemvBatched_64", {"hipblasTSSgemvBatched_64", "rocblas_tssgemv_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, - {"cublasSgemvStridedBatched", {"hipblasSgemvStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, - {"cublasSgemvStridedBatched_64", {"hipblasSgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, - {"cublasDgemvStridedBatched", {"hipblasDgemvStridedBatched", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, - {"cublasDgemvStridedBatched_64", {"hipblasDgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, + {"cublasSgemvStridedBatched", {"hipblasSgemvStridedBatched", "rocblas_sgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, + {"cublasSgemvStridedBatched_64", {"hipblasSgemvStridedBatched_64", "rocblas_sgemv_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, + {"cublasDgemvStridedBatched", {"hipblasDgemvStridedBatched", "rocblas_dgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, + 
{"cublasDgemvStridedBatched_64", {"hipblasDgemvStridedBatched_64", "rocblas_dgemv_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasCgemvStridedBatched", {"hipblasCgemvStridedBatched_v2", "rocblas_cgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasCgemvStridedBatched_64", {"hipblasCgemvStridedBatched_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, + {"cublasCgemvStridedBatched_64", {"hipblasCgemvStridedBatched_v2_64", "rocblas_cgemv_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasZgemvStridedBatched", {"hipblasZgemvStridedBatched_v2", "rocblas_zgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, - {"cublasZgemvStridedBatched_64", {"hipblasZgemvStridedBatched_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, ROC_UNSUPPORTED}}, + {"cublasZgemvStridedBatched_64", {"hipblasZgemvStridedBatched_v2_64", "rocblas_zgemv_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3}}, {"cublasHSHgemvStridedBatched", {"hipblasHSHgemvStridedBatched", "rocblas_hshgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, - {"cublasHSHgemvStridedBatched_64", {"hipblasHSHgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasHSHgemvStridedBatched_64", {"hipblasHSHgemvStridedBatched_64", "rocblas_hshgemv_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasHSSgemvStridedBatched", {"hipblasHSSgemvStridedBatched", "rocblas_hssgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, - {"cublasHSSgemvStridedBatched_64", {"hipblasHSSgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasHSSgemvStridedBatched_64", {"hipblasHSSgemvStridedBatched_64", "rocblas_hssgemv_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasTSTgemvStridedBatched", 
{"hipblasTSTgemvStridedBatched", "rocblas_tstgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, - {"cublasTSTgemvStridedBatched_64", {"hipblasTSTgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasTSTgemvStridedBatched_64", {"hipblasTSTgemvStridedBatched_64", "rocblas_tstgemv_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, {"cublasTSSgemvStridedBatched", {"hipblasTSSgemvStridedBatched", "rocblas_tssgemv_strided_batched", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, - {"cublasTSSgemvStridedBatched_64", {"hipblasTSSgemvStridedBatched_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, UNSUPPORTED}}, + {"cublasTSSgemvStridedBatched_64", {"hipblasTSSgemvStridedBatched_64", "rocblas_tssgemv_strided_batched_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_UNSUPPORTED}}, // SYRK {"cublasSsyrk", {"hipblasSsyrk", "rocblas_ssyrk", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_3, HIP_SUPPORTED_V2_ONLY}}, @@ -660,43 +660,43 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TRMV {"cublasStrmv_v2", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStrmv_v2_64", {"hipblasStrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStrmv_v2_64", {"hipblasStrmv_64", "rocblas_strmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtrmv_v2", {"hipblasDtrmv", "rocblas_dtrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtrmv_v2_64", {"hipblasDtrmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtrmv_v2_64", {"hipblasDtrmv_64", "rocblas_dtrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtrmv_v2", {"hipblasCtrmv_v2", "rocblas_ctrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtrmv_v2_64", {"hipblasCtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtrmv_v2_64", 
{"hipblasCtrmv_v2_64", "rocblas_ctrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtrmv_v2", {"hipblasZtrmv_v2", "rocblas_ztrmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtrmv_v2_64", {"hipblasZtrmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtrmv_v2_64", {"hipblasZtrmv_v2_64", "rocblas_ztrmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TBMV {"cublasStbmv_v2", {"hipblasStbmv", "rocblas_stbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStbmv_v2_64", {"hipblasStbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStbmv_v2_64", {"hipblasStbmv_64", "rocblas_stbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtbmv_v2", {"hipblasDtbmv", "rocblas_dtbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtbmv_v2_64", {"hipblasDtbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtbmv_v2_64", {"hipblasDtbmv_64", "rocblas_dtbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtbmv_v2", {"hipblasCtbmv_v2", "rocblas_ctbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtbmv_v2_64", {"hipblasCtbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtbmv_v2_64", {"hipblasCtbmv_v2_64", "rocblas_ctbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtbmv_v2", {"hipblasZtbmv_v2", "rocblas_ztbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtbmv_v2_64", {"hipblasZtbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtbmv_v2_64", {"hipblasZtbmv_v2_64", "rocblas_ztbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TPMV {"cublasStpmv_v2", {"hipblasStpmv", "rocblas_stpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStpmv_v2_64", {"hipblasStpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStpmv_v2_64", {"hipblasStpmv_64", 
"rocblas_stpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtpmv_v2", {"hipblasDtpmv", "rocblas_dtpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtpmv_v2_64", {"hipblasDtpmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtpmv_v2_64", {"hipblasDtpmv_64", "rocblas_dtpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtpmv_v2", {"hipblasCtpmv_v2", "rocblas_ctpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtpmv_v2_64", {"hipblasCtpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtpmv_v2_64", {"hipblasCtpmv_v2_64", "rocblas_ctpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtpmv_v2", {"hipblasZtpmv_v2", "rocblas_ztpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtpmv_v2_64", {"hipblasZtpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtpmv_v2_64", {"hipblasZtpmv_v2_64", "rocblas_ztpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TRSV {"cublasStrsv_v2", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStrsv_v2_64", {"hipblasStrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStrsv_v2_64", {"hipblasStrsv_64", "rocblas_strsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtrsv_v2", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtrsv_v2_64", {"hipblasDtrsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtrsv_v2_64", {"hipblasDtrsv_64", "rocblas_dtrsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtrsv_v2", {"hipblasCtrsv_v2", "rocblas_ctrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtrsv_v2_64", {"hipblasCtrsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtrsv_v2_64", {"hipblasCtrsv_v2_64", "rocblas_ctrsv_64", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2}}, {"cublasZtrsv_v2", {"hipblasZtrsv_v2", "rocblas_ztrsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtrsv_v2_64", {"hipblasZtrsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtrsv_v2_64", {"hipblasZtrsv_v2_64", "rocblas_ztrsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // TPSV {"cublasStpsv_v2", {"hipblasStpsv", "rocblas_stpsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, @@ -710,47 +710,47 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // TBSV {"cublasStbsv_v2", {"hipblasStbsv", "rocblas_stbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasStbsv_v2_64", {"hipblasStbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasStbsv_v2_64", {"hipblasStbsv_64", "rocblas_stbsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDtbsv_v2", {"hipblasDtbsv", "rocblas_dtbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDtbsv_v2_64", {"hipblasDtbsv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDtbsv_v2_64", {"hipblasDtbsv_64", "rocblas_dtbsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCtbsv_v2", {"hipblasCtbsv_v2", "rocblas_ctbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCtbsv_v2_64", {"hipblasCtbsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCtbsv_v2_64", {"hipblasCtbsv_v2_64", "rocblas_ctbsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZtbsv_v2", {"hipblasZtbsv_v2", "rocblas_ztbsv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZtbsv_v2_64", {"hipblasZtbsv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZtbsv_v2_64", {"hipblasZtbsv_v2_64", "rocblas_ztbsv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SYMV/HEMV {"cublasSsymv_v2", {"hipblasSsymv", "rocblas_ssymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSsymv_v2_64", {"hipblasSsymv_64", "", CONV_LIB_FUNC, 
API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSsymv_v2_64", {"hipblasSsymv_64", "rocblas_ssymv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDsymv_v2", {"hipblasDsymv", "rocblas_dsymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDsymv_v2_64", {"hipblasDsymv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDsymv_v2_64", {"hipblasDsymv_64", "rocblas_dsymv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCsymv_v2", {"hipblasCsymv_v2", "rocblas_csymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCsymv_v2_64", {"hipblasCsymv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCsymv_v2_64", {"hipblasCsymv_v2_64", "rocblas_csymv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZsymv_v2", {"hipblasZsymv_v2", "rocblas_zsymv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZsymv_v2_64", {"hipblasZsymv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZsymv_v2_64", {"hipblasZsymv_v2_64", "rocblas_zsymv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChemv_v2", {"hipblasChemv_v2", "rocblas_chemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChemv_v2_64", {"hipblasChemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChemv_v2_64", {"hipblasChemv_v2_64", "rocblas_chemv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhemv_v2", {"hipblasZhemv_v2", "rocblas_zhemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhemv_v2_64", {"hipblasZhemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhemv_v2_64", {"hipblasZhemv_v2_64", "rocblas_zhemv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SBMV/HBMV {"cublasSsbmv_v2", {"hipblasSsbmv", "rocblas_ssbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSsbmv_v2_64", {"hipblasSsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, 
ROC_UNSUPPORTED}}, + {"cublasSsbmv_v2_64", {"hipblasSsbmv_64", "rocblas_ssbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDsbmv_v2", {"hipblasDsbmv", "rocblas_dsbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDsbmv_v2_64", {"hipblasDsbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDsbmv_v2_64", {"hipblasDsbmv_64", "rocblas_dsbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChbmv_v2", {"hipblasChbmv_v2", "rocblas_chbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChbmv_v2_64", {"hipblasChbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChbmv_v2_64", {"hipblasChbmv_v2_64", "rocblas_chbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhbmv_v2", {"hipblasZhbmv_v2", "rocblas_zhbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhbmv_v2_64", {"hipblasZhbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhbmv_v2_64", {"hipblasZhbmv_v2_64", "rocblas_zhbmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SPMV/HPMV {"cublasSspmv_v2", {"hipblasSspmv", "rocblas_sspmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSspmv_v2_64", {"hipblasSspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSspmv_v2_64", {"hipblasSspmv_64", "rocblas_sspmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDspmv_v2", {"hipblasDspmv", "rocblas_dspmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDspmv_v2_64", {"hipblasDspmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDspmv_v2_64", {"hipblasDspmv_64", "rocblas_dspmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChpmv_v2", {"hipblasChpmv_v2", "rocblas_chpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChpmv_v2_64", {"hipblasChpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChpmv_v2_64", 
{"hipblasChpmv_v2_64", "rocblas_chpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhpmv_v2", {"hipblasZhpmv_v2", "rocblas_zhpmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhpmv_v2_64", {"hipblasZhpmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhpmv_v2_64", {"hipblasZhpmv_v2_64", "rocblas_zhpmv_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // GER {"cublasSger_v2", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, @@ -768,51 +768,51 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // SYR/HER {"cublasSsyr_v2", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSsyr_v2_64", {"hipblasSsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSsyr_v2_64", {"hipblasSsyr_64", "rocblas_ssyr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDsyr_v2", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDsyr_v2_64", {"hipblasDsyr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDsyr_v2_64", {"hipblasDsyr_64", "rocblas_dsyr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCsyr_v2", {"hipblasCsyr_v2", "rocblas_csyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCsyr_v2_64", {"hipblasCsyr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCsyr_v2_64", {"hipblasCsyr_v2_64", "rocblas_csyr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZsyr_v2", {"hipblasZsyr_v2", "rocblas_zsyr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZsyr_v2_64", {"hipblasZsyr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZsyr_v2_64", {"hipblasZsyr_v2_64", "rocblas_zsyr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCher_v2", {"hipblasCher_v2", "rocblas_cher", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCher_v2_64", 
{"hipblasCher_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCher_v2_64", {"hipblasCher_v2_64", "rocblas_cher_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZher_v2", {"hipblasZher_v2", "rocblas_zher", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZher_v2_64", {"hipblasZher_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZher_v2_64", {"hipblasZher_v2_64", "rocblas_zher_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SPR/HPR {"cublasSspr_v2", {"hipblasSspr", "rocblas_sspr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSspr_v2_64", {"hipblasSspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSspr_v2_64", {"hipblasSspr_64", "rocblas_sspr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDspr_v2", {"hipblasDspr", "rocblas_dspr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDspr_v2_64", {"hipblasDspr_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDspr_v2_64", {"hipblasDspr_64", "rocblas_dspr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChpr_v2", {"hipblasChpr_v2", "rocblas_chpr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChpr_v2_64", {"hipblasChpr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChpr_v2_64", {"hipblasChpr_v2_64", "rocblas_chpr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhpr_v2", {"hipblasZhpr_v2", "rocblas_zhpr", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhpr_v2_64", {"hipblasZhpr_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhpr_v2_64", {"hipblasZhpr_v2_64", "rocblas_zhpr_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SYR2/HER2 {"cublasSsyr2_v2", {"hipblasSsyr2", "rocblas_ssyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSsyr2_v2_64", {"hipblasSsyr2_64", "", CONV_LIB_FUNC, API_BLAS, 
SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasSsyr2_v2_64", {"hipblasSsyr2_64", "rocblas_ssyr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDsyr2_v2", {"hipblasDsyr2", "rocblas_dsyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDsyr2_v2_64", {"hipblasDsyr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDsyr2_v2_64", {"hipblasDsyr2_64", "rocblas_dsyr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCsyr2_v2", {"hipblasCsyr2_v2", "rocblas_csyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCsyr2_v2_64", {"hipblasCsyr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCsyr2_v2_64", {"hipblasCsyr2_v2_64", "rocblas_csyr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZsyr2_v2", {"hipblasZsyr2_v2", "rocblas_zsyr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZsyr2_v2_64", {"hipblasZsyr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZsyr2_v2_64", {"hipblasZsyr2_v2_64", "rocblas_zsyr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasCher2_v2", {"hipblasCher2_v2", "rocblas_cher2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCher2_v2_64", {"hipblasCher2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasCher2_v2_64", {"hipblasCher2_v2_64", "rocblas_cher2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZher2_v2", {"hipblasZher2_v2", "rocblas_zher2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZher2_v2_64", {"hipblasZher2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZher2_v2_64", {"hipblasZher2_v2_64", "rocblas_zher2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // SPR2/HPR2 {"cublasSspr2_v2", {"hipblasSspr2", "rocblas_sspr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSspr2_v2_64", {"hipblasSspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, 
ROC_UNSUPPORTED}}, + {"cublasSspr2_v2_64", {"hipblasSspr2_64", "rocblas_sspr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasDspr2_v2", {"hipblasDspr2", "rocblas_dspr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDspr2_v2_64", {"hipblasDspr2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasDspr2_v2_64", {"hipblasDspr2_64", "rocblas_dspr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasChpr2_v2", {"hipblasChpr2_v2", "rocblas_chpr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasChpr2_v2_64", {"hipblasChpr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasChpr2_v2_64", {"hipblasChpr2_v2_64", "rocblas_chpr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, {"cublasZhpr2_v2", {"hipblasZhpr2_v2", "rocblas_zhpr2", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZhpr2_v2_64", {"hipblasZhpr2_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED}}, + {"cublasZhpr2_v2_64", {"hipblasZhpr2_v2_64", "rocblas_zhpr2_64", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, // Blas3 (v2) Routines // GEMM @@ -2329,6 +2329,70 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"rocblas_hssgemv_batched_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_tstgemv_batched_64", {HIP_6020, HIP_0, HIP_0 }}, {"rocblas_tssgemv_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_sgemv_strided_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_dgemv_strided_batched", {HIP_3050, HIP_0, HIP_0 }}, + {"rocblas_sgemv_strided_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dgemv_strided_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_cgemv_strided_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zgemv_strided_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_hshgemv_strided_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_hssgemv_strided_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_tstgemv_strided_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + 
{"rocblas_tssgemv_strided_batched_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ssbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dsbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_chbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zhbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ssymv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dsymv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_csymv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zsymv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_chemv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zhemv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ssyr_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dsyr_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_csyr_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zsyr_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_cher_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zher_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ssyr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dsyr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_csyr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zsyr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_cher2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zher2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_sspmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dspmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_chpmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zhpmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_sspr_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dspr_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_chpr_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zhpr_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_sspr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dspr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_chpr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_zhpr2_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_strmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtrmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctrmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztrmv_64", {HIP_6020, HIP_0, HIP_0 }}, + 
{"rocblas_stpmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtpmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctpmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztpmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_stbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztbmv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_stbsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtbsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctbsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztbsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_strsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_dtrsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ctrsv_64", {HIP_6020, HIP_0, HIP_0 }}, + {"rocblas_ztrsv_64", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map HIP_BLAS_FUNCTION_CHANGED_VER_MAP { diff --git a/src/CUDA2HIP_DNN_API_functions.cpp b/src/CUDA2HIP_DNN_API_functions.cpp index f34f811a..96c9f6d8 100644 --- a/src/CUDA2HIP_DNN_API_functions.cpp +++ b/src/CUDA2HIP_DNN_API_functions.cpp @@ -347,6 +347,8 @@ const std::map CUDA_DNN_FUNCTION_MAP { {"cudnnBackendSetAttribute", {"hipdnnBackendSetAttribute", "miopenBackendSetAttribute", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, {"cudnnBackendGetAttribute", {"hipdnnBackendGetAttribute", "miopenBackendGetAttribute", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, {"cudnnBackendExecute", {"hipdnnBackendExecute", "miopenBackendExecute", CONV_LIB_FUNC, API_DNN, 2, HIP_UNSUPPORTED}}, + {"cudnnBackendPopulateCudaGraph", {"hipdnnBackendPopulateCudaGraph", "", CONV_LIB_FUNC, API_DNN, 2, UNSUPPORTED}}, + {"cudnnBackendUpdateCudaGraph", {"hipdnnBackendUpdateCudaGraph", "", CONV_LIB_FUNC, API_DNN, 2, UNSUPPORTED}}, }; const std::map CUDA_DNN_FUNCTION_VER_MAP { @@ -628,6 +630,8 @@ const std::map CUDA_DNN_FUNCTION_VER_MAP { {"cudnnGetLastErrorString", {CUDNN_900, CUDA_0, CUDA_0 }}, {"cudnnGraphVersionCheck", {CUDNN_900, CUDA_0, CUDA_0 }}, {"cudnnOpsVersionCheck", 
{CUDNN_900, CUDA_0, CUDA_0 }}, + {"cudnnBackendPopulateCudaGraph", {CUDNN_950, CUDA_0, CUDA_0 }}, + {"cudnnBackendUpdateCudaGraph", {CUDNN_950, CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_FUNCTION_VER_MAP { @@ -671,6 +675,50 @@ const std::map HIP_DNN_FUNCTION_VER_MAP { {"miopenGetPoolingNdForwardOutputDim", {HIP_3030, HIP_0, HIP_0 }}, {"miopenGetPoolingForwardOutputDim", {HIP_2010, HIP_0, HIP_0 }}, {"miopenDestroyPoolingDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenCreateActivationDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenDestroyActivationDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenActivationForward", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenActivationBackward", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenCreateLRNDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenSetLRNDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenGetLRNDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenDestroyLRNDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenDeriveBNTensorDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenBatchNormalizationForwardTraining", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenBatchNormalizationForwardInference", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenBatchNormalizationBackward", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenCreateDropoutDescriptor", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenDestroyDropoutDescriptor", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenDropoutGetStatesSize", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenDropoutGetReserveSpaceSize", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenSetDropoutDescriptor", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenGetDropoutDescriptor", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenRestoreDropoutDescriptor", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenDropoutForward", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenDropoutBackward", {HIP_2080, HIP_0, HIP_0 }}, + {"miopenCreateRNNDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenDestroyRNNDescriptor", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenGetRNNDescriptor_V2", {HIP_3050, HIP_0, HIP_0 }}, + 
{"miopenGetRNNWorkspaceSize", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenGetRNNTrainingReserveSize", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenGetRNNParamsSize", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenRNNForwardInference", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenRNNForwardTraining", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenRNNBackwardData", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenRNNBackwardWeights", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenSetRNNDescriptor_V2", {HIP_3050, HIP_0, HIP_0 }}, + {"miopenCreateCTCLossDescriptor", {HIP_2060, HIP_0, HIP_0 }}, + {"miopenSetCTCLossDescriptor", {HIP_2060, HIP_0, HIP_0 }}, + {"miopenGetCTCLossDescriptor", {HIP_2060, HIP_0, HIP_0 }}, + {"miopenDestroyCTCLossDescriptor", {HIP_2060, HIP_0, HIP_0 }}, + {"miopenCTCLoss", {HIP_2060, HIP_0, HIP_0 }}, + {"miopenGetCTCLossWorkspaceSize", {HIP_2060, HIP_0, HIP_0 }}, + {"miopenBackendCreateDescriptor", {HIP_6020, HIP_0, HIP_0 }}, + {"miopenBackendDestroyDescriptor", {HIP_6020, HIP_0, HIP_0 }}, + {"miopenBackendFinalize", {HIP_6020, HIP_0, HIP_0 }}, + {"miopenBackendSetAttribute", {HIP_6020, HIP_0, HIP_0 }}, + {"miopenBackendGetAttribute", {HIP_6020, HIP_0, HIP_0 }}, + {"miopenBackendExecute", {HIP_6020, HIP_0, HIP_0 }}, }; const std::map CUDA_DNN_API_SECTION_MAP { diff --git a/src/CUDA2HIP_DNN_API_types.cpp b/src/CUDA2HIP_DNN_API_types.cpp index 8384d56d..49fedd2d 100644 --- a/src/CUDA2HIP_DNN_API_types.cpp +++ b/src/CUDA2HIP_DNN_API_types.cpp @@ -69,6 +69,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", {"HIPDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", {"HIPDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", {"HIPDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, + {"CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", 
{"HIPDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", {"HIPDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_NOT_SUPPORTED_SHAPE", {"HIPDNN_STATUS_NOT_SUPPORTED_SHAPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", {"HIPDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, @@ -81,6 +82,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", {"HIPDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_NOT_SUPPORTED_PADDING", {"HIPDNN_STATUS_NOT_SUPPORTED_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", {"HIPDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, + {"CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", {"HIPDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", {"HIPDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", {"HIPDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", {"HIPDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, @@ -116,8 +118,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_DATA_BFLOAT16", {"HIPDNN_DATA_BFLOAT16", "miopenBFloat16", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 9 {"CUDNN_DATA_INT64", {"HIPDNN_DATA_INT64", "miopenInt64", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 10 {"CUDNN_DATA_BOOLEAN", 
{"HIPDNN_DATA_BOOLEAN", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 11 - {"CUDNN_DATA_FP8_E4M3", {"HIPDNN_DATA_FP8_E4M3", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 12 - {"CUDNN_DATA_FP8_E5M2", {"HIPDNN_DATA_FP8_E5M2", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 13 + {"CUDNN_DATA_FP8_E4M3", {"HIPDNN_DATA_FP8_E4M3", "miopenFloat8", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 12 + {"CUDNN_DATA_FP8_E5M2", {"HIPDNN_DATA_FP8_E5M2", "miopenBFloat8", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 13 {"CUDNN_DATA_FAST_FLOAT_FOR_FP8", {"HIPDNN_DATA_FAST_FLOAT_FOR_FP8", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 14 {"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", "", CONV_TYPE, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 0 @@ -173,7 +175,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 3 {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", "miopenConvolutionBwdDataAlgoWinograd", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 4 {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 5 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 6 + {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM", "miopenTransposeBwdDataAlgoGEMM", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 6 {"cudnnConvolutionBwdFilterAlgo_t", {"hipdnnConvolutionBwdFilterAlgo_t", "", CONV_TYPE, API_DNN, 1, ROC_UNSUPPORTED}}, {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_0", "", 
CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 0 {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_1", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED}}, // 1 @@ -241,11 +243,11 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_SOFTMAX_MODE_CHANNEL", {"HIPDNN_SOFTMAX_MODE_CHANNEL", "MIOPEN_SOFTMAX_MODE_CHANNEL", CONV_NUMERIC_LITERAL, API_DNN, 1}}, // 1 {"cudnnPoolingMode_t", {"hipdnnPoolingMode_t", "miopenPoolingMode_t", CONV_TYPE, API_DNN, 1, CUDA_DEPRECATED}}, {"CUDNN_POOLING_MAX", {"HIPDNN_POOLING_MAX", "miopenPoolingMax", CONV_NUMERIC_LITERAL, API_DNN, 1, CUDA_DEPRECATED}}, // 0 - {"CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED | CUDA_DEPRECATED}}, // 1 - {"CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED | CUDA_DEPRECATED}}, // 2 + {"CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", "miopenPoolingAverageInclusive", CONV_NUMERIC_LITERAL, API_DNN, 1, CUDA_DEPRECATED}}, // 1 + {"CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", "miopenPoolingAverage", CONV_NUMERIC_LITERAL, API_DNN, 1, CUDA_DEPRECATED}}, // 2 {"CUDNN_POOLING_MAX_DETERMINISTIC", {"HIPDNN_POOLING_MAX_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED | CUDA_DEPRECATED}}, // 3 {"cudnnActivationMode_t", {"hipdnnActivationMode_t", "miopenActivationMode_t", CONV_TYPE, API_DNN, 1, CUDA_DEPRECATED}}, - {"CUDNN_ACTIVATION_SIGMOID", {"HIPDNN_ACTIVATION_SIGMOID", "", CONV_NUMERIC_LITERAL, API_DNN, 1, ROC_UNSUPPORTED | CUDA_DEPRECATED}}, // 0 + {"CUDNN_ACTIVATION_SIGMOID", {"HIPDNN_ACTIVATION_SIGMOID", "miopenActivationLOGISTIC", CONV_NUMERIC_LITERAL, API_DNN, 1, CUDA_DEPRECATED}}, // 0 {"CUDNN_ACTIVATION_RELU", {"HIPDNN_ACTIVATION_RELU", 
"miopenActivationRELU", CONV_NUMERIC_LITERAL, API_DNN, 1, CUDA_DEPRECATED}}, // 1 {"CUDNN_ACTIVATION_TANH", {"HIPDNN_ACTIVATION_TANH", "miopenActivationTANH", CONV_NUMERIC_LITERAL, API_DNN, 1, CUDA_DEPRECATED}}, // 2 {"CUDNN_ACTIVATION_CLIPPED_RELU", {"HIPDNN_ACTIVATION_CLIPPED_RELU", "miopenActivationCLIPPEDRELU", CONV_NUMERIC_LITERAL, API_DNN, 1, CUDA_DEPRECATED}}, // 3 @@ -269,9 +271,9 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", {"HIPDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 0 {"CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", {"HIPDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1 {"CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", {"HIPDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 2 - {"cudnnRNNPaddingMode_t", {"hipdnnRNNPaddingMode_t", "", CONV_TYPE, API_DNN, 1, UNSUPPORTED}}, - {"CUDNN_RNN_PADDED_IO_DISABLED", {"HIPDNN_RNN_PADDED_IO_DISABLED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 0 - {"CUDNN_RNN_PADDED_IO_ENABLED", {"HIPDNN_RNN_PADDED_IO_ENABLED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1 + {"cudnnRNNPaddingMode_t", {"hipdnnRNNPaddingMode_t", "miopenRNNPaddingMode_t", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED | CUDA_REMOVED}}, + {"CUDNN_RNN_PADDED_IO_DISABLED", {"HIPDNN_RNN_PADDED_IO_DISABLED", "miopenRNNIONotPadded", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED | CUDA_REMOVED}}, // 0 + {"CUDNN_RNN_PADDED_IO_ENABLED", {"HIPDNN_RNN_PADDED_IO_ENABLED", "miopenRNNIOWithPadding", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED | CUDA_REMOVED}}, // 1 {"cudnnSeqDataAxis_t", {"hipdnnSeqDataAxis_t", "", CONV_TYPE, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_SEQDATA_TIME_DIM", {"HIPDNN_SEQDATA_TIME_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 0 {"CUDNN_SEQDATA_BATCH_DIM", {"HIPDNN_SEQDATA_BATCH_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, 
UNSUPPORTED}}, // 1 @@ -382,9 +384,9 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", {"HIPDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 101 {"CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", {"HIPDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 102 {"CUDNN_SCALAR_DOUBLE_BN_EPSILON", {"HIPDNN_SCALAR_DOUBLE_BN_EPSILON", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 103 - {"cudnnForwardMode_t", {"hipdnnForwardMode_t", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, - {"CUDNN_FWD_MODE_INFERENCE", {"HIPDNN_FWD_MODE_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 0 - {"CUDNN_FWD_MODE_TRAINING", {"HIPDNN_FWD_MODE_TRAINING", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1 + {"cudnnForwardMode_t", {"hipdnnForwardMode_t", "miopenRNNFWDMode_t", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_FWD_MODE_INFERENCE", {"HIPDNN_FWD_MODE_INFERENCE", "miopenRNNInference", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_FWD_MODE_TRAINING", {"HIPDNN_FWD_MODE_TRAINING", "miopenRNNTraining", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1 {"cudnnPointwiseMode_t", {"hipdnnPointwiseMode_t", "miopenPointwiseMode_t", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_POINTWISE_ADD", {"HIPDNN_POINTWISE_ADD", "MIOPEN_POINTWISE_ADD", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 0 {"CUDNN_POINTWISE_MUL", {"HIPDNN_POINTWISE_MUL", "MIOPEN_POINTWISE_MUL", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1 @@ -472,7 +474,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", {"HIPDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", "MIOPEN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 403 {"CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", 
{"HIPDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", "MIOPEN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 404 {"CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", {"HIPDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", "MIOPEN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 405 - {"CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", {"HIPDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", "MIOPEN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 406 + {"CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", {"HIPDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 406 {"CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", {"HIPDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", "MIOPEN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 500 {"CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", {"HIPDNN_ATTR_INTERMEDIATE_INFO_SIZE", "MIOPEN_ATTR_INTERMEDIATE_INFO_SIZE", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 501 {"CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", {"HIPDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", "MIOPEN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 502 @@ -531,7 +533,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_OPERATIONGRAPH_HANDLE", {"HIPDNN_ATTR_OPERATIONGRAPH_HANDLE", "MIOPEN_ATTR_OPERATIONGRAPH_HANDLE", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 800 {"CUDNN_ATTR_OPERATIONGRAPH_OPS", {"HIPDNN_ATTR_OPERATIONGRAPH_OPS", "MIOPEN_ATTR_OPERATIONGRAPH_OPS", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 801 {"CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT", {"HIPDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT", "MIOPEN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 802 - {"CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", 
{"HIPDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", "MIOPEN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 803 + {"CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", {"HIPDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 803 {"CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT", {"HIPDNN_ATTR_TENSOR_BYTE_ALIGNMENT", "MIOPEN_ATTR_TENSOR_BYTE_ALIGNMENT", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 900 {"CUDNN_ATTR_TENSOR_DATA_TYPE", {"HIPDNN_ATTR_TENSOR_DATA_TYPE", "MIOPEN_ATTR_TENSOR_DATA_TYPE", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 901 {"CUDNN_ATTR_TENSOR_DIMENSIONS", {"HIPDNN_ATTR_TENSOR_DIMENSIONS", "MIOPEN_ATTR_TENSOR_DIMENSIONS", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 902 @@ -618,10 +620,10 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_OPERATION_SIGNAL_VALUE", {"HIPDNN_ATTR_OPERATION_SIGNAL_VALUE", "MIOPEN_ATTR_OPERATION_SIGNAL_VALUE", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1902 {"CUDNN_ATTR_OPERATION_SIGNAL_XDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_XDESC", "MIOPEN_ATTR_OPERATION_SIGNAL_XDESC", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1903 {"CUDNN_ATTR_OPERATION_SIGNAL_YDESC", {"HIPDNN_ATTR_OPERATION_SIGNAL_YDESC", "MIOPEN_ATTR_OPERATION_SIGNAL_YDESC", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 1904 - {"CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", {"HIPDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", "MIOPEN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1950 - {"CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", {"HIPDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", "MIOPEN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1951 - {"CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", {"HIPDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", 
"MIOPEN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1952 - {"CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", {"HIPDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", "MIOPEN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1953 + {"CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", {"HIPDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1950 + {"CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", {"HIPDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1951 + {"CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", {"HIPDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1952 + {"CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", {"HIPDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1953 {"CUDNN_ATTR_OPERATION_NORM_FWD_MODE", {"HIPDNN_ATTR_OPERATION_NORM_FWD_MODE", "MIOPEN_ATTR_OPERATION_NORM_FWD_MODE", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2000 {"CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", {"HIPDNN_ATTR_OPERATION_NORM_FWD_PHASE", "MIOPEN_ATTR_OPERATION_NORM_FWD_PHASE", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2001 {"CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", {"HIPDNN_ATTR_OPERATION_NORM_FWD_XDESC", "MIOPEN_ATTR_OPERATION_NORM_FWD_XDESC", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2002 @@ -660,7 +662,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_ATTR_OPERATION_RNG_SEED", {"HIPDNN_ATTR_OPERATION_RNG_SEED", "MIOPEN_ATTR_OPERATION_RNG_SEED", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2311 {"CUDNN_ATTR_OPERATION_RNG_DESC", {"HIPDNN_ATTR_OPERATION_RNG_DESC", "MIOPEN_ATTR_OPERATION_RNG_DESC", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2312 
{"CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC", {"HIPDNN_ATTR_OPERATION_RNG_OFFSET_DESC", "MIOPEN_ATTR_OPERATION_RNG_OFFSET_DESC", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, // 2313 - {"CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", {"HIPDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", "MIOPEN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 2400 + {"CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", {"HIPDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 2400 + {"CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", {"HIPDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 2401 {"cudnnBackendAttributeType_t", {"hipdnnBackendAttributeType_t", "miopenBackendAttributeType_t", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_HANDLE", {"HIPDNN_TYPE_HANDLE", "MIOPEN_TYPE_HANDLE", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_TYPE_DATA_TYPE", {"HIPDNN_TYPE_DATA_TYPE", "MIOPEN_TYPE_DATA_TYPE", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, @@ -727,8 +730,8 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_BACKEND_RNG_DESCRIPTOR", {"HIPDNN_BACKEND_RNG_DESCRIPTOR", "MIOPEN_BACKEND_RNG_DESCRIPTOR", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_RNG_DESCRIPTOR", "MIOPEN_BACKEND_OPERATION_RNG_DESCRIPTOR", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, - {"CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", {"HIPDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", "MIOPEN_BACKEND_KERNEL_CACHE_DESCRIPTOR", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, - {"CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", 
"MIOPEN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, + {"CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", {"HIPDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, + {"CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", {"HIPDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"cudnnBackendNumericalNote_t", {"hipdnnBackendNumericalNote_t", "", CONV_TYPE, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_TENSOR_CORE", {"HIPDNN_NUMERICAL_NOTE_TENSOR_CORE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", {"HIPDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, @@ -809,6 +812,7 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", {"HIPDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 0 {"CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", {"HIPDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 1 {"CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", {"HIPDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 2 + {"CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", {"HIPDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, // 3 {"CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", {"HIPDNN_BEHAVIOR_NOTE_TYPE_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"cudnnResampleMode_t", {"hipdnnResampleMode_t", "", CONV_TYPE, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_RESAMPLE_NEAREST", {"HIPDNN_RESAMPLE_NEAREST", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, @@ -824,10 +828,10 @@ const std::map CUDA_DNN_TYPE_NAME_MAP { {"CUDNN_TENSOR_REORDERING_NONE", {"HIPDNN_TENSOR_REORDERING_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, 1, 
UNSUPPORTED}}, {"CUDNN_TENSOR_REORDERING_INT8x32", {"HIPDNN_TENSOR_REORDERING_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_TENSOR_REORDERING_F16x16", {"HIPDNN_TENSOR_REORDERING_F16x16", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, - {"cudnnPaddingMode_t", {"hipdnnPaddingMode_t", "", CONV_TYPE, API_DNN, 1, UNSUPPORTED}}, - {"CUDNN_ZERO_PAD", {"HIPDNN_ZERO_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, - {"CUDNN_NEG_INF_PAD", {"HIPDNN_NEG_INF_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, - {"CUDNN_EDGE_VAL_PAD", {"HIPDNN_EDGE_VAL_PAD", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, + {"cudnnPaddingMode_t", {"hipdnnPaddingMode_t", "miopenPaddingMode_t", CONV_TYPE, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_ZERO_PAD", {"HIPDNN_ZERO_PAD", "miopenPaddingDefault", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_NEG_INF_PAD", {"HIPDNN_NEG_INF_PAD", "miopenPaddingSame", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, + {"CUDNN_EDGE_VAL_PAD", {"HIPDNN_EDGE_VAL_PAD", "miopenPaddingValid", CONV_NUMERIC_LITERAL, API_DNN, 1, HIP_UNSUPPORTED}}, {"cudnnBackendNormMode_t", {"hipdnnBackendNormMode_t", "", CONV_TYPE, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_LAYER_NORM", {"HIPDNN_LAYER_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, {"CUDNN_INSTANCE_NORM", {"HIPDNN_INSTANCE_NORM", "", CONV_NUMERIC_LITERAL, API_DNN, 1, UNSUPPORTED}}, @@ -938,9 +942,9 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", {CUDNN_721, CUDA_0, CUDA_0 }}, {"CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", {CUDNN_721, CUDA_0, CUDA_0 }}, {"CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", {CUDNN_721, CUDA_0, CUDA_0 }}, - {"cudnnRNNPaddingMode_t", {CUDNN_721, CUDA_0, CUDA_0 }}, - {"CUDNN_RNN_PADDED_IO_DISABLED", {CUDNN_721, CUDA_0, CUDA_0 }}, - {"CUDNN_RNN_PADDED_IO_ENABLED", {CUDNN_721, CUDA_0, CUDA_0 }}, + {"cudnnRNNPaddingMode_t", {CUDNN_721, CUDNN_801, CUDNN_900}}, + 
{"CUDNN_RNN_PADDED_IO_DISABLED", {CUDNN_721, CUDNN_801, CUDNN_900}}, + {"CUDNN_RNN_PADDED_IO_ENABLED", {CUDNN_721, CUDNN_801, CUDNN_900}}, {"cudnnRNNStruct", {CUDNN_50, CUDA_0, CUDA_0 }}, {"cudnnRNNDescriptor_t", {CUDNN_50, CUDA_0, CUDA_0 }}, {"cudnnPersistentRNNPlan", {CUDNN_60, CUDA_0, CUDA_0 }}, @@ -1790,6 +1794,11 @@ const std::map CUDA_DNN_TYPE_NAME_VER_MAP { {"cudnnCTCGradMode_t", {CUDNN_900, CUDA_0, CUDA_0 }}, {"CUDNN_CTC_ZERO_OOB_GRADIENTS", {CUDNN_900, CUDA_0, CUDA_0 }}, {"CUDNN_CTC_SKIP_OOB_GRADIENTS", {CUDNN_900, CUDA_0, CUDA_0 }}, + {"CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", {CUDNN_950, CUDA_0, CUDA_0 }}, + {"CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", {CUDNN_950, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", {CUDNN_950, CUDA_0, CUDA_0 }}, + {"CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", {CUDNN_950, CUDA_0, CUDA_0 }}, + {"CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", {CUDNN_950, CUDA_0, CUDA_0 }}, }; const std::map HIP_DNN_TYPE_NAME_VER_MAP { @@ -2209,4 +2218,51 @@ const std::map HIP_DNN_TYPE_NAME_VER_MAP { {"miopenDropoutDescriptor_t", {HIP_2080, HIP_0, HIP_0 }}, {"miopenRNNDescriptor_t", {HIP_2010, HIP_0, HIP_0 }}, {"miopenBackendDescriptor_t", {HIP_6020, HIP_0, HIP_0 }}, + {"miopenBackendAttributeType_t", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_HANDLE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_DATA_TYPE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_BOOLEAN", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_INT64", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_FLOAT", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_DOUBLE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_VOID_PTR", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_CONVOLUTION_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_HEUR_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_KNOB_TYPE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_NAN_PROPOGATION", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_NUMERICAL_NOTE", {HIP_6020, HIP_0, HIP_0 }}, + 
{"MIOPEN_TYPE_LAYOUT_TYPE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_ATTRIB_NAME", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_POINTWISE_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_BACKEND_DESCRIPTOR", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_GENSTATS_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_BN_FINALIZE_STATS_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_REDUCTION_OPERATOR_TYPE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_BEHAVIOR_NOTE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_TENSOR_REORDERING_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_RESAMPLE_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_PADDING_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_INT32", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_CHAR", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_SIGNAL_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_FRACTION", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_NORM_MODE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_NORM_FWD_PHASE", {HIP_6020, HIP_0, HIP_0 }}, + {"MIOPEN_TYPE_RNG_DISTRIBUTION", {HIP_6020, HIP_0, HIP_0 }}, + {"miopenFloat8", {HIP_6000, HIP_0, HIP_0 }}, + {"miopenBFloat8", {HIP_6000, HIP_0, HIP_0 }}, + {"miopenActivationLOGISTIC", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenTransposeBwdDataAlgoGEMM", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenPoolingAverageInclusive", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenPoolingAverage", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenRNNPaddingMode_t", {HIP_6000, HIP_0, HIP_0 }}, + {"miopenRNNIONotPadded", {HIP_6000, HIP_0, HIP_0 }}, + {"miopenRNNIOWithPadding", {HIP_6000, HIP_0, HIP_0 }}, + {"miopenRNNFWDMode_t", {HIP_6000, HIP_0, HIP_0 }}, + {"miopenRNNInference", {HIP_6000, HIP_0, HIP_0 }}, + {"miopenRNNTraining", {HIP_6000, HIP_0, HIP_0 }}, + {"miopenPaddingMode_t", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenPaddingDefault", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenPaddingSame", {HIP_2010, HIP_0, HIP_0 }}, + {"miopenPaddingValid", {HIP_2010, HIP_0, HIP_0 }}, }; diff --git 
a/src/CUDA2HIP_Driver_API_functions.cpp b/src/CUDA2HIP_Driver_API_functions.cpp index 8d168ed7..3c5d4e16 100644 --- a/src/CUDA2HIP_Driver_API_functions.cpp +++ b/src/CUDA2HIP_Driver_API_functions.cpp @@ -710,9 +710,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphLaunch {"cuGraphLaunch", {"hipGraphLaunch", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // NOTE: cudaGraphMemcpyNodeGetParams has a different signature - {"cuGraphMemcpyNodeGetParams", {"hipDrvGraphMemcpyNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cuGraphMemcpyNodeGetParams", {"hipDrvGraphMemcpyNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // NOTE: cudaGraphMemcpyNodeSetParams has a different signature - {"cuGraphMemcpyNodeSetParams", {"hipDrvGraphMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cuGraphMemcpyNodeSetParams", {"hipDrvGraphMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cudaGraphMemsetNodeGetParams {"cuGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // cudaGraphMemsetNodeSetParams @@ -738,7 +738,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphRemoveDependencies_v2 {"cuGraphRemoveDependencies_v2", {"hipGraphRemoveDependencies_v2", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // no analogue - {"cuGraphExecMemcpyNodeSetParams", {"hipDrvGraphExecMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cuGraphExecMemcpyNodeSetParams", {"hipDrvGraphExecMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, + // no analogue + {"cuGraphExecMemsetNodeSetParams", {"hipDrvGraphExecMemsetNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cudaGraphExecHostNodeSetParams {"cuGraphExecHostNodeSetParams", {"hipGraphExecHostNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // TODO: take into account 
the new signature since 12.0 @@ -795,7 +797,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphMemAllocNodeGetParams {"cuGraphMemAllocNodeGetParams", {"hipGraphMemAllocNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // no analogue - {"cuGraphAddMemFreeNode", {"hipDrvGraphAddMemFreeNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cuGraphAddMemFreeNode", {"hipDrvGraphAddMemFreeNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cudaGraphMemFreeNodeGetParams {"cuGraphMemFreeNodeGetParams", {"hipGraphMemFreeNodeGetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // cudaDeviceGraphMemTrim @@ -817,15 +819,15 @@ const std::map CUDA_DRIVER_FUNCTION_MAP { // cudaGraphInstantiateWithParams {"cuGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // cudaGraphExecGetFlags - {"cuGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cuGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cudaGraphAddNode {"cuGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH}}, // cudaGraphAddNode_v2 {"cuGraphAddNode_v2", {"hipGraphAddNode_v2", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, // cudaGraphNodeSetParams - {"cuGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cuGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cudaGraphExecNodeSetParams - {"cuGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cuGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cudaGraphConditionalHandleCreate {"cuGraphConditionalHandleCreate", {"hipGraphConditionalHandleCreate", "", CONV_GRAPH, API_DRIVER, SEC::GRAPH, 
HIP_UNSUPPORTED}}, @@ -1658,6 +1660,11 @@ const std::map HIP_DRIVER_FUNCTION_VER_MAP { {"hipMemcpyAtoA", {HIP_6020, HIP_0, HIP_0, }}, {"hipMemcpyAtoHAsync", {HIP_6020, HIP_0, HIP_0, }}, {"hipMemcpyHtoAAsync", {HIP_6020, HIP_0, HIP_0, }}, + {"hipDrvGraphAddMemFreeNode", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDrvGraphMemcpyNodeGetParams", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDrvGraphMemcpyNodeSetParams", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDrvGraphExecMemcpyNodeSetParams", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipDrvGraphExecMemsetNodeSetParams", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_DRIVER_FUNCTION_CHANGED_VER_MAP { diff --git a/src/CUDA2HIP_RAND_API_functions.cpp b/src/CUDA2HIP_RAND_API_functions.cpp index b2788e82..95599386 100644 --- a/src/CUDA2HIP_RAND_API_functions.cpp +++ b/src/CUDA2HIP_RAND_API_functions.cpp @@ -27,8 +27,8 @@ const std::map CUDA_RAND_FUNCTION_MAP { // RAND Host functions {"curandCreateGenerator", {"hiprandCreateGenerator", "rocrand_create_generator", CONV_LIB_FUNC, API_RAND, 2}}, {"curandCreateGeneratorHost", {"hiprandCreateGeneratorHost", "rocrand_create_generator_host_blocking", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandCreatePoissonDistribution", {"hiprandCreatePoissonDistribution", "", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandDestroyDistribution", {"hiprandDestroyDistribution", "", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandCreatePoissonDistribution", {"hiprandCreatePoissonDistribution", "rocrand_create_poisson_distribution", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandDestroyDistribution", {"hiprandDestroyDistribution", "rocrand_destroy_discrete_distribution", CONV_LIB_FUNC, API_RAND, 2}}, {"curandDestroyGenerator", {"hiprandDestroyGenerator", "rocrand_destroy_generator", CONV_LIB_FUNC, API_RAND, 2}}, {"curandGenerate", {"hiprandGenerate", "rocrand_generate", CONV_LIB_FUNC, API_RAND, 2}}, {"curandGenerateLogNormal", {"hiprandGenerateLogNormal", "rocrand_generate_log_normal", CONV_LIB_FUNC, 
API_RAND, 2}}, @@ -40,49 +40,49 @@ const std::map CUDA_RAND_FUNCTION_MAP { {"curandGenerateSeeds", {"hiprandGenerateSeeds", "rocrand_initialize_generator", CONV_LIB_FUNC, API_RAND, 2}}, {"curandGenerateUniform", {"hiprandGenerateUniform", "rocrand_generate_uniform", CONV_LIB_FUNC, API_RAND, 2}}, {"curandGenerateUniformDouble", {"hiprandGenerateUniformDouble", "rocrand_generate_uniform_double", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandGetDirectionVectors32", {"hiprandGetDirectionVectors32", "", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandGetDirectionVectors64", {"hiprandGetDirectionVectors64", "", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandGetProperty", {"hiprandGetProperty", "", CONV_LIB_FUNC, API_RAND, 2, HIP_UNSUPPORTED}}, - {"curandGetScrambleConstants32", {"hiprandGetScrambleConstants32", "", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandGetScrambleConstants64", {"hiprandGetScrambleConstants64", "", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandGetVersion", {"hiprandGetVersion", "", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandGetDirectionVectors32", {"hiprandGetDirectionVectors32", "rocrand_get_direction_vectors32", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandGetDirectionVectors64", {"hiprandGetDirectionVectors64", "rocrand_get_direction_vectors64", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandGetProperty", {"hiprandGetProperty", "", CONV_LIB_FUNC, API_RAND, 2, UNSUPPORTED}}, + {"curandGetScrambleConstants32", {"hiprandGetScrambleConstants32", "rocrand_get_scramble_constants32", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandGetScrambleConstants64", {"hiprandGetScrambleConstants64", "rocrand_get_scramble_constants64", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandGetVersion", {"hiprandGetVersion", "rocrand_get_version", CONV_LIB_FUNC, API_RAND, 2}}, {"curandSetGeneratorOffset", {"hiprandSetGeneratorOffset", "rocrand_set_offset", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandSetGeneratorOrdering", {"hiprandSetGeneratorOrdering", "", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandSetGeneratorOrdering", 
{"hiprandSetGeneratorOrdering", "rocrand_set_ordering", CONV_LIB_FUNC, API_RAND, 2}}, {"curandSetPseudoRandomGeneratorSeed", {"hiprandSetPseudoRandomGeneratorSeed", "rocrand_set_seed", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandSetQuasiRandomGeneratorDimensions", {"hiprandSetQuasiRandomGeneratorDimensions", "", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandSetQuasiRandomGeneratorDimensions", {"hiprandSetQuasiRandomGeneratorDimensions", "rocrand_set_quasi_random_generator_dimensions", CONV_LIB_FUNC, API_RAND, 2}}, {"curandSetStream", {"hiprandSetStream", "rocrand_set_stream", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandMakeMTGP32Constants", {"hiprandMakeMTGP32Constants", "", CONV_LIB_FUNC, API_RAND, 2}}, - {"curandMakeMTGP32KernelState", {"hiprandMakeMTGP32KernelState", "", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandMakeMTGP32Constants", {"hiprandMakeMTGP32Constants", "rocrand_make_constant", CONV_LIB_FUNC, API_RAND, 2}}, + {"curandMakeMTGP32KernelState", {"hiprandMakeMTGP32KernelState", "rocrand_make_state_mtgp32", CONV_LIB_FUNC, API_RAND, 2}}, // RAND Device functions - {"curand", {"hiprand", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_init", {"hiprand_init", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_log_normal", {"hiprand_log_normal", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_log_normal_double", {"hiprand_log_normal_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_log_normal2", {"hiprand_log_normal2", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_log_normal2_double", {"hiprand_log_normal2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_log_normal4", {"hiprand_log_normal4", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_log_normal4_double", {"hiprand_log_normal4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_mtgp32_single", {"hiprand_mtgp32_single", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, HIP_UNSUPPORTED}}, - {"curand_mtgp32_single_specific", {"hiprand_mtgp32_single_specific", "", CONV_LIB_DEVICE_FUNC, 
API_RAND, 3, HIP_UNSUPPORTED}}, - {"curand_mtgp32_specific", {"hiprand_mtgp32_specific", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, HIP_UNSUPPORTED}}, - {"curand_normal", {"hiprand_normal", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_normal_double", {"hiprand_normal_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_normal2", {"hiprand_normal2", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_normal2_double", {"hiprand_normal2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_normal4", {"hiprand_normal4", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_normal4_double", {"hiprand_normal4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_uniform", {"hiprand_uniform", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_uniform_double", {"hiprand_uniform_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_uniform2_double", {"hiprand_uniform2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_uniform4", {"hiprand_uniform4", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_uniform4_double", {"hiprand_uniform4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_discrete", {"hiprand_discrete", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_discrete4", {"hiprand_discrete4", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_poisson", {"hiprand_poisson", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_poisson4", {"hiprand_poisson4", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, - {"curand_Philox4x32_10", {"hiprand_Philox4x32_10", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, HIP_UNSUPPORTED}}, - {"__curand_umul", {"__hiprand_umul", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, HIP_UNSUPPORTED}}, + {"curand", {"hiprand", "rocrand", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_init", {"hiprand_init", "rocrand_init", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_log_normal", {"hiprand_log_normal", "rocrand_log_normal", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_log_normal_double", 
{"hiprand_log_normal_double", "rocrand_log_normal_double", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_log_normal2", {"hiprand_log_normal2", "rocrand_log_normal2", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_log_normal2_double", {"hiprand_log_normal2_double", "rocrand_log_normal_double2", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_log_normal4", {"hiprand_log_normal4", "rocrand_log_normal4", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_log_normal4_double", {"hiprand_log_normal4_double", "rocrand_log_normal_double4", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_mtgp32_single", {"hiprand_mtgp32_single", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, UNSUPPORTED}}, + {"curand_mtgp32_single_specific", {"hiprand_mtgp32_single_specific", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, UNSUPPORTED}}, + {"curand_mtgp32_specific", {"hiprand_mtgp32_specific", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, UNSUPPORTED}}, + {"curand_normal", {"hiprand_normal", "rocrand_normal", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_normal_double", {"hiprand_normal_double", "rocrand_normal_double", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_normal2", {"hiprand_normal2", "rocrand_normal2", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_normal2_double", {"hiprand_normal2_double", "rocrand_normal_double2", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_normal4", {"hiprand_normal4", "rocrand_normal4", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_normal4_double", {"hiprand_normal4_double", "rocrand_normal_double4", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_uniform", {"hiprand_uniform", "rocrand_uniform", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_uniform_double", {"hiprand_uniform_double", "rocrand_uniform_double", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_uniform2_double", {"hiprand_uniform2_double", "rocrand_uniform_double2", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_uniform4", {"hiprand_uniform4", "rocrand_uniform4", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + 
{"curand_uniform4_double", {"hiprand_uniform4_double", "rocrand_uniform_double4", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_discrete", {"hiprand_discrete", "rocrand_discrete", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_discrete4", {"hiprand_discrete4", "rocrand_discrete4", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_poisson", {"hiprand_poisson", "rocrand_poisson", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_poisson4", {"hiprand_poisson4", "rocrand_poisson4", CONV_LIB_DEVICE_FUNC, API_RAND, 3}}, + {"curand_Philox4x32_10", {"hiprand_Philox4x32_10", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, UNSUPPORTED}}, + {"__curand_umul", {"__hiprand_umul", "", CONV_LIB_DEVICE_FUNC, API_RAND, 3, UNSUPPORTED}}, // unchanged function names: skipahead, skipahead_sequence, skipahead_subsequence }; @@ -159,6 +159,40 @@ const std::map HIP_RAND_FUNCTION_VER_MAP { {"rocrand_set_stream", {HIP_1050, HIP_0, HIP_0, }}, {"rocrand_set_seed", {HIP_1050, HIP_0, HIP_0, }}, {"rocrand_set_offset", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_set_ordering", {HIP_5050, HIP_0, HIP_0, }}, + {"rocrand_set_quasi_random_generator_dimensions", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_get_version", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_create_poisson_distribution", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_get_direction_vectors32", {HIP_6000, HIP_0, HIP_0, }}, + {"rocrand_get_direction_vectors64", {HIP_6000, HIP_0, HIP_0, }}, + {"rocrand_get_scramble_constants32", {HIP_6000, HIP_0, HIP_0, }}, + {"rocrand_get_scramble_constants64", {HIP_6000, HIP_0, HIP_0, }}, + {"rocrand_destroy_discrete_distribution", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_make_constant", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_make_state_mtgp32", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_init", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_log_normal", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_log_normal_double", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_log_normal2", {HIP_1050, HIP_0, 
HIP_0, }}, + {"rocrand_log_normal_double2", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_log_normal4", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_log_normal_double4", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_normal", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_normal_double", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_normal2", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_normal_double2", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_normal4", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_normal_double4", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_uniform", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_uniform_double", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_uniform_double2", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_uniform4", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_uniform_double4", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_discrete", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_discrete4", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_poisson", {HIP_1050, HIP_0, HIP_0, }}, + {"rocrand_poisson4", {HIP_1050, HIP_0, HIP_0, }}, }; const std::map CUDA_RAND_API_SECTION_MAP { diff --git a/src/CUDA2HIP_RAND_API_types.cpp b/src/CUDA2HIP_RAND_API_types.cpp index f2f42298..0184a0a6 100644 --- a/src/CUDA2HIP_RAND_API_types.cpp +++ b/src/CUDA2HIP_RAND_API_types.cpp @@ -79,63 +79,63 @@ const std::map CUDA_RAND_TYPE_NAME_MAP { {"curandGenerator_st", {"hiprandGenerator_st", "rocrand_generator_base_type", CONV_TYPE, API_RAND, 1}}, {"curandGenerator_t", {"hiprandGenerator_t", "rocrand_generator", CONV_TYPE, API_RAND, 1}}, - {"curandDistribution_st", {"hiprandDistribution_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandDistribution_t", {"hiprandDistribution_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - - {"curandHistogramM2V_st", {"hiprandHistogramM2V_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - {"curandHistogramM2V_t", {"hiprandHistogramM2V_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - - {"curandDistributionShift_st", {"hiprandDistributionShift_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - 
{"curandDistributionShift_t", {"hiprandDistributionShift_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, - - {"curandDistributionM2Shift_st", {"hiprandDistributionM2Shift_st", "", CONV_TYPE, API_RAND, 1, HIP_UNSUPPORTED}}, - {"curandDistributionM2Shift_t", {"hiprandDistributionM2Shift_t", "", CONV_TYPE, API_RAND, 1, HIP_UNSUPPORTED}}, - {"curandHistogramM2_st", {"hiprandHistogramM2_st", "", CONV_TYPE, API_RAND, 1, HIP_UNSUPPORTED}}, - {"curandHistogramM2_t", {"hiprandHistogramM2_t", "", CONV_TYPE, API_RAND, 1, HIP_UNSUPPORTED}}, - {"curandHistogramM2K_st", {"hiprandHistogramM2K_st", "", CONV_TYPE, API_RAND, 1, HIP_UNSUPPORTED}}, - {"curandHistogramM2K_t", {"hiprandHistogramM2K_t", "", CONV_TYPE, API_RAND, 1, HIP_UNSUPPORTED}}, - {"curandDiscreteDistribution_st", {"hiprandDiscreteDistribution_st", "", CONV_TYPE, API_RAND, 1}}, - {"curandDiscreteDistribution_t", {"hiprandDiscreteDistribution_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandMethod", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, 1, HIP_UNSUPPORTED}}, - {"curandMethod_t", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, 1, HIP_UNSUPPORTED}}, - {"curandDirectionVectors32_t", {"hiprandDirectionVectors32_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandDirectionVectors64_t", {"hiprandDirectionVectors64_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandDistribution_st", {"hiprandDistribution_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDistribution_t", {"hiprandDistribution_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + + {"curandHistogramM2V_st", {"hiprandHistogramM2V_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2V_t", {"hiprandHistogramM2V_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + + {"curandDistributionShift_st", {"hiprandDistributionShift_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDistributionShift_t", {"hiprandDistributionShift_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + + {"curandDistributionM2Shift_st", {"hiprandDistributionM2Shift_st", "", 
CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDistributionM2Shift_t", {"hiprandDistributionM2Shift_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2_st", {"hiprandHistogramM2_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2_t", {"hiprandHistogramM2_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2K_st", {"hiprandHistogramM2K_st", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandHistogramM2K_t", {"hiprandHistogramM2K_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDiscreteDistribution_st", {"hiprandDiscreteDistribution_st", "rocrand_discrete_distribution_st", CONV_TYPE, API_RAND, 1}}, + {"curandDiscreteDistribution_t", {"hiprandDiscreteDistribution_t", "rocrand_discrete_distribution", CONV_TYPE, API_RAND, 1}}, + {"curandMethod", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandMethod_t", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, 1, UNSUPPORTED}}, + {"curandDirectionVectors32_t", {"hiprandDirectionVectors32_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandDirectionVectors64_t", {"hiprandDirectionVectors64_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, // RAND types for Device functions - {"curandStateMtgp32", {"hiprandStateMtgp32", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateMtgp32_t", {"hiprandStateMtgp32_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateScrambledSobol64", {"hiprandStateScrambledSobol64", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateScrambledSobol64_t", {"hiprandStateScrambledSobol64_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateSobol64", {"hiprandStateSobol64", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateSobol64_t", {"hiprandStateSobol64_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateScrambledSobol32", {"hiprandStateScrambledSobol32", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateScrambledSobol32_t", {"hiprandStateScrambledSobol32_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateSobol32", {"hiprandStateSobol32", "", CONV_TYPE, 
API_RAND, 1}}, - {"curandStateSobol32_t", {"hiprandStateSobol32_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateMRG32k3a", {"hiprandStateMRG32k3a", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateMRG32k3a_t", {"hiprandStateMRG32k3a_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandStatePhilox4_32_10", {"hiprandStatePhilox4_32_10", "", CONV_TYPE, API_RAND, 1}}, - {"curandStatePhilox4_32_10_t", {"hiprandStatePhilox4_32_10_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateXORWOW", {"hiprandStateXORWOW", "", CONV_TYPE, API_RAND, 1}}, - {"curandStateXORWOW_t", {"hiprandStateXORWOW_t", "", CONV_TYPE, API_RAND, 1}}, - {"curandState", {"hiprandState", "", CONV_TYPE, API_RAND, 1}}, - {"curandState_t", {"hiprandState_t", "", CONV_TYPE, API_RAND, 1}}, + {"curandStateMtgp32", {"hiprandStateMtgp32", "rocrand_device::mtgp32_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateMtgp32_t", {"hiprandStateMtgp32_t", "rocrand_state_mtgp32", CONV_TYPE, API_RAND, 1}}, + {"curandStateScrambledSobol64", {"hiprandStateScrambledSobol64", "rocrand_device::scrambled_sobol64_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateScrambledSobol64_t", {"hiprandStateScrambledSobol64_t", "rocrand_state_scrambled_sobol64", CONV_TYPE, API_RAND, 1}}, + {"curandStateSobol64", {"hiprandStateSobol64", "rocrand_device::sobol64_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateSobol64_t", {"hiprandStateSobol64_t", "rocrand_state_sobol64", CONV_TYPE, API_RAND, 1}}, + {"curandStateScrambledSobol32", {"hiprandStateScrambledSobol32", "rocrand_device::scrambled_sobol32_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateScrambledSobol32_t", {"hiprandStateScrambledSobol32_t", "rocrand_state_scrambled_sobol32", CONV_TYPE, API_RAND, 1}}, + {"curandStateSobol32", {"hiprandStateSobol32", "rocrand_device::sobol32_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStateSobol32_t", {"hiprandStateSobol32_t", "rocrand_state_sobol32", CONV_TYPE, API_RAND, 1}}, + {"curandStateMRG32k3a", {"hiprandStateMRG32k3a", "rocrand_device::mrg32k3a_engine", 
CONV_TYPE, API_RAND, 1}}, + {"curandStateMRG32k3a_t", {"hiprandStateMRG32k3a_t", "rocrand_state_mrg32k3a", CONV_TYPE, API_RAND, 1}}, + {"curandStatePhilox4_32_10", {"hiprandStatePhilox4_32_10", "rocrand_device::philox4x32_10_engine", CONV_TYPE, API_RAND, 1}}, + {"curandStatePhilox4_32_10_t", {"hiprandStatePhilox4_32_10_t", "rocrand_state_philox4x32_10", CONV_TYPE, API_RAND, 1}}, + {"curandStateXORWOW", {"hiprandStateXORWOW", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandStateXORWOW_t", {"hiprandStateXORWOW_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandState", {"hiprandState", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, + {"curandState_t", {"hiprandState_t", "", CONV_TYPE, API_RAND, 1, ROC_UNSUPPORTED}}, // RAND method (enum curandMethod) - {"CURAND_CHOOSE_BEST", {"HIPRAND_CHOOSE_BEST", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_ITR", {"HIPRAND_ITR", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_KNUTH", {"HIPRAND_KNUTH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_HITR", {"HIPRAND_HITR", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_M1", {"HIPRAND_M1", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_M2", {"HIPRAND_M2", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_BINARY_SEARCH", {"HIPRAND_BINARY_SEARCH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_DISCRETE_GAUSS", {"HIPRAND_DISCRETE_GAUSS", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_REJECTION", {"HIPRAND_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_DEVICE_API", {"HIPRAND_DEVICE_API", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_FAST_REJECTION", {"HIPRAND_FAST_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_3RD", {"HIPRAND_3RD", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - 
{"CURAND_DEFINITION", {"HIPRAND_DEFINITION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, - {"CURAND_POISSON", {"HIPRAND_POISSON", "", CONV_NUMERIC_LITERAL, API_RAND, 1, HIP_UNSUPPORTED}}, + {"CURAND_CHOOSE_BEST", {"HIPRAND_CHOOSE_BEST", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_ITR", {"HIPRAND_ITR", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_KNUTH", {"HIPRAND_KNUTH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_HITR", {"HIPRAND_HITR", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_M1", {"HIPRAND_M1", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_M2", {"HIPRAND_M2", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_BINARY_SEARCH", {"HIPRAND_BINARY_SEARCH", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_DISCRETE_GAUSS", {"HIPRAND_DISCRETE_GAUSS", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_REJECTION", {"HIPRAND_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_DEVICE_API", {"HIPRAND_DEVICE_API", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_FAST_REJECTION", {"HIPRAND_FAST_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_3RD", {"HIPRAND_3RD", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_DEFINITION", {"HIPRAND_DEFINITION", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, + {"CURAND_POISSON", {"HIPRAND_POISSON", "", CONV_NUMERIC_LITERAL, API_RAND, 1, UNSUPPORTED}}, }; const std::map CUDA_RAND_TYPE_NAME_VER_MAP { @@ -247,4 +247,20 @@ const std::map HIP_RAND_TYPE_NAME_VER_MAP { {"ROCRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6",{HIP_6000, HIP_0, HIP_0 }}, {"rocrand_generator_base_type", {HIP_1050, HIP_0, HIP_0 }}, {"rocrand_generator", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_discrete_distribution_st", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_discrete_distribution", {HIP_1050, HIP_0, HIP_0 }}, + 
{"rocrand_device::philox4x32_10_engine", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_state_philox4x32_10", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_device::mtgp32_engine", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_state_mtgp32", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_device::scrambled_sobol32_engine", {HIP_5040, HIP_0, HIP_0 }}, + {"rocrand_state_scrambled_sobol32", {HIP_5040, HIP_0, HIP_0 }}, + {"rocrand_device::scrambled_sobol64_engine", {HIP_5040, HIP_0, HIP_0 }}, + {"rocrand_state_scrambled_sobol64", {HIP_5040, HIP_0, HIP_0 }}, + {"rocrand_device::sobol32_engine", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_state_sobol32", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_device::sobol64_engine", {HIP_4050, HIP_0, HIP_0 }}, + {"rocrand_state_sobol64", {HIP_4050, HIP_0, HIP_0 }}, + {"rocrand_device::mrg32k3a_engine", {HIP_1050, HIP_0, HIP_0 }}, + {"rocrand_state_mrg32k3a", {HIP_1050, HIP_0, HIP_0 }}, }; diff --git a/src/CUDA2HIP_Runtime_API_functions.cpp b/src/CUDA2HIP_Runtime_API_functions.cpp index 492162aa..2484e596 100644 --- a/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/src/CUDA2HIP_Runtime_API_functions.cpp @@ -863,15 +863,15 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP { // cuGraphInstantiateWithParams {"cudaGraphInstantiateWithParams", {"hipGraphInstantiateWithParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH}}, // cuGraphExecGetFlags - {"cudaGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cudaGraphExecGetFlags", {"hipGraphExecGetFlags", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cuGraphAddNode {"cudaGraphAddNode", {"hipGraphAddNode", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH}}, // cuGraphAddNode_v2 {"cudaGraphAddNode_v2", {"hipGraphAddNode_v2", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, // cuGraphNodeSetParams - {"cudaGraphNodeSetParams", {"hipGraphNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cudaGraphNodeSetParams", {"hipGraphNodeSetParams", 
"", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cuGraphExecNodeSetParams - {"cudaGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, + {"cudaGraphExecNodeSetParams", {"hipGraphExecNodeSetParams", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_EXPERIMENTAL}}, // cuGraphConditionalHandleCreate {"cudaGraphConditionalHandleCreate", {"hipGraphConditionalHandleCreate", "", CONV_GRAPH, API_RUNTIME, SEC::GRAPH, HIP_UNSUPPORTED}}, @@ -1434,6 +1434,9 @@ const std::map HIP_RUNTIME_FUNCTION_VER_MAP { {"hipStreamBeginCaptureToGraph", {HIP_6020, HIP_0, HIP_0, }}, {"hipSetValidDevices", {HIP_6020, HIP_0, HIP_0, }}, {"hipMemcpy2DArrayToArray", {HIP_6020, HIP_0, HIP_0, }}, + {"hipGraphExecGetFlags", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipGraphNodeSetParams", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipGraphExecNodeSetParams", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, }; const std::map CUDA_RUNTIME_FUNCTION_CHANGED_VER_MAP { diff --git a/src/CUDA2HIP_SOLVER_API_functions.cpp b/src/CUDA2HIP_SOLVER_API_functions.cpp index c5f6f720..0eaf1c92 100644 --- a/src/CUDA2HIP_SOLVER_API_functions.cpp +++ b/src/CUDA2HIP_SOLVER_API_functions.cpp @@ -49,8 +49,8 @@ const std::map CUDA_SOLVER_FUNCTION_MAP { {"cusolverDnSetAdvOptions", {"hipsolverDnSetAdvOptions", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED}}, {"cusolverDnSetStream", {"hipsolverSetStream", "rocblas_set_stream", CONV_LIB_FUNC, API_SOLVER, 2}}, {"cusolverDnGetStream", {"hipsolverGetStream", "rocblas_get_stream", CONV_LIB_FUNC, API_SOLVER, 2}}, - {"cusolverDnSetDeterministicMode", {"hipsolverDnSetDeterministicMode", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, - {"cusolverDnGetDeterministicMode", {"hipsolverDnGetDeterministicMode", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnSetDeterministicMode", {"hipsolverDnSetDeterministicMode", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + 
{"cusolverDnGetDeterministicMode", {"hipsolverDnGetDeterministicMode", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cusolverDnIRSParamsCreate", {"hipsolverDnIRSParamsCreate", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnIRSParamsDestroy", {"hipsolverDnIRSParamsDestroy", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnIRSParamsSetRefinementSolver", {"hipsolverDnIRSParamsSetRefinementSolver", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, @@ -452,11 +452,11 @@ const std::map CUDA_SOLVER_FUNCTION_MAP { {"cusolverDnSyevdx", {"hipsolverDnSyevdx", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnGesvd_bufferSize", {"hipsolverDnGesvd_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnGesvd", {"hipsolverDnGesvd", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, - {"cusolverDnXpotrf_bufferSize", {"hipsolverDnXpotrf_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, - {"cusolverDnXpotrf", {"hipsolverDnXpotrf", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, - {"cusolverDnXpotrs", {"hipsolverDnXpotrs", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, - {"cusolverDnXgeqrf_bufferSize", {"hipsolverDnXgeqrf_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, - {"cusolverDnXgeqrf", {"hipsolverDnXgeqrf", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, + {"cusolverDnXpotrf_bufferSize", {"hipsolverDnXpotrf_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnXpotrf", {"hipsolverDnXpotrf", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnXpotrs", {"hipsolverDnXpotrs", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnXgeqrf_bufferSize", {"hipsolverDnXgeqrf_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnXgeqrf", {"hipsolverDnXgeqrf", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | 
HIP_EXPERIMENTAL}}, {"cusolverDnXsyevd_bufferSize", {"hipsolverDnXsyevd_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXsyevd", {"hipsolverDnXsyevd", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, {"cusolverDnXsyevdx_bufferSize", {"hipsolverDnXsyevdx_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, UNSUPPORTED}}, @@ -1436,6 +1436,13 @@ const std::map HIP_SOLVER_FUNCTION_VER_MAP { {"hipsolverDnXgetrf", {HIP_6020, HIP_0, HIP_0, }}, {"hipsolverDnXgetrf_bufferSize", {HIP_6020, HIP_0, HIP_0, }}, {"hipsolverDnXgetrs", {HIP_6020, HIP_0, HIP_0, }}, + {"hipsolverDnSetDeterministicMode", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnGetDeterministicMode", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnXgeqrf_bufferSize", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnXgeqrf", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnXpotrf_bufferSize", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnXpotrf", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnXpotrs", {HIP_6030, HIP_0, HIP_0, HIP_LATEST}}, {"rocsolver_spotrf", {HIP_3020, HIP_0, HIP_0 }}, {"rocsolver_dpotrf", {HIP_3020, HIP_0, HIP_0 }}, diff --git a/src/CUDA2HIP_SOLVER_API_types.cpp b/src/CUDA2HIP_SOLVER_API_types.cpp index 534763af..87001718 100644 --- a/src/CUDA2HIP_SOLVER_API_types.cpp +++ b/src/CUDA2HIP_SOLVER_API_types.cpp @@ -119,9 +119,9 @@ const std::map CUDA_SOLVER_TYPE_NAME_MAP { {"cusolverDnFunction_t", {"hipsolverDnFunction_t", "", CONV_TYPE, API_SOLVER, 1, ROC_UNSUPPORTED}}, {"CUSOLVERDN_GETRF", {"HIPSOLVERDN_GETRF", "", CONV_NUMERIC_LITERAL, API_SOLVER, 1, ROC_UNSUPPORTED}}, {"CUSOLVERDN_POTRF", {"HIPSOLVERDN_POTRF", "", CONV_NUMERIC_LITERAL, API_SOLVER, 1, UNSUPPORTED}}, - {"cusolverDeterministicMode_t", {"hipsolverDeterministicMode_t", "", CONV_TYPE, API_SOLVER, 1, UNSUPPORTED}}, - {"CUSOLVER_DETERMINISTIC_RESULTS", {"HIPSOLVER_DETERMINISTIC_RESULTS", "", CONV_NUMERIC_LITERAL, API_SOLVER, 1, UNSUPPORTED}}, - 
{"CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS", {"HIPSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS", "", CONV_NUMERIC_LITERAL, API_SOLVER, 1, UNSUPPORTED}}, + {"cusolverDeterministicMode_t", {"hipsolverDeterministicMode_t", "", CONV_TYPE, API_SOLVER, 1, ROC_UNSUPPORTED}}, + {"CUSOLVER_DETERMINISTIC_RESULTS", {"HIPSOLVER_DETERMINISTIC_RESULTS", "", CONV_NUMERIC_LITERAL, API_SOLVER, 1, ROC_UNSUPPORTED}}, + {"CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS", {"HIPSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS", "", CONV_NUMERIC_LITERAL, API_SOLVER, 1, ROC_UNSUPPORTED}}, {"cusolver_int_t", {"int", "rocblas_int", CONV_NUMERIC_LITERAL, API_SOLVER, 1}}, {"cusolverDnLoggerCallback_t", {"hipsolverDnLoggerCallback_t", "", CONV_NUMERIC_LITERAL, API_SOLVER, 1, UNSUPPORTED}}, {"cusolverMgContext", {"hipsolverMgContext", "", CONV_TYPE, API_SOLVER, 1, UNSUPPORTED}}, @@ -329,6 +329,9 @@ const std::map HIP_SOLVER_TYPE_NAME_VER_MAP { {"HIPSOLVER_ALG_1", {HIP_6020, HIP_0, HIP_0, }}, {"hipsolverDnFunction_t", {HIP_6020, HIP_0, HIP_0, }}, {"HIPSOLVERDN_GETRF", {HIP_6020, HIP_0, HIP_0, }}, + {"hipsolverDeterministicMode_t", {HIP_6030, HIP_0, HIP_0, }}, + {"HIPSOLVER_DETERMINISTIC_RESULTS", {HIP_6030, HIP_0, HIP_0, }}, + {"HIPSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS", {HIP_6030, HIP_0, HIP_0, }}, {"rocblas_int", {HIP_3000, HIP_0, HIP_0 }}, {"rocblas_status", {HIP_3000, HIP_0, HIP_0 }}, diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 91e01fb3..97f76c57 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -544,6 +544,7 @@ std::string Statistics::getCudaVersion(const cudaVersions &ver) { case CUDNN_920: return "9.2.0"; case CUDNN_930: return "9.3.0"; case CUDNN_940: return "9.4.0"; + case CUDNN_950: return "9.5.0"; } return ""; } @@ -617,6 +618,7 @@ std::string Statistics::getHipVersion(const hipVersions &ver) { case HIP_6010: return "6.1.0"; case HIP_6011: return "6.1.1"; case HIP_6020: return "6.2.0"; + case HIP_6030: return "6.3.0"; } return ""; } diff --git a/src/Statistics.h b/src/Statistics.h 
index 3095d47b..fa114715 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -298,7 +298,8 @@ enum cudaVersions { CUDNN_920 = 920, CUDNN_930 = 930, CUDNN_940 = 940, - CUDNN_LATEST = CUDNN_940, + CUDNN_950 = 950, + CUDNN_LATEST = CUDNN_950, }; enum hipVersions { @@ -368,7 +369,8 @@ enum hipVersions { HIP_6010 = 6010, HIP_6011 = 6011, HIP_6020 = 6020, - HIP_LATEST = HIP_6020, + HIP_6030 = 6030, + HIP_LATEST = HIP_6030, }; struct cudaAPIversions { diff --git a/tests/unit_tests/synthetic/driver_functions.cu b/tests/unit_tests/synthetic/driver_functions.cu index 3e2ee455..14c53578 100644 --- a/tests/unit_tests/synthetic/driver_functions.cu +++ b/tests/unit_tests/synthetic/driver_functions.cu @@ -1252,6 +1252,16 @@ int main() { // CHECK: result = hipGraphLaunch(graphExec, stream); result = cuGraphLaunch(graphExec, stream); + // CUDA: CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D *nodeParams); + // HIP: hipError_t hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms* pNodeParams); + // CHECK: result = hipDrvGraphMemcpyNodeGetParams(graphNode, &MEMCPY3D); + result = cuGraphMemcpyNodeGetParams(graphNode, &MEMCPY3D); + + // CUDA: CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D *nodeParams); + // HIP: hipError_t hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms* pNodeParams); + // CHECK: result = hipDrvGraphMemcpyNodeSetParams(graphNode, &MEMCPY3D); + result = cuGraphMemcpyNodeSetParams(graphNode, &MEMCPY3D); + // CUDA: CUresult CUDAAPI cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams); // HIP: hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams* pNodeParams); // CHECK: result = hipGraphMemsetNodeGetParams(graphNode, &MEMSET_NODE_PARAMS); @@ -1463,6 +1473,11 @@ int main() { // HIP: hipError_t hipMemUnmap(void* ptr, size_t size); // CHECK: result = hipMemUnmap(deviceptr, bytes); result = cuMemUnmap(deviceptr, 
bytes); + + // CUDA: CUresult CUDAAPI cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D *copyParams, CUcontext ctx); + // HIP: hipError_t hipDrvGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); + // CHECK: result = hipDrvGraphExecMemcpyNodeSetParams(graphExec, graphNode, &MEMCPY3D, context); + result = cuGraphExecMemcpyNodeSetParams(graphExec, graphNode, &MEMCPY3D, context); #endif #if CUDA_VERSION >= 10020 && CUDA_VERSION < 12000 @@ -1800,6 +1815,11 @@ int main() { // CHECK: result = hipGraphMemAllocNodeGetParams(graphNode, &MEM_ALLOC_NODE_PARAMS); result = cuGraphMemAllocNodeGetParams(graphNode, &MEM_ALLOC_NODE_PARAMS); + // CUDA: CUresult CUDAAPI cuGraphAddMemFreeNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUdeviceptr dptr); + // HIP: hipError_t hipDrvGraphAddMemFreeNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, size_t numDependencies, void* dev_ptr); + // CHECK: result = hipDrvGraphAddMemFreeNode(&graphNode, graph, &graphNode2, bytes, deviceptr); + result = cuGraphAddMemFreeNode(&graphNode, graph, &graphNode2, bytes, deviceptr); + // CUDA: CUresult CUDAAPI cuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr *dptr_out); // HIP: hipError_t hipGraphMemFreeNodeGetParams(hipGraphNode_t node, void* dev_ptr); // CHECK: result = hipGraphMemFreeNodeGetParams(graphNode, &deviceptr); @@ -1886,6 +1906,11 @@ int main() { // HIP: hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t graph, hipGraphInstantiateParams *instantiateParams); // CHECK: result = hipGraphInstantiateWithParams(&graphExec, graph, &GRAPH_INSTANTIATE_PARAMS); result = cuGraphInstantiateWithParams(&graphExec, graph, &GRAPH_INSTANTIATE_PARAMS); + + // CUDA: CUresult CUDAAPI cuGraphExecGetFlags(CUgraphExec hGraphExec, cuuint64_t *flags); + // HIP: hipError_t 
hipGraphExecGetFlags(hipGraphExec_t graphExec, unsigned long long* flags); + // CHECK: result = hipGraphExecGetFlags(graphExec, &ull); + result = cuGraphExecGetFlags(graphExec, &ull); #endif #if CUDA_VERSION >= 12020 @@ -1896,6 +1921,16 @@ int main() { // HIP: hipError_t hipGraphAddNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t *pDependencies, size_t numDependencies, hipGraphNodeParams *nodeParams); // CHECK: result = hipGraphAddNode(&graphNode, graph, &graphNode2, bytes, &graphNodeParams); result = cuGraphAddNode(&graphNode, graph, &graphNode2, bytes, &graphNodeParams); + + // CUDA: CUresult CUDAAPI cuGraphNodeSetParams(CUgraphNode hNode, CUgraphNodeParams *nodeParams); + // HIP: hipError_t hipGraphNodeSetParams(hipGraphNode_t node, hipGraphNodeParams *nodeParams); + // CHECK: result = hipGraphNodeSetParams(graphNode, &graphNodeParams); + result = cuGraphNodeSetParams(graphNode, &graphNodeParams); + + // CUDA: CUresult CUDAAPI cuGraphExecNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraphNodeParams *nodeParams); + // HIP: hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t node, hipGraphNodeParams* nodeParams); + // CHECK: result = hipGraphExecNodeSetParams(graphExec, graphNode, &graphNodeParams); + result = cuGraphExecNodeSetParams(graphExec, graphNode, &graphNodeParams); #endif #if CUDA_VERSION >= 12030 diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 9e7f7e54..ff3cdbff 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -1651,6 +1651,10 @@ int main() { __half* hc = 0; // CHECK: __half* hC = 0; __half* hC = 0; + // CHECK: __half* hx = 0; + __half* hx = 0; + // CHECK: __half* hy = 0; + __half* hy = 0; // CHECK: __half** hAarray = 0; __half** hAarray = 0; @@ -1848,16 +1852,24 @@ int main() { cublasDataType_t R_16BF = CUDA_R_16BF; 
cublasDataType_t C_16BF = CUDA_C_16BF; + // CHECK: hip_bfloat16* bf16A = nullptr; + __nv_bfloat16* bf16A = nullptr; // CHECK: hip_bfloat16** bf16Aarray = 0; __nv_bfloat16** bf16Aarray = 0; // CHECK: const hip_bfloat16** const bf16Aarray_const = const_cast(bf16Aarray); const __nv_bfloat16** const bf16Aarray_const = const_cast(bf16Aarray); + // CHECK: hip_bfloat16* bf16X = nullptr; + __nv_bfloat16* bf16X = nullptr; // CHECK: hip_bfloat16** bf16xarray = 0; __nv_bfloat16** bf16xarray = 0; - // CHECK: const hip_bfloat16** const bf16xarray_const = const_cast(bf16xarray_const); - const __nv_bfloat16** const bf16xarray_const = const_cast(bf16xarray_const); + // CHECK: const hip_bfloat16** const bf16xarray_const = const_cast(bf16xarray); + const __nv_bfloat16** const bf16xarray_const = const_cast(bf16xarray); + // CHECK: hip_bfloat16* bf16Y = nullptr; + __nv_bfloat16* bf16Y = nullptr; // CHECK: hip_bfloat16** bf16yarray = 0; __nv_bfloat16** bf16yarray = 0; + // CHECK: const hip_bfloat16** const bf16yarray_const = const_cast(bf16yarray); + const __nv_bfloat16** const bf16yarray_const = const_cast(bf16yarray); // CHECK: hipblasComputeType_t blasComputeType; cublasComputeType_t blasComputeType; @@ -2315,23 +2327,23 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, long long int strideA, const float* x, int64_t incx, long long int stridex, const float* beta, float* y, int64_t incy, long long int stridey, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemvStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, const float* alpha, const float* AP, int64_t lda, hipblasStride strideA, const float* x, int64_t incx, hipblasStride stridex, const float* beta, float* y, int64_t incy, hipblasStride stridey, int64_t batchCount); - // CHECK: blasStatus = 
hipblasSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, &fA, lda_64, strideA, &fx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount); - blasStatus = cublasSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, &fA, lda_64, strideA, &fx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount); + // CHECK: blasStatus = hipblasSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, &fA, lda_64, strideA, &fx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount_64); + blasStatus = cublasSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, &fA, lda_64, strideA, &fx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, long long int strideA, const double* x, int64_t incx, long long int stridex, const double* beta, double* y, int64_t incy, long long int stridey, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvStridedBatched_64(hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, const double* alpha, const double* AP, int64_t lda, hipblasStride strideA, const double* x, int64_t incx, hipblasStride stridex, const double* beta, double* y, int64_t incy, hipblasStride stridey, int64_t batchCount); - // CHECK: blasStatus = hipblasDgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, strideA, &dx, incx_64, strideX, &db, &dy, incy_64, strideY, batchCount); - blasStatus = cublasDgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, strideA, &dx, incx_64, strideX, &db, &dy, incy_64, strideY, batchCount); + // CHECK: blasStatus = hipblasDgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, strideA, &dx, incx_64, strideX, &db, &dy, incy_64, strideY, batchCount_64); + blasStatus = 
cublasDgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, strideA, &dx, incx_64, strideX, &db, &dy, incy_64, strideY, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, long long int strideA, const cuComplex* x, int64_t incx, long long int stridex, const cuComplex* beta, cuComplex* y, int64_t incy, long long int stridey, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemvStridedBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n,const hipComplex* alpha, const hipComplex* AP, int64_t lda,hipblasStride strideA, const hipComplex* x, int64_t incx, hipblasStride stridex, const hipComplex* beta, hipComplex* y, int64_t incy, hipblasStride stridey, int64_t batchCount); - // CHECK: blasStatus = hipblasCgemvStridedBatched_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, strideA, &complexx, incx_64, strideX, &complexb, &complexy, incy_64, strideY, batchCount); - blasStatus = cublasCgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, strideA, &complexx, incx_64, strideX, &complexb, &complexy, incy_64, strideY, batchCount); + // CHECK: blasStatus = hipblasCgemvStridedBatched_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, strideA, &complexx, incx_64, strideX, &complexb, &complexy, incy_64, strideY, batchCount_64); + blasStatus = cublasCgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, strideA, &complexx, incx_64, strideX, &complexb, &complexy, incy_64, strideY, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, long long int 
strideA, const cuDoubleComplex* x, int64_t incx, long long int stridex, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy, long long int stridey, int64_t batchCount); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemvStridedBatched_v2_64(hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, hipblasStride strideA, const hipDoubleComplex* x, int64_t incx, hipblasStride stridex, const hipDoubleComplex* beta, hipDoubleComplex* y, int64_t incy, hipblasStride stridey, int64_t batchCount); - // CHECK: blasStatus = hipblasZgemvStridedBatched_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexx, incx_64, strideX, &dcomplexb, &dcomplexy, incy_64, strideY, batchCount); - blasStatus = cublasZgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexx, incx_64, strideX, &dcomplexb, &dcomplexy, incy_64, strideY, batchCount); + // CHECK: blasStatus = hipblasZgemvStridedBatched_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexx, incx_64, strideX, &dcomplexb, &dcomplexy, incy_64, strideY, batchCount_64); + blasStatus = cublasZgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexx, incx_64, strideX, &dcomplexb, &dcomplexy, incy_64, strideY, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2_64(cublasHandle_t handle, int64_t m, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSger_64(hipblasHandle_t handle, int64_t m, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* AP, int64_t lda); diff --git a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu 
b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu index 1ffb03fb..cc4fe1dd 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2rocblas_v2.cu @@ -166,6 +166,7 @@ int main() { int incy = 0; int64_t incy_64 = 0; int k = 0; + int64_t k_64 = 0; int kl = 0; int64_t kl_64 = 0; int ku = 0; @@ -1731,6 +1732,8 @@ int main() { long long int strideA = 0; long long int strideB = 0; long long int strideC = 0; + long long int strideX = 0; + long long int strideY = 0; #if CUDA_VERSION >= 7050 // CHECK: rocblas_half* ha = 0; @@ -1745,6 +1748,10 @@ int main() { __half* hc = 0; // CHECK: rocblas_half* hC = 0; __half* hC = 0; + // CHECK: rocblas_half* hx = 0; + __half* hx = 0; + // CHECK: rocblas_half* hy = 0; + __half* hy = 0; // CHECK: rocblas_half** hAarray = 0; __half** hAarray = 0; @@ -1958,17 +1965,24 @@ int main() { cublasDataType_t R_16BF = CUDA_R_16BF; cublasDataType_t C_16BF = CUDA_C_16BF; - // CHECK: rocblas_bfloat16** bfAarray = 0; - __nv_bfloat16** bfAarray = 0; - // CHECK: const rocblas_bfloat16** const bfAarray_const = const_cast(bfAarray); - const __nv_bfloat16** const bfAarray_const = const_cast(bfAarray); - // CHECK: rocblas_bfloat16** bfXarray = 0; - __nv_bfloat16** bfXarray = 0; - // CHECK: const rocblas_bfloat16** const bfXarray_const = const_cast(bfXarray); - const __nv_bfloat16** const bfXarray_const = const_cast(bfXarray); - __nv_bfloat16** bfYarray = 0; - // CHECK: const rocblas_bfloat16** const bfYarray_const = const_cast(bfYarray); - const __nv_bfloat16** const bfYarray_const = const_cast(bfYarray); + // CHECK: rocblas_bfloat16* bf16A = nullptr; + __nv_bfloat16* bf16A = nullptr; + // CHECK: rocblas_bfloat16** bf16Aarray = 0; + __nv_bfloat16** bf16Aarray = 0; + // CHECK: const rocblas_bfloat16** const bf16Aarray_const = const_cast(bf16Aarray); + const __nv_bfloat16** const bf16Aarray_const = const_cast(bf16Aarray); + // CHECK: rocblas_bfloat16* bf16X = nullptr; + __nv_bfloat16* 
bf16X = nullptr; + // CHECK: rocblas_bfloat16** bf16xarray = 0; + __nv_bfloat16** bf16xarray = 0; + // CHECK: const rocblas_bfloat16** const bf16xarray_const = const_cast<const rocblas_bfloat16**>(bf16xarray); + const __nv_bfloat16** const bf16xarray_const = const_cast<const __nv_bfloat16**>(bf16xarray); + // CHECK: rocblas_bfloat16* bf16Y = nullptr; + __nv_bfloat16* bf16Y = nullptr; + // CHECK: rocblas_bfloat16** bf16yarray = 0; + __nv_bfloat16** bf16yarray = 0; + // CHECK: const rocblas_bfloat16** const bf16yarray_const = const_cast<const rocblas_bfloat16**>(bf16yarray); + const __nv_bfloat16** const bf16yarray_const = const_cast<const __nv_bfloat16**>(bf16yarray); #endif #if CUDA_VERSION >= 11040 && CUBLAS_VERSION >= 11600 @@ -1988,6 +2002,36 @@ int main() { // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemv_batched(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const double* alpha, const double* const A[], rocblas_int lda, const double* const x[], rocblas_int incx, const double* beta, double* const y[], rocblas_int incy, rocblas_int batch_count); // CHECK: blasStatus = rocblas_dgemv_batched(blasHandle, blasOperation, m, n, &da, dAarray_const, lda, dXarray_const, incx, &db, dYarray, incy, batchCount); blasStatus = cublasDgemvBatched(blasHandle, blasOperation, m, n, &da, dAarray_const, lda, dXarray_const, incx, &db, dYarray, incy, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuComplex* alpha, const cuComplex* const Aarray[], int lda, const cuComplex* const xarray[], int incx, const cuComplex* beta, cuComplex* const yarray[], int incy, int batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemv_batched(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* const A[], rocblas_int lda, const rocblas_float_complex* const x[], rocblas_int incx, const rocblas_float_complex* beta, rocblas_float_complex* const y[], rocblas_int incy, 
rocblas_int batch_count); + // CHECK: blasStatus = rocblas_cgemv_batched(blasHandle, blasOperation, m, n, &complexa, complexAarray_const, lda, complexXarray_const, incx, &complexb, complexYarray, incy, batchCount); + blasStatus = cublasCgemvBatched(blasHandle, blasOperation, m, n, &complexa, complexAarray_const, lda, complexXarray_const, incx, &complexb, complexYarray, incy, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* const Aarray[], int lda, const cuDoubleComplex* const xarray[], int incx, const cuDoubleComplex* beta, cuDoubleComplex* const yarray[], int incy, int batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemv_batched(rocblas_handle handle, rocblas_operation trans, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* const A[], rocblas_int lda, const rocblas_double_complex* const x[], rocblas_int incx, const rocblas_double_complex* beta, rocblas_double_complex* const y[], rocblas_int incy, rocblas_int batch_count); + // CHECK: blasStatus = rocblas_zgemv_batched(blasHandle, blasOperation, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexXarray_const, incx, &dcomplexb, dcomplexYarray, incy, batchCount); + blasStatus = cublasZgemvBatched(blasHandle, blasOperation, m, n, &dcomplexa, dcomplexAarray_const, lda, dcomplexXarray_const, incx, &dcomplexb, dcomplexYarray, incy, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const float* alpha, const float* A, int lda, long long int strideA, const float* x, int incx, long long int stridex, const float* beta, float* y, int incy, long long int stridey, int batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemv_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_int m, 
rocblas_int n, const float* alpha, const float* A, rocblas_int lda, rocblas_stride strideA, const float* x, rocblas_int incx, rocblas_stride stridex, const float* beta, float* y, rocblas_int incy, rocblas_stride stridey, rocblas_int batch_count); + // CHECK: blasStatus = rocblas_sgemv_strided_batched(blasHandle, blasOperation, m, n, &fa, &fA, lda, strideA, &fx, incx, strideX, &fb, &fy, incy, strideY, batchCount); + blasStatus = cublasSgemvStridedBatched(blasHandle, blasOperation, m, n, &fa, &fA, lda, strideA, &fx, incx, strideX, &fb, &fy, incy, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const double* alpha, const double* A, int lda, long long int strideA, const double* x, int incx, long long int stridex, const double* beta, double* y, int incy, long long int stridey, int batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemv_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, const double* alpha, const double* A, rocblas_int lda, rocblas_stride strideA, const double* x, rocblas_int incx, rocblas_stride stridex, const double* beta, double* y, rocblas_int incy, rocblas_stride stridey, rocblas_int batch_count); + // CHECK: blasStatus = rocblas_dgemv_strided_batched(blasHandle, blasOperation, m, n, &da, &dA, lda, strideA, &dx, incx, strideX, &db, &dy, incy, strideY, batchCount); + blasStatus = cublasDgemvStridedBatched(blasHandle, blasOperation, m, n, &da, &dA, lda, strideA, &dx, incx, strideX, &db, &dy, incy, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvStridedBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuComplex* alpha, const cuComplex* A, int lda, long long int strideA, const cuComplex* x, int incx, long long int stridex, const cuComplex* beta, cuComplex* y, int incy, long long int stridey, int batchCount); + // ROC: 
ROCBLAS_EXPORT rocblas_status rocblas_cgemv_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, rocblas_int lda, rocblas_stride strideA, const rocblas_float_complex* x, rocblas_int incx, rocblas_stride stridex, const rocblas_float_complex* beta, rocblas_float_complex* y, rocblas_int incy, rocblas_stride stridey, rocblas_int batch_count); + // CHECK: blasStatus = rocblas_cgemv_strided_batched(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, strideA, &complexx, incx, strideX, &complexb, &complexy, incy, strideY, batchCount); + blasStatus = cublasCgemvStridedBatched(blasHandle, blasOperation, m, n, &complexa, &complexA, lda, strideA, &complexx, incx, strideX, &complexb, &complexy, incy, strideY, batchCount); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvStridedBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda, long long int strideA, const cuDoubleComplex* x, int incx, long long int stridex, const cuDoubleComplex* beta, cuDoubleComplex* y, int incy, long long int stridey, int batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemv_strided_batched(rocblas_handle handle, rocblas_operation transA, rocblas_int m, rocblas_int n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, rocblas_int lda, rocblas_stride strideA, const rocblas_double_complex* x, rocblas_int incx, rocblas_stride stridex, const rocblas_double_complex* beta, rocblas_double_complex* y, rocblas_int incy, rocblas_stride stridey, rocblas_int batch_count); + // CHECK: blasStatus = rocblas_zgemv_strided_batched(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, strideA, &dcomplexx, incx, strideX, &dcomplexb, &dcomplexy, incy, strideY, batchCount); + blasStatus = cublasZgemvStridedBatched(blasHandle, blasOperation, m, n, &dcomplexa, &dcomplexA, lda, 
strideA, &dcomplexx, incx, strideX, &dcomplexb, &dcomplexy, incy, strideY, batchCount); #endif #if CUDA_VERSION >= 12000 @@ -2440,13 +2484,431 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSTgemvBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const __nv_bfloat16* const Aarray[], int64_t lda, const __nv_bfloat16* const xarray[], int64_t incx, const float* beta, __nv_bfloat16* const yarray[], int64_t incy, int64_t batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_tstgemv_batched_64(rocblas_handle handle, rocblas_operation trans, int64_t m, int64_t n, const float* alpha, const rocblas_bfloat16* const A[], int64_t lda, const rocblas_bfloat16* const x[], int64_t incx, const float* beta, rocblas_bfloat16* const y[], int64_t incy, int64_t batch_count); - // CHECK: blasStatus = rocblas_tstgemv_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, bfAarray_const, lda_64, bfXarray_const, incx_64, &fb, bfYarray, incy_64, batchCount_64); - blasStatus = cublasTSTgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, bfAarray_const, lda_64, bfXarray_const, incx_64, &fb, bfYarray, incy_64, batchCount_64); + // CHECK: blasStatus = rocblas_tstgemv_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, bf16Aarray_const, lda_64, bf16xarray_const, incx_64, &fb, bf16yarray, incy_64, batchCount_64); + blasStatus = cublasTSTgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, bf16Aarray_const, lda_64, bf16xarray_const, incx_64, &fb, bf16yarray, incy_64, batchCount_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSSgemvBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const __nv_bfloat16* const Aarray[], int64_t lda, const __nv_bfloat16* const xarray[], int64_t incx, const float* beta, float* const yarray[], int64_t incy, int64_t batchCount); // ROC: ROCBLAS_EXPORT rocblas_status rocblas_tssgemv_batched_64(rocblas_handle handle, 
rocblas_operation trans, int64_t m, int64_t n, const float* alpha, const rocblas_bfloat16* const A[], int64_t lda, const rocblas_bfloat16* const x[], int64_t incx, const float* beta, float* const y[], int64_t incy, int64_t batch_count); - // CHECK: blasStatus = rocblas_tssgemv_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, bfAarray_const, lda_64, bfXarray_const, incx_64, &fb, fYarray, incy_64, batchCount_64); - blasStatus = cublasTSSgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, bfAarray_const, lda_64, bfXarray_const, incx_64, &fb, fYarray, incy_64, batchCount_64); + // CHECK: blasStatus = rocblas_tssgemv_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, bf16Aarray_const, lda_64, bf16xarray_const, incx_64, &fb, fYarray, incy_64, batchCount_64); + blasStatus = cublasTSSgemvBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, bf16Aarray_const, lda_64, bf16xarray_const, incx_64, &fb, fYarray, incy_64, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, long long int strideA, const float* x, int64_t incx, long long int stridex, const float* beta, float* y, int64_t incy, long long int stridey, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sgemv_strided_batched_64(rocblas_handle handle, rocblas_operation transA, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, rocblas_stride strideA, const float* x, int64_t incx, rocblas_stride stridex, const float* beta, float* y, int64_t incy, rocblas_stride stridey, int64_t batch_count); + // CHECK: blasStatus = rocblas_sgemv_strided_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, &fA, lda_64, strideA, &fx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount_64); + blasStatus = cublasSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, &fA, lda_64, strideA, &fx, incx_64, 
strideX, &fb, &fy, incy_64, strideY, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, long long int strideA, const double* x, int64_t incx, long long int stridex, const double* beta, double* y, int64_t incy, long long int stridey, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dgemv_strided_batched_64(rocblas_handle handle, rocblas_operation transA, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, rocblas_stride strideA, const double* x, int64_t incx, rocblas_stride stridex, const double* beta, double* y, int64_t incy, rocblas_stride stridey, int64_t batch_count); + // CHECK: blasStatus = rocblas_dgemv_strided_batched_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, strideA, &dx, incx_64, strideX, &db, &dy, incy_64, strideY, batchCount_64); + blasStatus = cublasDgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, strideA, &dx, incx_64, strideX, &db, &dy, incy_64, strideY, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, long long int strideA, const cuComplex* x, int64_t incx, long long int stridex, const cuComplex* beta, cuComplex* y, int64_t incy, long long int stridey, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cgemv_strided_batched_64(rocblas_handle handle, rocblas_operation transA, int64_t m, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, rocblas_stride strideA, const rocblas_float_complex* x, int64_t incx, rocblas_stride stridex, const rocblas_float_complex* beta, rocblas_float_complex* y, int64_t incy, rocblas_stride stridey, int64_t batch_count); + // CHECK: blasStatus = 
rocblas_cgemv_strided_batched_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, strideA, &complexx, incx_64, strideX, &complexb, &complexy, incy_64, strideY, batchCount_64); + blasStatus = cublasCgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, strideA, &complexx, incx_64, strideX, &complexb, &complexy, incy_64, strideY, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, long long int strideA, const cuDoubleComplex* x, int64_t incx, long long int stridex, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy, long long int stridey, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zgemv_strided_batched_64(rocblas_handle handle, rocblas_operation transA, int64_t m, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, rocblas_stride strideA, const rocblas_double_complex* x, int64_t incx, rocblas_stride stridex, const rocblas_double_complex* beta, rocblas_double_complex* y, int64_t incy, rocblas_stride stridey, int64_t batch_count); + // CHECK: blasStatus = rocblas_zgemv_strided_batched_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexx, incx_64, strideX, &dcomplexb, &dcomplexy, incy_64, strideY, batchCount_64); + blasStatus = cublasZgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, strideA, &dcomplexx, incx_64, strideX, &dcomplexb, &dcomplexy, incy_64, strideY, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSHgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const __half* A, int64_t lda, long long int strideA, const __half* x, int64_t incx, long long int stridex, const float* beta, 
__half* y, int64_t incy, long long int stridey, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hshgemv_strided_batched_64(rocblas_handle handle, rocblas_operation transA, int64_t m, int64_t n, const float* alpha, const rocblas_half* A, int64_t lda, rocblas_stride strideA, const rocblas_half* x, int64_t incx, rocblas_stride stridex, const float* beta, rocblas_half* y, int64_t incy, rocblas_stride stridey, int64_t batch_count); + // CHECK: blasStatus = rocblas_hshgemv_strided_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, ha, lda_64, strideA, hx, incx_64, strideX, &fb, hy, incy_64, strideY, batchCount_64); + blasStatus = cublasHSHgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, ha, lda_64, strideA, hx, incx_64, strideX, &fb, hy, incy_64, strideY, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSSgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const __half* A, int64_t lda, long long int strideA, const __half* x, int64_t incx, long long int stridex, const float* beta, float* y, int64_t incy, long long int stridey, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_hssgemv_strided_batched_64(rocblas_handle handle, rocblas_operation transA, int64_t m, int64_t n, const float* alpha, const rocblas_half* A, int64_t lda, rocblas_stride strideA, const rocblas_half* x, int64_t incx, rocblas_stride stridex, const float* beta, float* y, int64_t incy, rocblas_stride stridey, int64_t batch_count); + // CHECK: blasStatus = rocblas_hssgemv_strided_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, ha, lda_64, strideA, hx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount_64); + blasStatus = cublasHSSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, ha, lda_64, strideA, hx, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI 
cublasTSTgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const __nv_bfloat16* A, int64_t lda, long long int strideA, const __nv_bfloat16* x, int64_t incx, long long int stridex, const float* beta, __nv_bfloat16* y, int64_t incy, long long int stridey, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_tstgemv_strided_batched_64(rocblas_handle handle, rocblas_operation transA, int64_t m, int64_t n, const float* alpha, const rocblas_bfloat16* A, int64_t lda, rocblas_stride strideA, const rocblas_bfloat16* x, int64_t incx, rocblas_stride stridex, const float* beta, rocblas_bfloat16* y, int64_t incy, rocblas_stride stridey, int64_t batch_count); + // CHECK: blasStatus = rocblas_tstgemv_strided_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, bf16A, lda_64, strideA, bf16X, incx_64, strideX, &fb, bf16Y, incy_64, strideY, batchCount_64); + blasStatus = cublasTSTgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, bf16A, lda_64, strideA, bf16X, incx_64, strideX, &fb, bf16Y, incy_64, strideY, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSSgemvStridedBatched_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const __nv_bfloat16* A, int64_t lda, long long int strideA, const __nv_bfloat16* x, int64_t incx, long long int stridex, const float* beta, float* y, int64_t incy, long long int stridey, int64_t batchCount); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_tssgemv_strided_batched_64(rocblas_handle handle, rocblas_operation transA, int64_t m, int64_t n, const float* alpha, const rocblas_bfloat16* A, int64_t lda, rocblas_stride strideA, const rocblas_bfloat16* x, int64_t incx, rocblas_stride stridex, const float* beta, float* y, int64_t incy, rocblas_stride stridey, int64_t batch_count); + // CHECK: blasStatus = rocblas_tssgemv_strided_batched_64(blasHandle, blasOperation, m_64, n_64, &fa, bf16A, lda_64, 
strideA, bf16X, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount_64); + blasStatus = cublasTSSgemvStridedBatched_64(blasHandle, blasOperation, m_64, n_64, &fa, bf16A, lda_64, strideA, bf16X, incx_64, strideX, &fb, &fy, incy_64, strideY, batchCount_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssbmv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, int64_t k, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = rocblas_ssbmv_64(blasHandle, blasFillMode, n_64, k_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_ssbmv_64(blasHandle, blasFillMode, n_64, k_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSsbmv_64(blasHandle, blasFillMode, n_64, k_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSsbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsbmv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, int64_t k, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = rocblas_dsbmv_64(blasHandle, blasFillMode, n_64, k_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_dsbmv_64(blasHandle, blasFillMode, 
n_64, k_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDsbmv_64(blasHandle, blasFillMode, n_64, k_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDsbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chbmv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, int64_t k, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* beta, rocblas_float_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_chbmv_64(blasHandle, blasFillMode, n_64, k_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_chbmv_64(blasHandle, blasFillMode, n_64, k_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChbmv_64(blasHandle, blasFillMode, n_64, k_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhbmv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, int64_t k, const 
rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* beta, rocblas_double_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_zhbmv_64(blasHandle, blasFillMode, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_zhbmv_64(blasHandle, blasFillMode, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhbmv_64(blasHandle, blasFillMode, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhbmv_v2_64(blasHandle, blasFillMode, n_64, k_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssymv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = rocblas_ssymv_64(blasHandle, blasFillMode, n_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_ssymv_64(blasHandle, blasFillMode, n_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSsymv_64(blasHandle, blasFillMode, n_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSsymv_v2_64(blasHandle, blasFillMode, n_64, &fa, &fA, lda_64, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* A, int64_t 
lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsymv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = rocblas_dsymv_64(blasHandle, blasFillMode, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_dsymv_64(blasHandle, blasFillMode, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDsymv_64(blasHandle, blasFillMode, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDsymv_v2_64(blasHandle, blasFillMode, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csymv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* beta, rocblas_float_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_csymv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_csymv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCsymv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCsymv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + 
+ // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsymv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* beta, rocblas_double_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_zsymv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_zsymv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZsymv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZsymv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chemv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* A, int64_t lda, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* beta, rocblas_float_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_chemv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: 
blasStatus = rocblas_chemv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChemv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChemv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhemv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* A, int64_t lda, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* beta, rocblas_double_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_zhemv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_zhemv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhemv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhemv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, float* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const 
float* alpha, const float* x, int64_t incx, float* A, int64_t lda); + // CHECK: blasStatus = rocblas_ssyr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_ssyr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA, lda_64); + blasStatus = cublasSsyr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA, lda_64); + blasStatus = cublasSsyr_v2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, double* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const double* alpha, const double* x, int64_t incx, double* A, int64_t lda); + // CHECK: blasStatus = rocblas_dsyr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_dsyr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA, lda_64); + blasStatus = cublasDsyr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA, lda_64); + blasStatus = cublasDsyr_v2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, cuComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, rocblas_float_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_csyr_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_csyr_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexA, lda_64); + blasStatus = 
cublasCsyr_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexA, lda_64); + blasStatus = cublasCsyr_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, rocblas_double_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_zsyr_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_zsyr_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexA, lda_64); + blasStatus = cublasZsyr_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexA, lda_64); + blasStatus = cublasZsyr_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const cuComplex* x, int64_t incx, cuComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const float* alpha, const rocblas_float_complex* x, int64_t incx, rocblas_float_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_cher_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_cher_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA, lda_64); + blasStatus = cublasCher_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA, lda_64); + blasStatus = cublasCher_v2_64(blasHandle, blasFillMode, n_64, 
&fa, &complexx, incx_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zher_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const double* alpha, const rocblas_double_complex* x, int64_t incx, rocblas_double_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_zher_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_zher_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA, lda_64); + blasStatus = cublasZher_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA, lda_64); + blasStatus = cublasZher_v2_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ssyr2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* A, int64_t lda); + // CHECK: blasStatus = rocblas_ssyr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_ssyr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA, lda_64); + blasStatus = cublasSsyr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA, lda_64); + blasStatus = cublasSsyr2_v2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* 
alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dsyr2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* A, int64_t lda); + // CHECK: blasStatus = rocblas_dsyr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_dsyr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDsyr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + blasStatus = cublasDsyr2_v2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_csyr2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* y, int64_t incy, rocblas_float_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_csyr2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_csyr2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCsyr2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCsyr2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const 
cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zsyr2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* y, int64_t incy, rocblas_double_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_zsyr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_zsyr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZsyr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZsyr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_cher2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* y, int64_t incy, rocblas_float_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_cher2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_cher2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCher2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + blasStatus = cublasCher2_v2_64(blasHandle, 
blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* A, int64_t lda); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zher2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* y, int64_t incy, rocblas_double_complex* A, int64_t lda); + // CHECK: blasStatus = rocblas_zher2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + // CHECK-NEXT: blasStatus = rocblas_zher2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZher2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + blasStatus = cublasZher2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA, lda_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* AP, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspmv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const float* alpha, const float* A, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = rocblas_sspmv_64(blasHandle, blasFillMode, n_64, &fa, &fA, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_sspmv_64(blasHandle, blasFillMode, n_64, &fa, &fA, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSspmv_64(blasHandle, blasFillMode, n_64, &fa, &fA, &fx, 
incx_64, &fb, &fy, incy_64); + blasStatus = cublasSspmv_v2_64(blasHandle, blasFillMode, n_64, &fa, &fA, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* AP, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dspmv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const double* alpha, const double* A, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = rocblas_dspmv_64(blasHandle, blasFillMode, n_64, &da, &dA, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_dspmv_64(blasHandle, blasFillMode, n_64, &da, &dA, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDspmv_64(blasHandle, blasFillMode, n_64, &da, &dA, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDspmv_v2_64(blasHandle, blasFillMode, n_64, &da, &dA, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* AP, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpmv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* AP, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* beta, rocblas_float_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_chpmv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_chpmv_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChpmv_64(blasHandle, blasFillMode, n_64, &complexa, 
&complexA, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasChpmv_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexA, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* AP, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpmv_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* AP, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* beta, rocblas_double_complex* y, int64_t incy); + // CHECK: blasStatus = rocblas_zhpmv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = rocblas_zhpmv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhpmv_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZhpmv_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexA, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, float* AP); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspr_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const float* alpha, const float* x, int64_t incx, float* AP); + // CHECK: blasStatus = rocblas_sspr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA); + // CHECK-NEXT: blasStatus = rocblas_sspr_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA); + blasStatus = cublasSspr_64(blasHandle, 
blasFillMode, n_64, &fa, &fx, incx_64, &fA); + blasStatus = cublasSspr_v2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, double* AP); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dspr_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const double* alpha, const double* x, int64_t incx, double* AP); + // CHECK: blasStatus = rocblas_dspr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA); + // CHECK-NEXT: blasStatus = rocblas_dspr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA); + blasStatus = cublasDspr_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA); + blasStatus = cublasDspr_v2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const cuComplex* x, int64_t incx, cuComplex* AP); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpr_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const float* alpha, const rocblas_float_complex* x, int64_t incx, rocblas_float_complex* AP); + // CHECK: blasStatus = rocblas_chpr_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA); + // CHECK-NEXT: blasStatus = rocblas_chpr_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA); + blasStatus = cublasChpr_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA); + blasStatus = cublasChpr_v2_64(blasHandle, blasFillMode, n_64, &fa, &complexx, incx_64, &complexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* AP); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpr_64(rocblas_handle handle, rocblas_fill 
uplo, int64_t n, const double* alpha, const rocblas_double_complex* x, int64_t incx, rocblas_double_complex* AP); + // CHECK: blasStatus = rocblas_zhpr_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA); + // CHECK-NEXT: blasStatus = rocblas_zhpr_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA); + blasStatus = cublasZhpr_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA); + blasStatus = cublasZhpr_v2_64(blasHandle, blasFillMode, n_64, &da, &dcomplexx, incx_64, &dcomplexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* AP); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_sspr2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const float* alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* AP); + // CHECK: blasStatus = rocblas_sspr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA); + // CHECK-NEXT: blasStatus = rocblas_sspr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA); + blasStatus = cublasSspr2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA); + blasStatus = cublasSspr2_v2_64(blasHandle, blasFillMode, n_64, &fa, &fx, incx_64, &fy, incy_64, &fA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* AP); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dspr2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const double* alpha, const double* x, int64_t incx, const double* y, int64_t incy, double* AP); + // CHECK: blasStatus = rocblas_dspr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA); + // CHECK-NEXT: blasStatus = 
rocblas_dspr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA); + blasStatus = cublasDspr2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA); + blasStatus = cublasDspr2_v2_64(blasHandle, blasFillMode, n_64, &da, &dx, incx_64, &dy, incy_64, &dA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx, const cuComplex* y, int64_t incy, cuComplex* AP); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_chpr2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_float_complex* alpha, const rocblas_float_complex* x, int64_t incx, const rocblas_float_complex* y, int64_t incy, rocblas_float_complex* AP); + // CHECK: blasStatus = rocblas_chpr2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA); + // CHECK-NEXT: blasStatus = rocblas_chpr2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA); + blasStatus = cublasChpr2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA); + blasStatus = cublasChpr2_v2_64(blasHandle, blasFillMode, n_64, &complexa, &complexx, incx_64, &complexy, incy_64, &complexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* y, int64_t incy, cuDoubleComplex* AP); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_zhpr2_64(rocblas_handle handle, rocblas_fill uplo, int64_t n, const rocblas_double_complex* alpha, const rocblas_double_complex* x, int64_t incx, const rocblas_double_complex* y, int64_t incy, rocblas_double_complex* AP); + // CHECK: blasStatus = rocblas_zhpr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + // 
CHECK-NEXT: blasStatus = rocblas_zhpr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + blasStatus = cublasZhpr2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + blasStatus = cublasZhpr2_v2_64(blasHandle, blasFillMode, n_64, &dcomplexa, &dcomplexx, incx_64, &dcomplexy, incy_64, &dcomplexA); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* A, int64_t lda, float* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const float* A, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = rocblas_strmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_strmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* A, int64_t lda, double* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const double* A, int64_t lda, double* x, int64_t incx); + // CHECK: blasStatus = rocblas_dtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_dtrmv_64(blasHandle, blasFillMode, 
blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_float_complex* A, int64_t lda, rocblas_float_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ctrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ctrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_double_complex* A, int64_t lda, rocblas_double_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ztrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: 
blasStatus = rocblas_ztrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* AP, float* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stpmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const float* A, float* x, int64_t incx); + // CHECK: blasStatus = rocblas_stpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_stpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + blasStatus = cublasStpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + blasStatus = cublasStpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* AP, double* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtpmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const double* A, double* x, int64_t incx); + // CHECK: blasStatus = rocblas_dtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_dtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + blasStatus = 
cublasDtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + blasStatus = cublasDtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* AP, cuComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctpmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_float_complex* A, rocblas_float_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ctpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ctpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + blasStatus = cublasCtpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + blasStatus = cublasCtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* AP, cuDoubleComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztpmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_double_complex* A, rocblas_double_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ztpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ztpmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + blasStatus = cublasZtpmv_64(blasHandle, blasFillMode, 
blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + blasStatus = cublasZtpmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const float* A, int64_t lda, float* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stbmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, int64_t n, int64_t k, const float* A, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = rocblas_stbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_stbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtbmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); + // CHECK: blasStatus = rocblas_dtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_dtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtbmv_64(blasHandle, blasFillMode, 
blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctbmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, int64_t n, int64_t k, const rocblas_float_complex* A, int64_t lda, rocblas_float_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ctbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ctbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbmv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztbmv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation trans, rocblas_diagonal diag, int64_t n, int64_t k, const rocblas_double_complex* A, int64_t lda, rocblas_double_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ztbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = 
rocblas_ztbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtbmv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtbmv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const float* A, int64_t lda, float* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_stbsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, int64_t k, const float* A, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = rocblas_stbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_stbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtbsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, int64_t k, const double* A, int64_t lda, double* x, int64_t incx); + // CHECK: blasStatus = rocblas_dtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, 
incx_64); + // CHECK-NEXT: blasStatus = rocblas_dtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctbsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, int64_t k, const rocblas_float_complex* A, int64_t lda, rocblas_float_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ctbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ctbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, int64_t k, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztbsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, int64_t k, const rocblas_double_complex* A, int64_t lda, rocblas_double_complex* x, int64_t 
incx); + // CHECK: blasStatus = rocblas_ztbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ztbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtbsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtbsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, k_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const float* A, int64_t lda, float* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_strsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const float* A, int64_t lda, float* x, int64_t incx); + // CHECK: blasStatus = rocblas_strsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_strsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + blasStatus = cublasStrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &fA, lda_64, &fx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const double* A, int64_t lda, double* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_dtrsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const double* A, int64_t lda, double* x, int64_t incx); + // 
CHECK: blasStatus = rocblas_dtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_dtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + blasStatus = cublasDtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dA, lda_64, &dx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuComplex* A, int64_t lda, cuComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ctrsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_float_complex* A, int64_t lda, rocblas_float_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ctrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ctrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + blasStatus = cublasCtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &complexA, lda_64, &complexx, incx_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsv_v2_64(cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int64_t n, const cuDoubleComplex* A, int64_t lda, cuDoubleComplex* x, int64_t incx); + // ROC: ROCBLAS_EXPORT rocblas_status rocblas_ztrsv_64(rocblas_handle handle, rocblas_fill uplo, rocblas_operation transA, rocblas_diagonal diag, int64_t n, const rocblas_double_complex* A, int64_t lda, 
rocblas_double_complex* x, int64_t incx); + // CHECK: blasStatus = rocblas_ztrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + // CHECK-NEXT: blasStatus = rocblas_ztrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrsv_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); + blasStatus = cublasZtrsv_v2_64(blasHandle, blasFillMode, blasOperation, blasDiagType, n_64, &dcomplexA, lda_64, &dcomplexx, incx_64); #endif return 0; diff --git a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu index b1906098..98aa2b92 100644 --- a/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu +++ b/tests/unit_tests/synthetic/libraries/cudnn2miopen.cu @@ -185,9 +185,11 @@ int main() { // CHECK: miopenActivationMode_t activationMode; // CHECK-NEXT: miopenActivationMode_t ACTIVATION_RELU = miopenActivationRELU; // CHECK-NEXT: miopenActivationMode_t ACTIVATION_TANH = miopenActivationTANH; + // CHECK-NEXT: miopenActivationMode_t ACTIVATION_SIGMOID = miopenActivationLOGISTIC; cudnnActivationMode_t activationMode; cudnnActivationMode_t ACTIVATION_RELU = CUDNN_ACTIVATION_RELU; cudnnActivationMode_t ACTIVATION_TANH = CUDNN_ACTIVATION_TANH; + cudnnActivationMode_t ACTIVATION_SIGMOID = CUDNN_ACTIVATION_SIGMOID; // CHECK: miopenSoftmaxAlgorithm_t softmaxAlgorithm; // CHECK-NEXT: miopenSoftmaxAlgorithm_t SOFTMAX_FAST = MIOPEN_SOFTMAX_FAST; @@ -792,6 +794,13 @@ int main() { // CHECK: status = miopenReduceTensor(handle, ReduceTensorDescriptor, indices, indicesSizeInBytes, workSpace, workSpaceSizeInBytes, alpha, aD, A, beta, cD, C); status = cudnnReduceTensor(handle, ReduceTensorDescriptor, indices, indicesSizeInBytes, workSpace, workSpaceSizeInBytes, alpha, aD, A, beta, cD, C); +#if CUDNN_VERSION >= 2000 + // CHECK: miopenPoolingMode_t 
POOLING_AVERAGE_COUNT_INCLUDE_PADDING = miopenPoolingAverageInclusive; + // CHECK-NEXT: miopenPoolingMode_t POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = miopenPoolingAverage; + cudnnPoolingMode_t POOLING_AVERAGE_COUNT_INCLUDE_PADDING = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; + cudnnPoolingMode_t POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; +#endif + #if CUDNN_VERSION >= 3008 // CHECK: miopenDataType_t DATA_BFLOAT16 = miopenBFloat16; cudnnDataType_t DATA_BFLOAT16 = CUDNN_DATA_BFLOAT16; @@ -832,6 +841,9 @@ int main() { // CHECK: miopenActivationMode_t ACTIVATION_ELU = miopenActivationELU; cudnnActivationMode_t ACTIVATION_ELU = CUDNN_ACTIVATION_ELU; + // CHECK: miopenConvBwdDataAlgorithm_t CONVOLUTION_BWD_DATA_ALGO_COUNT = miopenTransposeBwdDataAlgoGEMM; + cudnnConvolutionBwdDataAlgo_t CONVOLUTION_BWD_DATA_ALGO_COUNT = CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT; + // CUDA: cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc, cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, cudnnNanPropagation_t reduceTensorNanOpt, cudnnReduceTensorIndices_t reduceTensorIndices, cudnnIndicesType_t reduceTensorIndicesType); // MIOPEN: MIOPEN_EXPORT miopenStatus_t miopenSetReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc, miopenReduceTensorOp_t reduceTensorOp, miopenDataType_t reduceTensorCompType, miopenNanPropagation_t reduceTensorNanOpt, miopenReduceTensorIndices_t reduceTensorIndices, miopenIndicesType_t reduceTensorIndicesType); // CHECK: status = miopenSetReduceTensorDescriptor(ReduceTensorDescriptor, reduceTensorOp, dataType, nanPropagation_t, reduceTensorIndices, indicesType); @@ -848,6 +860,15 @@ int main() { cudnnActivationMode_t ACTIVATION_IDENTITY = CUDNN_ACTIVATION_IDENTITY; #endif +#if CUDNN_VERSION >= 7201 && CUDNN_VERSION <= 8907 + // CHECK: miopenRNNPaddingMode_t RNNPaddingMode_t; + // CHECK-NEXT: miopenRNNPaddingMode_t 
RNN_PADDED_IO_DISABLED = miopenRNNIONotPadded; + // CHECK-NEXT: miopenRNNPaddingMode_t RNN_PADDED_IO_ENABLED = miopenRNNIOWithPadding; + cudnnRNNPaddingMode_t RNNPaddingMode_t; + cudnnRNNPaddingMode_t RNN_PADDED_IO_DISABLED = CUDNN_RNN_PADDED_IO_DISABLED; + cudnnRNNPaddingMode_t RNN_PADDED_IO_ENABLED = CUDNN_RNN_PADDED_IO_ENABLED; +#endif + #if CUDNN_VERSION >= 8001 // CHECK: miopenStatus_t STATUS_VERSION_MISMATCH = miopenStatusVersionMismatch; cudnnStatus_t STATUS_VERSION_MISMATCH = CUDNN_STATUS_VERSION_MISMATCH; @@ -1110,6 +1131,13 @@ int main() { cudnnBackendHeurMode_t HEUR_MODE_B = CUDNN_HEUR_MODE_B; cudnnBackendHeurMode_t HEUR_MODES_COUNT = CUDNN_HEUR_MODES_COUNT; + // CHECK: miopenRNNFWDMode_t RNNFWDMode_t; + // CHECK-NEXT: miopenRNNFWDMode_t FWD_MODE_INFERENCE = miopenRNNInference; + // CHECK-NEXT: miopenRNNFWDMode_t FWD_MODE_TRAINING = miopenRNNTraining; + cudnnForwardMode_t RNNFWDMode_t; + cudnnForwardMode_t FWD_MODE_INFERENCE = CUDNN_FWD_MODE_INFERENCE; + cudnnForwardMode_t FWD_MODE_TRAINING = CUDNN_FWD_MODE_TRAINING; + // CUDA: cudnnStatus_t CUDNNWINAPI cudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, cudnnBackendDescriptor_t *descriptor); // MIOPEN: MIOPEN_EXPORT miopenStatus_t miopenBackendCreateDescriptor(miopenBackendDescriptorType_t descriptorType, miopenBackendDescriptor_t* descriptor); // CHECK: status = miopenBackendCreateDescriptor(backendDescriptorType_t, &backendDescriptor_t); @@ -1425,6 +1453,15 @@ int main() { // CHECK: miopenBackendHeurMode_t HEUR_MODE_A = MIOPEN_HEUR_MODE_A; cudnnBackendHeurMode_t HEUR_MODE_FALLBACK = CUDNN_HEUR_MODE_FALLBACK; cudnnBackendHeurMode_t HEUR_MODE_A = CUDNN_HEUR_MODE_A; + + // CHECK: miopenPaddingMode_t PaddingMode_t; + // CHECK-NEXT: miopenPaddingMode_t ZERO_PAD = miopenPaddingDefault; + // CHECK-NEXT: miopenPaddingMode_t NEG_INF_PAD = miopenPaddingSame; + // CHECK-NEXT: miopenPaddingMode_t EDGE_VAL_PAD = miopenPaddingValid; + cudnnPaddingMode_t PaddingMode_t; + cudnnPaddingMode_t 
ZERO_PAD = CUDNN_ZERO_PAD; + cudnnPaddingMode_t NEG_INF_PAD = CUDNN_NEG_INF_PAD; + cudnnPaddingMode_t EDGE_VAL_PAD = CUDNN_EDGE_VAL_PAD; #endif #if CUDNN_VERSION >= 8400 @@ -1542,6 +1579,13 @@ int main() { cudnnPointwiseMode_t POINTWISE_GELU_APPROX_TANH_BWD = CUDNN_POINTWISE_GELU_APPROX_TANH_BWD; #endif +#if CUDNN_VERSION >= 8600 + // CHECK: miopenDataType_t DATA_FP8_E4M3 = miopenFloat8; + // CHECK-NEXT: miopenDataType_t DATA_FP8_E5M2 = miopenBFloat8; + cudnnDataType_t DATA_FP8_E4M3 = CUDNN_DATA_FP8_E4M3; + cudnnDataType_t DATA_FP8_E5M2 = CUDNN_DATA_FP8_E5M2; +#endif + #if CUDNN_VERSION >= 8700 // CHECK: miopenBackendDescriptorType_t BACKEND_RNG_DESCRIPTOR = MIOPEN_BACKEND_RNG_DESCRIPTOR; // CHECK-NEXT: miopenBackendDescriptorType_t BACKEND_OPERATION_RNG_DESCRIPTOR = MIOPEN_BACKEND_OPERATION_RNG_DESCRIPTOR; diff --git a/tests/unit_tests/synthetic/libraries/curand2hiprand.cu b/tests/unit_tests/synthetic/libraries/curand2hiprand.cu index 20f7a220..4b43299e 100644 --- a/tests/unit_tests/synthetic/libraries/curand2hiprand.cu +++ b/tests/unit_tests/synthetic/libraries/curand2hiprand.cu @@ -19,10 +19,14 @@ int main() { printf("21. 
cuRAND API to hipRAND API synthetic test\n"); unsigned int *outputPtr = nullptr; + unsigned int *constants = nullptr; + unsigned long long *constantsLL = nullptr; float *outputPtrFloat = nullptr; double *outputPtrDouble = nullptr; + unsigned int num_dimensions = 0; unsigned long long *outputPtrUll = nullptr; unsigned long long offset = 0; + int version = 0; size_t num = 0; float mean = 0.f; double dmean = 0.f; @@ -121,6 +125,11 @@ int main() { curandStateScrambledSobol64 randStateScrambledSobol64; curandStateScrambledSobol64_t randStateScrambledSobol64_t; + // CHECK: hiprandStateSobol32 randStateSobol32; + // CHECK-NEXT: hiprandStateSobol32_t randStateSobol32_t; + curandStateSobol32 randStateSobol32; + curandStateSobol32_t randStateSobol32_t; + // CHECK: hiprandStateScrambledSobol32 randStateScrambledSobol32; // CHECK-NEXT: hiprandStateScrambledSobol32_t randStateScrambledSobol32_t; curandStateScrambledSobol32 randStateScrambledSobol32; @@ -133,6 +142,26 @@ int main() { curandDirectionVectors64_t directions64; curandDirectionVectors64_t *pDirections64 = nullptr; + // CHECK: hiprandDiscreteDistribution_st *discreteDistribution_st = nullptr; + // CHECK-NEXT: hiprandDiscreteDistribution_t discreteDistribution_t = nullptr; + curandDiscreteDistribution_st *discreteDistribution_st = nullptr; + curandDiscreteDistribution_t discreteDistribution_t = nullptr; + + // CHECK: hiprandStateMtgp32 stateMtgp32; + // CHECK-NEXT: hiprandStateMtgp32_t stateMtgp32_t; + curandStateMtgp32 stateMtgp32; + curandStateMtgp32_t stateMtgp32_t; + + // CHECK: hiprandStateMRG32k3a stateMRG32k3a; + // CHECK-NEXT: hiprandStateMRG32k3a_t stateMRG32k3a_t; + curandStateMRG32k3a stateMRG32k3a; + curandStateMRG32k3a_t stateMRG32k3a_t; + + // CHECK: hiprandStatePhilox4_32_10 statePhilox4_32_10; + // CHECK-NEXT: hiprandStatePhilox4_32_10_t statePhilox4_32_10_t; + curandStatePhilox4_32_10 statePhilox4_32_10; + curandStatePhilox4_32_10_t statePhilox4_32_10_t; + // CHECK: hiprandDirectionVectorSet_t
directionVectorSet; // CHECK-NEXT: hiprandDirectionVectorSet_t directionVectorSet_t; // CHECK-NEXT: hiprandDirectionVectorSet_t DIRECTION_VECTORS_32_JOEKUO6 = HIPRAND_DIRECTION_VECTORS_32_JOEKUO6; @@ -236,6 +265,36 @@ int main() { // CHECK: status = hiprandSetStream(randGenerator, stream); status = curandSetStream(randGenerator, stream); + // CUDA: curandStatus_t CURANDAPI curandCreatePoissonDistribution(double lambda, curandDiscreteDistribution_t *discrete_distribution); + // HIP: hiprandStatus_t HIPRANDAPI hiprandCreatePoissonDistribution(double lambda, hiprandDiscreteDistribution_t * discrete_distribution); + // CHECK: status = hiprandCreatePoissonDistribution(dlambda, &discreteDistribution_t); + status = curandCreatePoissonDistribution(dlambda, &discreteDistribution_t); + + // CUDA: curandStatus_t CURANDAPI curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution); + // HIP: hiprandStatus_t HIPRANDAPI hiprandDestroyDistribution(hiprandDiscreteDistribution_t discrete_distribution); + // CHECK: status = hiprandDestroyDistribution(discreteDistribution_t); + status = curandDestroyDistribution(discreteDistribution_t); + + // CUDA: curandStatus_t CURANDAPI curandGetScrambleConstants32(unsigned int * * constants); + // HIP: hiprandStatus_t HIPRANDAPI hiprandGetScrambleConstants32(const unsigned int** constants); + // CHECK: status = hiprandGetScrambleConstants32(&constants); + status = curandGetScrambleConstants32(&constants); + + // CUDA: curandStatus_t CURANDAPI curandGetScrambleConstants64(unsigned long long * * constants); + // HIP: hiprandStatus_t HIPRANDAPI hiprandGetScrambleConstants64(const unsigned long long** constants); + // CHECK: status = hiprandGetScrambleConstants64(&constantsLL); + status = curandGetScrambleConstants64(&constantsLL); + + // CUDA: curandStatus_t CURANDAPI curandGetVersion(int *version); + // HIP: hiprandStatus_t HIPRANDAPI hiprandGetVersion(int * version); + // CHECK: status = hiprandGetVersion(&version); + status = 
curandGetVersion(&version); + + // CUDA: curandStatus_t CURANDAPI curandSetQuasiRandomGeneratorDimensions(curandGenerator_t generator, unsigned int num_dimensions); + // HIP: hiprandStatus_t HIPRANDAPI hiprandSetQuasiRandomGeneratorDimensions(hiprandGenerator_t generator, unsigned int dimensions); + // CHECK: status = hiprandSetQuasiRandomGeneratorDimensions(randGenerator, num_dimensions); + status = curandSetQuasiRandomGeneratorDimensions(randGenerator, num_dimensions); + #if CUDA_VERSION >= 11000 && CURAND_VERSION >= 10200 // CHECK: hiprandOrdering_t RAND_ORDERING_PSEUDO_LEGACY = HIPRAND_ORDERING_PSEUDO_LEGACY; curandOrdering_t RAND_ORDERING_PSEUDO_LEGACY = CURAND_ORDERING_PSEUDO_LEGACY; diff --git a/tests/unit_tests/synthetic/libraries/curand2rocrand.cu b/tests/unit_tests/synthetic/libraries/curand2rocrand.cu index 80169a47..93a168c0 100644 --- a/tests/unit_tests/synthetic/libraries/curand2rocrand.cu +++ b/tests/unit_tests/synthetic/libraries/curand2rocrand.cu @@ -4,17 +4,24 @@ #include #include // CHECK: #include "rocrand/rocrand.h" +// CHECK-NEXT: #include "rocrand/rocrand_kernel.h" #include "curand.h" +#include "curand_kernel.h" // CHECK-NOT: #include "rocrand/rocrand.h" +// CHECK-NOT: #include "rocrand/rocrand_kernel.h" int main() { printf("21.1. 
cuRAND API to rocRAND API synthetic test\n"); unsigned int *outputPtr = nullptr; + unsigned int *constants = nullptr; + unsigned long long *constantsLL = nullptr; float *outputPtrFloat = nullptr; double *outputPtrDouble = nullptr; + unsigned int num_dimensions = 0; unsigned long long *outputPtrUll = nullptr; unsigned long long offset = 0; + int version = 0; size_t num = 0; float mean = 0.f; double dmean = 0.f; @@ -106,6 +113,46 @@ int main() { curandGenerator_st *randGenerator_st = nullptr; curandGenerator_t randGenerator; + // CHECK: rocrand_device::sobol64_engine randStateSobol64; + // CHECK-NEXT: rocrand_state_sobol64 randStateSobol64_t; + curandStateSobol64 randStateSobol64; + curandStateSobol64_t randStateSobol64_t; + + // CHECK: rocrand_device::scrambled_sobol64_engine randStateScrambledSobol64; + // CHECK-NEXT: rocrand_state_scrambled_sobol64 randStateScrambledSobol64_t; + curandStateScrambledSobol64 randStateScrambledSobol64; + curandStateScrambledSobol64_t randStateScrambledSobol64_t; + + // CHECK: rocrand_device::sobol32_engine randStateSobol32; + // CHECK-NEXT: rocrand_state_sobol32 randStateSobol32_t; + curandStateSobol32 randStateSobol32; + curandStateSobol32_t randStateSobol32_t; + + // CHECK: rocrand_device::scrambled_sobol32_engine randStateScrambledSobol32; + // CHECK-NEXT: rocrand_state_scrambled_sobol32 randStateScrambledSobol32_t; + curandStateScrambledSobol32 randStateScrambledSobol32; + curandStateScrambledSobol32_t randStateScrambledSobol32_t; + + // CHECK: rocrand_discrete_distribution_st *discreteDistribution_st = nullptr; + // CHECK-NEXT: rocrand_discrete_distribution discreteDistribution_t = nullptr; + curandDiscreteDistribution_st *discreteDistribution_st = nullptr; + curandDiscreteDistribution_t discreteDistribution_t = nullptr; + + // CHECK: rocrand_device::mtgp32_engine stateMtgp32; + // CHECK-NEXT: rocrand_state_mtgp32 stateMtgp32_t; + curandStateMtgp32 stateMtgp32; + curandStateMtgp32_t stateMtgp32_t; + + // CHECK: 
rocrand_device::mrg32k3a_engine stateMRG32k3a; + // CHECK-NEXT: rocrand_state_mrg32k3a stateMRG32k3a_t; + curandStateMRG32k3a stateMRG32k3a; + curandStateMRG32k3a_t stateMRG32k3a_t; + + // CHECK: rocrand_device::philox4x32_10_engine statePhilox4_32_10; + // CHECK-NEXT: rocrand_state_philox4x32_10 statePhilox4_32_10_t; + curandStatePhilox4_32_10 statePhilox4_32_10; + curandStatePhilox4_32_10_t statePhilox4_32_10_t; + // CUDA: curandStatus_t CURANDAPI curandCreateGenerator(curandGenerator_t *generator, curandRngType_t rng_type); // ROC: rocrand_status ROCRANDAPI rocrand_create_generator(rocrand_generator * generator, rocrand_rng_type rng_type); // CHECK: status = rocrand_create_generator(&randGenerator, randRngType_t); @@ -186,6 +233,41 @@ int main() { // CHECK: status = rocrand_set_stream(randGenerator, stream); status = curandSetStream(randGenerator, stream); + // CUDA: curandStatus_t CURANDAPI curandCreatePoissonDistribution(double lambda, curandDiscreteDistribution_t *discrete_distribution); + // ROC: rocrand_status ROCRANDAPI rocrand_create_poisson_distribution(double lambda, rocrand_discrete_distribution * discrete_distribution); + // CHECK: status = rocrand_create_poisson_distribution(dlambda, &discreteDistribution_t); + status = curandCreatePoissonDistribution(dlambda, &discreteDistribution_t); + + // CUDA: curandStatus_t CURANDAPI curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution); + // ROC: rocrand_status ROCRANDAPI rocrand_destroy_discrete_distribution(rocrand_discrete_distribution discrete_distribution); + // CHECK: status = rocrand_destroy_discrete_distribution(discreteDistribution_t); + status = curandDestroyDistribution(discreteDistribution_t); + + // CUDA: curandStatus_t CURANDAPI curandGetScrambleConstants32(unsigned int * * constants); + // ROC: rocrand_status ROCRANDAPI rocrand_get_scramble_constants32(const unsigned int** constants); + // CHECK: status = rocrand_get_scramble_constants32(&constants); + status = 
curandGetScrambleConstants32(&constants); + + // CUDA: curandStatus_t CURANDAPI curandGetScrambleConstants64(unsigned long long * * constants); + // ROC: rocrand_status ROCRANDAPI rocrand_get_scramble_constants64(const unsigned long long** constants); + // CHECK: status = rocrand_get_scramble_constants64(&constantsLL); + status = curandGetScrambleConstants64(&constantsLL); + + // CUDA: curandStatus_t CURANDAPI curandGetVersion(int *version); + // ROC: rocrand_status ROCRANDAPI rocrand_get_version(int * version); + // CHECK: status = rocrand_get_version(&version); + status = curandGetVersion(&version); + + // CUDA: curandStatus_t CURANDAPI curandSetGeneratorOrdering(curandGenerator_t generator, curandOrdering_t order); + // ROC: rocrand_status ROCRANDAPI rocrand_set_ordering(rocrand_generator generator, rocrand_ordering order); + // CHECK: status = rocrand_set_ordering(randGenerator, randOrdering); + status = curandSetGeneratorOrdering(randGenerator, randOrdering); + + // CUDA: curandStatus_t CURANDAPI curandSetQuasiRandomGeneratorDimensions(curandGenerator_t generator, unsigned int num_dimensions); + // ROC: rocrand_status ROCRANDAPI rocrand_set_quasi_random_generator_dimensions(rocrand_generator generator, unsigned int dimensions); + // CHECK: status = rocrand_set_quasi_random_generator_dimensions(randGenerator, num_dimensions); + status = curandSetQuasiRandomGeneratorDimensions(randGenerator, num_dimensions); + #if CUDA_VERSION >= 11000 && CURAND_VERSION >= 10200 // CHECK: rocrand_ordering RAND_ORDERING_PSEUDO_LEGACY = ROCRAND_ORDERING_PSEUDO_LEGACY; curandOrdering_t RAND_ORDERING_PSEUDO_LEGACY = CURAND_ORDERING_PSEUDO_LEGACY; diff --git a/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu b/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu index a82f5904..511c868c 100644 --- a/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu +++ b/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu @@ -131,8 +131,11 @@ int main() { void 
*Workspace = nullptr; void *workOnDevice = nullptr; void *workOnHost = nullptr; + void *bufferOnDevice = nullptr; + void *bufferOnHost = nullptr; void *A = nullptr; void *B = nullptr; + void *tau = nullptr; int *piMp = nullptr; int *piMi = nullptr; double *pdMx = nullptr; @@ -751,8 +754,8 @@ int main() { status = cusolverSpDcsrlsvcholHost(SpHandle_t, m, nnzA, MatDescr_t, &dcsrVal, &icsrRowPtr, &icsrColInd, &dB, dtol, ireorder, &dX, &isingularity); #if CUDA_VERSION >= 8000 - // CHECK: hipDataType dataTypeA, dataTypeB, computeType; - cudaDataType dataTypeA, dataTypeB, computeType; + // CHECK: hipDataType dataTypeA, dataTypeB, dataTypeTau, computeType; + cudaDataType dataTypeA, dataTypeB, dataTypeTau, computeType; // CHECK: hipsolverEigType_t eigType; // CHECK-NEXT: hipsolverEigType_t EIG_TYPE_1 = HIPSOLVER_EIG_TYPE_1; @@ -1729,6 +1732,51 @@ int main() { // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnXgetrs(hipsolverDnHandle_t handle, hipsolverDnParams_t params, hipsolverOperation_t trans, int64_t n, int64_t nrhs, hipDataType dataTypeA, const void* A, int64_t lda, const int64_t* devIpiv, hipDataType dataTypeB, void* B, int64_t ldb, int* devInfo); // CHECK: status = hipsolverDnXgetrs(handle, solverDnParams, blasOperation, n64, nrhs64, dataTypeA, A, lda64, &devIpiv64, dataTypeB, B, ldb64, &info); status = cusolverDnXgetrs(handle, solverDnParams, blasOperation, n64, nrhs64, dataTypeA, A, lda64, &devIpiv64, dataTypeB, B, ldb64, &info); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnXpotrf_bufferSize(cusolverDnHandle_t handle, cusolverDnParams_t params, cublasFillMode_t uplo, int64_t n, cudaDataType dataTypeA, const void * A, int64_t lda, cudaDataType computeType, size_t * workspaceInBytesOnDevice, size_t * workspaceInBytesOnHost); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnXpotrf_bufferSize(hipsolverDnHandle_t handle, hipsolverDnParams_t params, hipsolverFillMode_t uplo, int64_t n, hipDataType dataTypeA, const void* A, int64_t lda, hipDataType 
computeType, size_t* lworkOnDevice, size_t* lworkOnHost); + // CHECK: status = hipsolverDnXpotrf_bufferSize(handle, solverDnParams, fillMode, n64, dataTypeA, A, lda64, computeType, &lworkOnDevice, &lworkOnHost); + status = cusolverDnXpotrf_bufferSize(handle, solverDnParams, fillMode, n64, dataTypeA, A, lda64, computeType, &lworkOnDevice, &lworkOnHost); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnXpotrf(cusolverDnHandle_t handle, cusolverDnParams_t params, cublasFillMode_t uplo, int64_t n, cudaDataType dataTypeA, void * A, int64_t lda, cudaDataType computeType, void * bufferOnDevice, size_t workspaceInBytesOnDevice, void * bufferOnHost, size_t workspaceInBytesOnHost, int * info); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnXpotrf(hipsolverDnHandle_t handle, hipsolverDnParams_t params, hipsolverFillMode_t uplo, int64_t n, hipDataType dataTypeA, void* A, int64_t lda, hipDataType computeType, void* workOnDevice, size_t lworkOnDevice, void* workOnHost, size_t lworkOnHost, int* info); + // CHECK: status = hipsolverDnXpotrf(handle, solverDnParams, fillMode, n64, dataTypeA, A, lda64, computeType, workOnDevice, lworkOnDevice, workOnHost, lworkOnHost, &info); + status = cusolverDnXpotrf(handle, solverDnParams, fillMode, n64, dataTypeA, A, lda64, computeType, workOnDevice, lworkOnDevice, workOnHost, lworkOnHost, &info); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnXpotrs(cusolverDnHandle_t handle, cusolverDnParams_t params, cublasFillMode_t uplo, int64_t n, int64_t nrhs, cudaDataType dataTypeA, const void * A, int64_t lda, cudaDataType dataTypeB, void * B, int64_t ldb, int * info); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnXpotrs(hipsolverDnHandle_t handle, hipsolverDnParams_t params, hipsolverFillMode_t uplo, int64_t n, int64_t nrhs, hipDataType dataTypeA, const void* A, int64_t lda, hipDataType dataTypeB, void* B, int64_t ldb, int* info); + // CHECK: status = hipsolverDnXpotrs(handle, solverDnParams, fillMode, n64, nrhs64, dataTypeA, 
A, lda64, dataTypeB, B, ldb64, &info); + status = cusolverDnXpotrs(handle, solverDnParams, fillMode, n64, nrhs64, dataTypeA, A, lda64, dataTypeB, B, ldb64, &info); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnXgeqrf_bufferSize(cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, int64_t n, cudaDataType dataTypeA, const void * A, int64_t lda, cudaDataType dataTypeTau, const void * tau, cudaDataType computeType, size_t * workspaceInBytesOnDevice, size_t * workspaceInBytesOnHost); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnXgeqrf_bufferSize(hipsolverDnHandle_t handle, hipsolverDnParams_t params, int64_t m, int64_t n, hipDataType dataTypeA, const void* A, int64_t lda, hipDataType dataTypeTau, const void* tau, hipDataType computeType, size_t* lworkOnDevice, size_t* lworkOnHost); + // CHECK: status = hipsolverDnXgeqrf_bufferSize(handle, solverDnParams, m64, n64, dataTypeA, A, lda64, dataTypeTau, tau, computeType, &lworkOnDevice, &lworkOnHost); + status = cusolverDnXgeqrf_bufferSize(handle, solverDnParams, m64, n64, dataTypeA, A, lda64, dataTypeTau, tau, computeType, &lworkOnDevice, &lworkOnHost); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnXgeqrf(cusolverDnHandle_t handle, cusolverDnParams_t params, int64_t m, int64_t n, cudaDataType dataTypeA, void * A, int64_t lda, cudaDataType dataTypeTau, void * tau, cudaDataType computeType, void * bufferOnDevice, size_t workspaceInBytesOnDevice, void * bufferOnHost, size_t workspaceInBytesOnHost, int * info); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnXgeqrf(hipsolverDnHandle_t handle, hipsolverDnParams_t params, int64_t m, int64_t n, hipDataType dataTypeA, void* A, int64_t lda, hipDataType dataTypeTau, void* tau, hipDataType computeType, void* workOnDevice, size_t lworkOnDevice, void* workOnHost, size_t lworkOnHost, int* devInfo); + // CHECK: status = hipsolverDnXgeqrf(handle, solverDnParams, m64, n64, dataTypeA, A, lda64, dataTypeTau, tau, computeType, bufferOnDevice, 
lworkOnDevice, bufferOnHost, lworkOnHost, &devInfo); + status = cusolverDnXgeqrf(handle, solverDnParams, m64, n64, dataTypeA, A, lda64, dataTypeTau, tau, computeType, bufferOnDevice, lworkOnDevice, bufferOnHost, lworkOnHost, &devInfo); +#endif + +#if CUDA_VERSION >= 12020 + // CHECK: hipsolverDeterministicMode_t DeterministicMode_t; + // CHECK-NEXT: hipsolverDeterministicMode_t DETERMINISTIC_RESULTS = HIPSOLVER_DETERMINISTIC_RESULTS; + // CHECK-NEXT: hipsolverDeterministicMode_t ALLOW_NON_DETERMINISTIC_RESULTS = HIPSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS; + cusolverDeterministicMode_t DeterministicMode_t; + cusolverDeterministicMode_t DETERMINISTIC_RESULTS = CUSOLVER_DETERMINISTIC_RESULTS; + cusolverDeterministicMode_t ALLOW_NON_DETERMINISTIC_RESULTS = CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS; + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnSetDeterministicMode(cusolverDnHandle_t handle, cusolverDeterministicMode_t mode); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnSetDeterministicMode(hipsolverHandle_t handle, hipsolverDeterministicMode_t mode); + // CHECK: status = hipsolverDnSetDeterministicMode(handle, DeterministicMode_t); + status = cusolverDnSetDeterministicMode(handle, DeterministicMode_t); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnGetDeterministicMode(cusolverDnHandle_t handle, cusolverDeterministicMode_t* mode); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnGetDeterministicMode(hipsolverHandle_t handle, hipsolverDeterministicMode_t* mode); + // CHECK: status = hipsolverDnGetDeterministicMode(handle, &DeterministicMode_t); + status = cusolverDnGetDeterministicMode(handle, &DeterministicMode_t); #endif + return 0; } diff --git a/tests/unit_tests/synthetic/runtime_functions.cu b/tests/unit_tests/synthetic/runtime_functions.cu index 1422edd6..f9c92a39 100644 --- a/tests/unit_tests/synthetic/runtime_functions.cu +++ b/tests/unit_tests/synthetic/runtime_functions.cu @@ -1606,6 +1606,13 @@ int main() { result = 
cudaUnbindTexture(texref); #endif +#if CUDA_VERSION >= 12000 + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long *flags); + // HIP: hipError_t hipGraphExecGetFlags(hipGraphExec_t graphExec, unsigned long long* flags); + // CHECK: result = hipGraphExecGetFlags(GraphExec_t, &ull_2); + result = cudaGraphExecGetFlags(GraphExec_t, &ull_2); +#endif + #if CUDA_VERSION >= 12020 // CHECK: hipGraphNodeParams *graphNodeParams = nullptr; cudaGraphNodeParams *graphNodeParams = nullptr; @@ -1614,6 +1621,16 @@ int main() { // HIP: hipError_t hipGraphAddNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t *pDependencies, size_t numDependencies, hipGraphNodeParams *nodeParams); // CHECK: result = hipGraphAddNode(&graphNode, Graph_t, &graphNode_2, bytes, graphNodeParams); result = cudaGraphAddNode(&graphNode, Graph_t, &graphNode_2, bytes, graphNodeParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphNodeSetParams(cudaGraphNode_t node, struct cudaGraphNodeParams *nodeParams); + // HIP: hipError_t hipGraphNodeSetParams(hipGraphNode_t node, hipGraphNodeParams *nodeParams); + // CHECK: result = hipGraphNodeSetParams(graphNode, graphNodeParams); + result = cudaGraphNodeSetParams(graphNode, graphNodeParams); + + // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphExecNodeSetParams(cudaGraphExec_t graphExec, cudaGraphNode_t node, struct cudaGraphNodeParams *nodeParams); + // HIP: hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t node, hipGraphNodeParams* nodeParams); + // CHECK: result = hipGraphExecNodeSetParams(GraphExec_t, graphNode, graphNodeParams); + result = cudaGraphExecNodeSetParams(GraphExec_t, graphNode, graphNodeParams); #endif #if CUDA_VERSION >= 12030 diff --git a/tests/unit_tests/synthetic/runtime_functions_12000.cu b/tests/unit_tests/synthetic/runtime_functions_12000.cu index 9cce4fad..d2e646ae 100644 --- 
a/tests/unit_tests/synthetic/runtime_functions_12000.cu +++ b/tests/unit_tests/synthetic/runtime_functions_12000.cu @@ -46,7 +46,7 @@ int main() { // TODO: detect cudaGetDriverEntryPoint signature and report warning/error for old (before CUDA 12.0) signature // HIP: hipError_t hipGetProcAddress(const char* symbol, void** pfn, int hipVersion, uint64_t flags, hipDriverProcAddressQueryResult* symbolStatus); // TODO: add an explicit static_cast for ull - // CHECK: result = hipGetProcAddress(symbol.c_str(), &pfn, 602, ull, &driverProcAddressQueryResult); + // CHECK: result = hipGetProcAddress(symbol.c_str(), &pfn, 603, ull, &driverProcAddressQueryResult); result = cudaGetDriverEntryPoint(symbol.c_str(), &pfn, ull, &driverProcAddressQueryResult); #endif