diff --git a/otherarch/ggml_v2-cuda-legacy.cu b/otherarch/ggml_v2-cuda-legacy.cu index e7053b764b670..967a192551c63 100644 --- a/otherarch/ggml_v2-cuda-legacy.cu +++ b/otherarch/ggml_v2-cuda-legacy.cu @@ -4,60 +4,7 @@ #include #include -#if defined(GGML_USE_HIPBLAS) -#include -#include -#include -#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F -#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F -#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT -#define CUBLAS_OP_N HIPBLAS_OP_N -#define CUBLAS_OP_T HIPBLAS_OP_T -#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS -#define CUBLAS_TF32_TENSOR_OP_MATH 0 -#define CUDA_R_16F HIPBLAS_R_16F -#define CUDA_R_32F HIPBLAS_R_32F -#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) -#define cublasCreate hipblasCreate -#define cublasGemmEx hipblasGemmEx -#define cublasHandle_t hipblasHandle_t -#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS -#define cublasSetStream hipblasSetStream -#define cublasSgemm hipblasSgemm -#define cublasStatus_t hipblasStatus_t -#define cudaDeviceProp hipDeviceProp_t -#define cudaDeviceSynchronize hipDeviceSynchronize -#define cudaError_t hipError_t -#define cudaEventCreateWithFlags hipEventCreateWithFlags -#define cudaEventDisableTiming hipEventDisableTiming -#define cudaEventRecord hipEventRecord -#define cudaEvent_t hipEvent_t -#define cudaFree hipFree -#define cudaFreeHost hipHostFree -#define cudaGetDevice hipGetDevice -#define cudaGetDeviceCount hipGetDeviceCount -#define cudaGetDeviceProperties hipGetDeviceProperties -#define cudaGetErrorString hipGetErrorString -#define cudaGetLastError hipGetLastError -#define cudaMalloc hipMalloc -#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault) -#define cudaMemcpy hipMemcpy -#define cudaMemcpy2DAsync hipMemcpy2DAsync -#define cudaMemcpyAsync hipMemcpyAsync -#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice -#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost -#define cudaMemcpyHostToDevice hipMemcpyHostToDevice -#define cudaMemcpyKind hipMemcpyKind -#define cudaMemset hipMemset -#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize -#define cudaSetDevice hipSetDevice -#define cudaStreamCreateWithFlags hipStreamCreateWithFlags -#define cudaStreamNonBlocking hipStreamNonBlocking -#define cudaStreamSynchronize hipStreamSynchronize -#define cudaStreamWaitEvent hipStreamWaitEvent -#define cudaStream_t hipStream_t -#define cudaSuccess hipSuccess -#else +#ifndef GGML_USE_HIPBLAS #include #include #include diff --git a/otherarch/ggml_v2-cuda-legacy.h b/otherarch/ggml_v2-cuda-legacy.h index fbee9eff30969..ee2d502a2a122 100644 --- a/otherarch/ggml_v2-cuda-legacy.h +++ b/otherarch/ggml_v2-cuda-legacy.h @@ -1,5 +1,61 @@ #include "ggml_v2.h" +#if defined(GGML_USE_HIPBLAS) +#include +#include +#include + +#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F +#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F +#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT +#define CUBLAS_OP_N HIPBLAS_OP_N +#define CUBLAS_OP_T HIPBLAS_OP_T +#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS +#define CUBLAS_TF32_TENSOR_OP_MATH 0 +#define CUDA_R_16F HIPBLAS_R_16F +#define CUDA_R_32F HIPBLAS_R_32F +#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) +#define cublasCreate hipblasCreate +#define cublasGemmEx hipblasGemmEx +#define cublasHandle_t hipblasHandle_t +#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS +#define cublasSetStream hipblasSetStream +#define cublasSgemm hipblasSgemm +#define cublasStatus_t hipblasStatus_t +#define cudaDeviceProp hipDeviceProp_t +#define cudaDeviceSynchronize hipDeviceSynchronize +#define cudaError_t hipError_t +#define cudaEventCreateWithFlags hipEventCreateWithFlags +#define cudaEventDisableTiming hipEventDisableTiming +#define cudaEventRecord hipEventRecord +#define cudaEvent_t hipEvent_t +#define cudaFree hipFree +#define cudaFreeHost hipHostFree +#define cudaGetDevice hipGetDevice +#define cudaGetDeviceCount hipGetDeviceCount +#define cudaGetDeviceProperties hipGetDeviceProperties +#define cudaGetErrorString hipGetErrorString +#define cudaGetLastError hipGetLastError +#define cudaMalloc hipMalloc +#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault) +#define cudaMemcpy hipMemcpy +#define cudaMemcpy2DAsync hipMemcpy2DAsync +#define cudaMemcpyAsync hipMemcpyAsync +#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemcpyKind hipMemcpyKind +#define cudaMemset hipMemset +#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize +#define cudaSetDevice hipSetDevice +#define cudaStreamCreateWithFlags hipStreamCreateWithFlags +#define cudaStreamNonBlocking hipStreamNonBlocking +#define cudaStreamSynchronize hipStreamSynchronize +#define cudaStreamWaitEvent hipStreamWaitEvent +#define cudaStream_t hipStream_t +#define cudaSuccess hipSuccess +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/otherarch/ggml_v2-cuda.cu b/otherarch/ggml_v2-cuda.cu index b4502df00c637..53371b8954dd8 100644 --- a/otherarch/ggml_v2-cuda.cu +++ b/otherarch/ggml_v2-cuda.cu @@ -4,64 +4,10 @@ #include #include -#if defined(GGML_USE_HIPBLAS) -#include -#include -#include -#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F -#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F -#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT -#define CUBLAS_OP_N HIPBLAS_OP_N -#define CUBLAS_OP_T HIPBLAS_OP_T -#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS -#define CUBLAS_TF32_TENSOR_OP_MATH 0 -#define CUDA_R_16F HIPBLAS_R_16F -#define CUDA_R_32F HIPBLAS_R_32F -#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) -#define cublasCreate hipblasCreate -#define cublasGemmEx hipblasGemmEx -#define cublasHandle_t hipblasHandle_t -#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS -#define cublasSetStream hipblasSetStream -#define cublasSgemm hipblasSgemm -#define cublasStatus_t hipblasStatus_t -#define cudaDeviceProp hipDeviceProp_t -#define cudaDeviceSynchronize hipDeviceSynchronize -#define cudaError_t hipError_t -#define cudaEventCreateWithFlags hipEventCreateWithFlags -#define cudaEventDisableTiming hipEventDisableTiming -#define cudaEventRecord hipEventRecord -#define cudaEvent_t hipEvent_t -#define cudaFree hipFree -#define cudaFreeHost hipHostFree -#define cudaGetDevice hipGetDevice -#define cudaGetDeviceCount hipGetDeviceCount -#define cudaGetDeviceProperties hipGetDeviceProperties -#define cudaGetErrorString hipGetErrorString -#define cudaGetLastError hipGetLastError -#define cudaMalloc hipMalloc -#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault) -#define cudaMemcpy hipMemcpy -#define cudaMemcpy2DAsync hipMemcpy2DAsync -#define cudaMemcpyAsync hipMemcpyAsync -#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice -#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost -#define cudaMemcpyHostToDevice hipMemcpyHostToDevice -#define cudaMemcpyKind hipMemcpyKind -#define cudaMemset hipMemset -#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize -#define cudaSetDevice hipSetDevice -#define cudaStreamCreateWithFlags hipStreamCreateWithFlags -#define cudaStreamNonBlocking hipStreamNonBlocking -#define cudaStreamSynchronize hipStreamSynchronize -#define cudaStreamWaitEvent hipStreamWaitEvent -#define cudaStream_t hipStream_t -#define cudaSuccess hipSuccess -#else +#ifndef GGML_USE_HIPBLAS #include #include #include - #endif #include "ggml_v2-cuda.h" diff --git a/otherarch/ggml_v2-cuda.h b/otherarch/ggml_v2-cuda.h index 4f3c75bafde11..72f7cc4c76438 100644 --- a/otherarch/ggml_v2-cuda.h +++ b/otherarch/ggml_v2-cuda.h @@ -1,5 +1,61 @@ #include "ggml_v2.h" +#if defined(GGML_USE_HIPBLAS) +#include +#include +#include + +#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F +#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F +#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT +#define CUBLAS_OP_N HIPBLAS_OP_N +#define CUBLAS_OP_T HIPBLAS_OP_T +#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS +#define CUBLAS_TF32_TENSOR_OP_MATH 0 +#define CUDA_R_16F HIPBLAS_R_16F +#define CUDA_R_32F HIPBLAS_R_32F +#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) +#define cublasCreate hipblasCreate +#define cublasGemmEx hipblasGemmEx +#define cublasHandle_t hipblasHandle_t +#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS +#define cublasSetStream hipblasSetStream +#define cublasSgemm hipblasSgemm +#define cublasStatus_t hipblasStatus_t +#define cudaDeviceProp hipDeviceProp_t +#define cudaDeviceSynchronize hipDeviceSynchronize +#define cudaError_t hipError_t +#define cudaEventCreateWithFlags hipEventCreateWithFlags +#define cudaEventDisableTiming hipEventDisableTiming +#define cudaEventRecord hipEventRecord +#define cudaEvent_t hipEvent_t +#define cudaFree hipFree +#define cudaFreeHost hipHostFree +#define cudaGetDevice hipGetDevice +#define cudaGetDeviceCount hipGetDeviceCount +#define cudaGetDeviceProperties hipGetDeviceProperties +#define cudaGetErrorString hipGetErrorString +#define cudaGetLastError hipGetLastError +#define cudaMalloc hipMalloc +#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault) +#define cudaMemcpy hipMemcpy +#define cudaMemcpy2DAsync hipMemcpy2DAsync +#define cudaMemcpyAsync hipMemcpyAsync +#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemcpyKind hipMemcpyKind +#define cudaMemset hipMemset +#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize +#define cudaSetDevice hipSetDevice +#define cudaStreamCreateWithFlags hipStreamCreateWithFlags +#define cudaStreamNonBlocking hipStreamNonBlocking +#define cudaStreamSynchronize hipStreamSynchronize +#define cudaStreamWaitEvent hipStreamWaitEvent +#define cudaStream_t hipStream_t +#define cudaSuccess hipSuccess +#endif + #ifdef __cplusplus extern "C" { #endif