From 2df8d0353649df118cb73698b91101951a378383 Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Wed, 3 Jul 2024 23:02:58 +0000 Subject: [PATCH] Define and optimize RDNA1 (#8085) --- ggml-cuda/common.cuh | 4 ++++ ggml-cuda/mmq.cuh | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ggml-cuda/common.cuh b/ggml-cuda/common.cuh index 472f4ace1c2ad2..4ff06b8719d378 100644 --- a/ggml-cuda/common.cuh +++ b/ggml-cuda/common.cuh @@ -227,6 +227,10 @@ typedef float2 dfloat2; #define RDNA2 #endif +#if defined(__gfx1010__) || defined(__gfx1012__) +#define RDNA1 +#endif + #ifndef __has_builtin #define __has_builtin(x) 0 #endif diff --git a/ggml-cuda/mmq.cuh b/ggml-cuda/mmq.cuh index 94d6c2c7e08d62..ffb3b4bd572a16 100644 --- a/ggml-cuda/mmq.cuh +++ b/ggml-cuda/mmq.cuh @@ -61,12 +61,16 @@ static constexpr __device__ int get_mmq_x_max_device() { } static constexpr int get_mmq_y_host(const int cc) { - return int8_mma_available(cc) || cc >= CC_VOLTA ? 128 : 64; + return cc >= CC_OFFSET_AMD ? (cc == CC_RDNA1 ? 64 : 128) : (cc >= CC_VOLTA ? 128 : 64); } static constexpr __device__ int get_mmq_y_device() { #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) +#if defined(RDNA1) + return 64; +#else return 128; +#endif // defined RDNA1 #else #if __CUDA_ARCH__ >= CC_VOLTA return 128; @@ -2400,9 +2404,9 @@ static __device__ void mul_mat_q_process_tile( template #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) -#if defined(RDNA3) || defined(RDNA2) +#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1) __launch_bounds__(WARP_SIZE*nwarps, 2) -#endif // defined(RDNA3) || defined(RDNA2) +#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1) #else #if __CUDA_ARCH__ >= CC_VOLTA __launch_bounds__(WARP_SIZE*nwarps, 1)