From 2b8e2683233b828cd7a59a7b02dc274c4d830afb Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 15 Oct 2024 18:41:22 +0000 Subject: [PATCH 1/2] Don't build 9.0a for c2x since it's outside of the CUDA 12.0 guard and won't help perf that much anyway --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a424ad7b110..fe7426086c36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -305,7 +305,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") # For the cutlass_scaled_mm kernels we want to build the c2x (CUTLASS 2.x) # kernels for the remaining archs that are not already built for 3x. cuda_archs_loose_intersection(SCALED_MM_2X_ARCHS - "7.5;8.0;8.6;8.9;9.0;9.0a" "${CUDA_ARCHS}") + "7.5;8.0;8.6;8.9;9.0" "${CUDA_ARCHS}") # subtract out the archs that are already built for 3x list(REMOVE_ITEM SCALED_MM_2X_ARCHS ${SCALED_MM_3X_ARCHS}) if (SCALED_MM_2X_ARCHS) From 5a7b00e7a6377ca7971de3ca762583a9153f4a55 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 15 Oct 2024 19:08:48 +0000 Subject: [PATCH 2/2] fix error message --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fe7426086c36..1f4648a37dbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -286,10 +286,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") list(APPEND VLLM_GPU_FLAGS "-DENABLE_SCALED_MM_C3X=1") message(STATUS "Building scaled_mm_c3x for archs: ${SCALED_MM_3X_ARCHS}") else() - # clear SCALED_MM_3X_ARCHS so the scaled_mm_c2x kernels know we didn't - # build any 3x kernels - set(SCALED_MM_3X_ARCHS) - if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_3X_ARCHS) message(STATUS "Not building scaled_mm_c3x as CUDA Compiler version is " "not >= 12.0, we recommend upgrading to CUDA 12.0 or " @@ -299,6 +295,10 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") message(STATUS "Not building scaled_mm_c3x as no compatible archs found " "in CUDA target architectures") 
endif() + + # clear SCALED_MM_3X_ARCHS so the scaled_mm_c2x kernels know we didn't + # build any 3x kernels + set(SCALED_MM_3X_ARCHS) endif() #