Temp hack to work with prebuilt 0.4.0-post kernels
Signed-off-by: Nick Hill <nickhill@us.ibm.com>
njhill authored and joerunde committed Apr 11, 2024
1 parent a880d89 commit 3d71ef7
Showing 1 changed file with 3 additions and 3 deletions.
vllm/attention/ops/paged_attn.py: 3 additions & 3 deletions
@@ -76,7 +76,7 @@ def write_to_paged_cache(
             value_cache,
             slot_mapping.flatten(),
             kv_cache_dtype,
-            kv_scale,
+            # kv_scale,
         )
 
     @staticmethod
@@ -123,7 +123,7 @@ def forward_decode(
                 max_context_len,
                 alibi_slopes,
                 kv_cache_dtype,
-                kv_scale,
+                # kv_scale,
             )
         else:
             # Run PagedAttention V2.
@@ -155,7 +155,7 @@ def forward_decode(
                 max_context_len,
                 alibi_slopes,
                 kv_cache_dtype,
-                kv_scale,
+                # kv_scale,
             )
         return output
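
For context, all three hunks make the same change: the trailing kv_scale argument is commented out at each kernel call site so that the Python code matches the signatures of the prebuilt 0.4.0-post kernels, which evidently do not yet accept a kv_scale parameter. As a minimal sketch of a less brittle alternative (hypothetical; the cache_ops.reshape_and_cache name and argument order below are assumptions for illustration, not taken from this diff), the call site could fall back at runtime instead of being edited by hand:

    # Hypothetical sketch only: try the newer kernel signature first and fall back
    # to the prebuilt 0.4.0-post signature if the extension does not accept kv_scale.
    # The cache_ops.reshape_and_cache name and argument order are assumptions.
    def write_kv_compat(cache_ops, key, value, key_cache, value_cache,
                        slot_mapping, kv_cache_dtype, kv_scale):
        try:
            cache_ops.reshape_and_cache(
                key, value, key_cache, value_cache,
                slot_mapping.flatten(), kv_cache_dtype, kv_scale,
            )
        except TypeError:
            # Older prebuilt kernels take one argument fewer; retry without kv_scale.
            cache_ops.reshape_and_cache(
                key, value, key_cache, value_cache,
                slot_mapping.flatten(), kv_cache_dtype,
            )

Either way the net effect matches this commit: kv_scale is simply not forwarded to kernels that cannot accept it.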
