[Kernel] Tuned FP8 Kernels for Ada Lovelace (vllm-project#6677)

Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
neuralmagic · Aug 17, 2024 · 18087ae
1 parent f2362f1
commit 18087ae
Show file tree

Hide file tree

Showing 6 changed files with 877 additions and 490 deletions.
diff --git a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
@@ -13,7 +13,7 @@
 from vllm import _custom_ops as ops
 from vllm.utils import FlexibleArgumentParser
 
-DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())[1:]
+DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())
 DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512]
 DEFAULT_TP_SIZES = [1]