From 34a0e96d463d37cf85cee9c2cd01397034e97573 Mon Sep 17 00:00:00 2001 From: Avshalom Manevich <12231371+avshalomman@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:11:39 +0700 Subject: [PATCH] [Kernel] changing fused moe kernel chunk size default to 32k (#7995) --- vllm/envs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/envs.py b/vllm/envs.py index 590698416329..30320af5fa43 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -352,7 +352,7 @@ def get_default_config_root(): os.path.join(get_default_cache_root(), "vllm", "xla_cache"), )), "VLLM_FUSED_MOE_CHUNK_SIZE": - lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "65536")), + lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "32768")), # If set, vllm will skip the deprecation warnings. "VLLM_NO_DEPRECATION_WARNING":