vllm-project · ywang96 · Jun 17, 2024 · Jun 17, 2024 · Jun 17, 2024 · Jun 17, 2024
diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
@@ -4,10 +4,12 @@
 import time
 import traceback
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import List, Optional, Union
 
 import aiohttp
 from tqdm.asyncio import tqdm
+from transformers import (AutoTokenizer, PreTrainedTokenizer,
+                          PreTrainedTokenizerFast)
 
 AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
 
@@ -388,6 +390,13 @@ def remove_prefix(text: str, prefix: str) -> str:
     return text
 
 
+def get_tokenizer(
+    pretrained_model_name_or_path: str, trust_remote_code: bool
+) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
+    return AutoTokenizer.from_pretrained(pretrained_model_name_or_path,
+                                         trust_remote_code=trust_remote_code)
+
+
 ASYNC_REQUEST_FUNCS = {
     "tgi": async_request_tgi,
     "vllm": async_request_openai_completions,

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
@@ -39,7 +39,10 @@
 from tqdm.asyncio import tqdm
 from transformers import PreTrainedTokenizerBase
 
-from vllm.transformers_utils.tokenizer import get_tokenizer
+try:
+    from vllm.transformers_utils.tokenizer import get_tokenizer
+except ImportError:
+    from backend_request_func import get_tokenizer
 
 
 @dataclass