Skip to content

Commit ed1161f

Browse files
zhyncs authored and jimpang committed
[Misc] use AutoTokenizer for benchmark serving when vLLM not installed (vllm-project#5588)
1 parent 9909112 commit ed1161f

File tree

2 files changed

+32
-2
lines changed

2 files changed

+32
-2
lines changed

benchmarks/backend_request_func.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44
import time
55
import traceback
66
from dataclasses import dataclass, field
7-
from typing import List, Optional
7+
from typing import List, Optional, Union
88

99
import aiohttp
10+
import huggingface_hub.constants
1011
from tqdm.asyncio import tqdm
12+
from transformers import (AutoTokenizer, PreTrainedTokenizer,
13+
PreTrainedTokenizerFast)
1114

1215
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
1316

@@ -388,6 +391,30 @@ def remove_prefix(text: str, prefix: str) -> str:
388391
return text
389392

390393

394+
def get_model(pretrained_model_name_or_path: str) -> str:
    """Resolve a model identifier into a value a tokenizer loader can use.

    When the ``VLLM_USE_MODELSCOPE`` environment variable equals ``true``
    (case-insensitive), the repository is fetched with ModelScope's
    ``snapshot_download`` and the path it returns is handed back.
    Otherwise the identifier is returned unchanged.

    Args:
        pretrained_model_name_or_path: Model repository identifier.

    Returns:
        The value returned by ModelScope's ``snapshot_download`` when
        ModelScope is enabled, else ``pretrained_model_name_or_path`` as-is.
    """
    if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() != 'true':
        # The keyword names used below (``model_id``, ``ignore_file_pattern``)
        # belong to ModelScope's API; huggingface_hub.snapshot_download
        # expects ``repo_id`` / ``ignore_patterns`` and would raise TypeError
        # if called the same way. get_tokenizer passes this result to
        # AutoTokenizer.from_pretrained, which accepts a Hub id directly, so
        # no explicit download is needed on this path.
        return pretrained_model_name_or_path

    # Imported lazily so modelscope is only required when it is enabled.
    from modelscope import snapshot_download

    return snapshot_download(
        model_id=pretrained_model_name_or_path,
        local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
        # Skip files whose names match these weight-like extensions.
        ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])
405+
406+
407+
def get_tokenizer(
    pretrained_model_name_or_path: str, trust_remote_code: bool
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
    """Load a tokenizer through ``AutoTokenizer.from_pretrained``.

    If the argument is not ``None`` and does not name an existing filesystem
    path, it is first passed through ``get_model`` and the result is used as
    the tokenizer source instead.

    Args:
        pretrained_model_name_or_path: Local path or model identifier.
        trust_remote_code: Forwarded to ``AutoTokenizer.from_pretrained``.

    Returns:
        The tokenizer object produced by ``AutoTokenizer.from_pretrained``.
    """
    source = pretrained_model_name_or_path
    # ``None`` and existing local paths are handed to the loader untouched.
    use_directly = source is None or os.path.exists(source)
    if not use_directly:
        source = get_model(source)
    return AutoTokenizer.from_pretrained(
        source, trust_remote_code=trust_remote_code)
416+
417+
391418
ASYNC_REQUEST_FUNCS = {
392419
"tgi": async_request_tgi,
393420
"vllm": async_request_openai_completions,

benchmarks/benchmark_serving.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@
3939
from tqdm.asyncio import tqdm
4040
from transformers import PreTrainedTokenizerBase
4141

42-
from vllm.transformers_utils.tokenizer import get_tokenizer
42+
try:
43+
from vllm.transformers_utils.tokenizer import get_tokenizer
44+
except ImportError:
45+
from backend_request_func import get_tokenizer
4346

4447

4548
@dataclass

0 commit comments

Comments
 (0)