
Commit 72cd745

danieljannai21 authored and robertgshaw2-redhat committed Jul 7, 2024
[Frontend] Add template related params to request (vllm-project#5709)
1 parent 77f588c commit 72cd745

3 files changed: +30 −1 lines changed

requirements-common.txt (+1 −1)

@@ -6,7 +6,7 @@ numpy < 2.0.0
 requests
 tqdm
 py-cpuinfo
-transformers >= 4.42.0  # Required for Gemma 2.
+transformers >= 4.42.0  # Required for Gemma 2 and for additional chat template parameters.
 tokenizers >= 0.19.1  # Required for Llama 3.
 fastapi
 aiohttp

vllm/entrypoints/openai/protocol.py

+21 lines changed

@@ -190,6 +190,27 @@ class ChatCompletionRequest(OpenAIBaseModel):
         "special tokens so this should be set to False (as is the "
         "default)."),
     )
+    documents: Optional[List[Dict[str, str]]] = Field(
+        default=None,
+        description=
+        ("A list of dicts representing documents that will be accessible to "
+         "the model if it is performing RAG (retrieval-augmented generation)."
+         " If the template does not support RAG, this argument will have no "
+         "effect. We recommend that each document should be a dict containing "
+         "\"title\" and \"text\" keys."),
+    )
+    chat_template: Optional[str] = Field(
+        default=None,
+        description=(
+            "A Jinja template to use for this conversion. "
+            "If this is not passed, the model's default chat template will be "
+            "used instead."),
+    )
+    chat_template_kwargs: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description=("Additional kwargs to pass to the template renderer. "
+                     "Will be accessible by the chat template."),
+    )
     include_stop_str_in_output: Optional[bool] = Field(
         default=False,
         description=(
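These three fields ride on the OpenAI-compatible chat endpoint, so a client can supply them as extra body parameters. Below is a minimal sketch (not part of this commit), assuming a vLLM server at http://localhost:8000 and a model whose chat template understands documents; the model name and the enable_citations kwarg are illustrative assumptions.

```python
# Illustrative client-side sketch (not part of this commit). The new
# vLLM-specific fields travel in the request body via extra_body.
from openai import OpenAI

# Assumption: a vLLM OpenAI-compatible server is running locally.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="CohereForAI/c4ai-command-r-v01",  # assumption: a RAG-aware chat template
    messages=[{"role": "user", "content": "What does the report conclude?"}],
    extra_body={
        # Recommended shape per the field description: "title" and "text" keys.
        "documents": [
            {"title": "Q2 Report", "text": "Revenue grew 12% quarter over quarter."},
        ],
        # Hypothetical kwarg: it only has an effect if the template reads it.
        "chat_template_kwargs": {"enable_citations": True},
    },
)
print(response.choices[0].message.content)
```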

vllm/entrypoints/openai/serving_chat.py

+8 lines changed

@@ -218,10 +218,18 @@ async def create_chat_completion(
                 conversation.extend(chat_parsed_result.messages)
                 image_futures.extend(chat_parsed_result.image_futures)
 
+            tool_dicts = None if request.tools is None else [
+                tool.model_dump() for tool in request.tools
+            ]
+
             prompt = self.tokenizer.apply_chat_template(
                 conversation=conversation,
                 tokenize=False,
                 add_generation_prompt=request.add_generation_prompt,
+                tools=tool_dicts,
+                documents=request.documents,
+                chat_template=request.chat_template,
+                **(request.chat_template_kwargs or {}),
             )
         except Exception as e:
             logger.error("Error in applying chat template from request: %s", e)
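On the server side these arguments funnel into transformers' apply_chat_template, whose renderer exposes documents and any extra kwargs to the Jinja template; that is why requirements-common.txt bumps transformers to >= 4.42.0 above. A self-contained sketch of that behavior follows; the toy template is an assumption for illustration, not the commit's code.

```python
# Standalone sketch (assumes transformers >= 4.42.0): documents, chat_template,
# and extra kwargs are forwarded to the Jinja renderer by apply_chat_template.
from transformers import AutoTokenizer

# Any tokenizer works here because the template is passed in explicitly.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Toy template (an assumption): renders the conversation, then any documents,
# then reacts to a custom enable_citations variable.
toy_template = (
    "{% for message in messages %}"
    "{{ message['role'] }}: {{ message['content'] }}\n"
    "{% endfor %}"
    "{% if documents %}Context:\n"
    "{% for doc in documents %}- {{ doc['title'] }}: {{ doc['text'] }}\n{% endfor %}"
    "{% endif %}"
    "{% if enable_citations %}(cite your sources)\n{% endif %}"
)

prompt = tokenizer.apply_chat_template(
    conversation=[{"role": "user", "content": "Summarize the context."}],
    tokenize=False,
    chat_template=toy_template,  # overrides the model's default template
    documents=[{"title": "Q2 Report", "text": "Revenue grew 12%."}],
    enable_citations=True,  # what **(request.chat_template_kwargs or {}) unpacks into
)
print(prompt)
```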
