FEAT: support scheduling-policy for vllm
hwzhuhao committed Dec 24, 2024
1 parent 5597394 commit 2ade8f8
Showing 1 changed file with 7 additions and 2 deletions.
xinference/model/llm/vllm/core.py (7 additions, 2 deletions)
@@ -70,6 +70,7 @@ class VLLMModelConfig(TypedDict, total=False):
max_model_len: Optional[int]
limit_mm_per_prompt: Optional[Dict[str, int]]
guided_decoding_backend: Optional[str]
+scheduling_policy: Optional[str]


class VLLMGenerateConfig(TypedDict, total=False):
@@ -247,7 +248,6 @@ def load(self):
multiprocessing.set_start_method("fork", force=True)

self._model_config = self._sanitize_model_config(self._model_config)
-
if self.lora_modules is None:
self.lora_requests = []
else:
@@ -330,7 +330,9 @@ def _sanitize_model_config(
model_config.setdefault("quantization", None)
model_config.setdefault("max_model_len", None)
model_config.setdefault("guided_decoding_backend", "outlines")

# Add scheduling policy if vLLM version is 0.6.3 or higher
if vllm.__version__ >= "0.6.3":
model_config.setdefault("scheduling_policy", "fcfs")
return model_config

@staticmethod
@@ -862,6 +864,9 @@ def _sanitize_model_config(
"image": 2, # default 2 images all chat
}
)
+# Add scheduling policy if vLLM version is 0.6.3 or higher
+if vllm.__version__ >= "0.6.3":
+    model_config.setdefault("scheduling_policy", "fcfs")

return model_config

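Note that the version gate added here compares vllm.__version__ with "0.6.3" as plain strings, which works for these particular values but orders versions lexicographically (for example, "0.10.0" sorts below "0.6.3" as a string). As a hedged illustration only, not part of this commit, the same default could be applied with a numeric comparison via the packaging library; the helper name below is made up:

    from packaging.version import Version

    import vllm


    def set_scheduling_policy_default(model_config: dict) -> dict:
        # Parse both sides so "0.10.0" compares as greater than "0.6.3",
        # which a raw string comparison would get wrong.
        if Version(vllm.__version__) >= Version("0.6.3"):
            model_config.setdefault("scheduling_policy", "fcfs")
        return model_config

Because dict.setdefault only fills the key when it is absent, a caller that explicitly passes scheduling_policy (vLLM's option takes "fcfs" or "priority") keeps its own value.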
