feat: support vLLM as inference runtime #83

Merged: 3 commits, May 8, 2024
Changes from 1 commit

feat: support vllm
jjleng committed May 8, 2024
commit 2c58b9b9b3d677c6989de22a175afe1fa1ca8592
paka/k8s/model_group/runtime/vllm.py (49 additions, 0 deletions)
@@ -0,0 +1,49 @@
from __future__ import annotations

import re
import shlex
from typing import List

from huggingface_hub.utils import validate_repo_id

from paka.cluster.context import Context
from paka.cluster.utils import get_model_store
from paka.config import CloudModelGroup
from paka.constants import MODEL_MOUNT_PATH


# Heuristic to determine if the image is a vLLM image
def is_vllm_image(image: str) -> bool:
    return image.lower().startswith("vllm")


def get_runtime_command_vllm(ctx: Context, model_group: CloudModelGroup) -> List[str]:
    runtime = model_group.runtime
    if runtime.command:
        command_str = " ".join(runtime.command) if runtime.command else ""
        if re.search(r"(--model)[ \t]*\S+", command_str):
            return runtime.command

    if model_group.model:
        if model_group.model.useModelStore:
            store = get_model_store(ctx, with_progress_bar=False)
            if not store.glob(f"{model_group.name}/*"):
                raise ValueError(
                    f"No model named {model_group.name} was found in the model store."
                )
            model_to_load = f"{MODEL_MOUNT_PATH}"
        elif model_group.model.hfRepoId:
            validate_repo_id(model_group.model.hfRepoId)
            model_to_load = model_group.model.hfRepoId
        else:
            raise ValueError("Did not find a model to load.")

    def attach_model_to_command(command: List[str]) -> List[str]:
        return command + ["--model", model_to_load]

    if runtime.command:
        return attach_model_to_command(runtime.command)

    command = shlex.split("python3 -O -u -m vllm.entrypoints.api_server --host 0.0.0.0")

    return attach_model_to_command(command)
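
For context, a minimal usage sketch (not part of the diff). It assumes MODEL_MOUNT_PATH resolves to "/data/model-weights"; the real value comes from paka.constants:

# Illustrative only. is_vllm_image is a plain prefix check, e.g.:
#   is_vllm_image("vllm/vllm-openai:v0.4.1")             -> True
#   is_vllm_image("ghcr.io/ggerganov/llama.cpp:server")  -> False
#
# With no runtime.command configured and a model loaded from the model store,
# get_runtime_command_vllm(ctx, model_group) would return something like:
command = [
    "python3", "-O", "-u", "-m", "vllm.entrypoints.api_server",
    "--host", "0.0.0.0",
    "--model", "/data/model-weights",  # assumed value of MODEL_MOUNT_PATH
]
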
paka/k8s/model_group/service.py (12 additions, 7 deletions)
@@ -15,6 +15,7 @@
     get_runtime_command_llama_cpp,
     is_llama_cpp_image,
 )
+from paka.k8s.model_group.runtime.vllm import get_runtime_command_vllm, is_vllm_image
 from paka.k8s.utils import CustomResource, apply_resource
 from paka.logger import logger
 from paka.model.hf_model import HuggingFaceModel
@@ -42,14 +43,16 @@ def get_runtime_command(
     # If user did not provide a command, we need to provide a default command with heuristics.
     if is_llama_cpp_image(runtime.image):
         command = get_runtime_command_llama_cpp(ctx, model_group)
+    elif is_vllm_image(runtime.image):
+        command = get_runtime_command_vllm(ctx, model_group)
 
-    # Add or replace the port in the command
-    for i in range(len(command)):
-        if command[i] == "--port":
-            command[i + 1] = str(port)
-            break
-    else:
-        command.extend(["--port", str(port)])
+        # Add or replace the port in the command
+        for i in range(len(command)):
+            if command[i] == "--port":
+                command[i + 1] = str(port)
+                break
+        else:
+            command.extend(["--port", str(port)])
 
     return command
 
@@ -58,6 +61,8 @@ def get_health_check_paths(model_group: T_CloudModelGroup) -> Tuple[str, str]:
     # Return a tuple for ready and live probes
     if is_llama_cpp_image(model_group.runtime.image):
         return ("/health", "/health")
+    elif is_vllm_image(model_group.runtime.image):
+        return ("/health", "/health")
 
     raise ValueError("Unsupported runtime image for health check paths.")