feat: support vLLM as inference runtime #83

Merged: 3 commits, May 8, 2024
Changes from 1 commit

feat: support vllm
jjleng committed May 8, 2024
commit 2c58b9b9b3d677c6989de22a175afe1fa1ca8592
paka/k8s/model_group/runtime/vllm.py (49 additions, 0 deletions)
@@ -0,0 +1,49 @@
from __future__ import annotations

import re
import shlex
from typing import List

from huggingface_hub.utils import validate_repo_id

from paka.cluster.context import Context
from paka.cluster.utils import get_model_store
from paka.config import CloudModelGroup
from paka.constants import MODEL_MOUNT_PATH


# Heuristic to determine if the image is a vLLM image
def is_vllm_image(image: str) -> bool:
    return image.lower().startswith("vllm")


def get_runtime_command_vllm(ctx: Context, model_group: CloudModelGroup) -> List[str]:
    runtime = model_group.runtime
    if runtime.command:
        command_str = " ".join(runtime.command) if runtime.command else ""
        if re.search(r"(--model)[ \t]*\S+", command_str):
            return runtime.command

    if model_group.model:
        if model_group.model.useModelStore:
            store = get_model_store(ctx, with_progress_bar=False)
            if not store.glob(f"{model_group.name}/*"):
                raise ValueError(
                    f"No model named {model_group.name} was found in the model store."
                )
            model_to_load = f"{MODEL_MOUNT_PATH}"
        elif model_group.model.hfRepoId:
            validate_repo_id(model_group.model.hfRepoId)
            model_to_load = model_group.model.hfRepoId
        else:
            raise ValueError("Did not find a model to load.")

    def attach_model_to_command(command: List[str]) -> List[str]:
        return command + ["--model", model_to_load]

    if runtime.command:
        return attach_model_to_command(runtime.command)

    command = shlex.split("python3 -O -u -m vllm.entrypoints.api_server --host 0.0.0.0")

    return attach_model_to_command(command)
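
For context, a minimal usage sketch (not part of the diff). It assumes MODEL_MOUNT_PATH resolves to "/data/model-weights"; the real value comes from paka.constants:

# Illustrative only. is_vllm_image is a plain prefix check, e.g.:
#   is_vllm_image("vllm/vllm-openai:v0.4.1")             -> True
#   is_vllm_image("ghcr.io/ggerganov/llama.cpp:server")  -> False
#
# With no runtime.command configured and a model loaded from the model store,
# get_runtime_command_vllm(ctx, model_group) would return something like:
command = [
    "python3", "-O", "-u", "-m", "vllm.entrypoints.api_server",
    "--host", "0.0.0.0",
    "--model", "/data/model-weights",  # assumed value of MODEL_MOUNT_PATH
]
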
paka/k8s/model_group/service.py (12 additions, 7 deletions)
@@ -15,6 +15,7 @@
     get_runtime_command_llama_cpp,
     is_llama_cpp_image,
 )
+from paka.k8s.model_group.runtime.vllm import get_runtime_command_vllm, is_vllm_image
 from paka.k8s.utils import CustomResource, apply_resource
 from paka.logger import logger
 from paka.model.hf_model import HuggingFaceModel
@@ -42,14 +43,16 @@ def get_runtime_command(
     # If user did not provide a command, we need to provide a default command with heuristics.
     if is_llama_cpp_image(runtime.image):
         command = get_runtime_command_llama_cpp(ctx, model_group)
+    elif is_vllm_image(runtime.image):
+        command = get_runtime_command_vllm(ctx, model_group)
 
-    # Add or replace the port in the command
-    for i in range(len(command)):
-        if command[i] == "--port":
-            command[i + 1] = str(port)
-            break
-    else:
-        command.extend(["--port", str(port)])
+        # Add or replace the port in the command
+        for i in range(len(command)):
+            if command[i] == "--port":
+                command[i + 1] = str(port)
+                break
+        else:
+            command.extend(["--port", str(port)])
 
     return command
 
@@ -58,6 +61,8 @@ def get_health_check_paths(model_group: T_CloudModelGroup) -> Tuple[str, str]:
     # Return a tuple for ready and live probes
     if is_llama_cpp_image(model_group.runtime.image):
         return ("/health", "/health")
+    elif is_vllm_image(model_group.runtime.image):
+        return ("/health", "/health")
 
     raise ValueError("Unsupported runtime image for health check paths.")