Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
🐛 Postpone awaiting server shutdown until the backend context has exited
Browse files Browse the repository at this point in the history
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
  • Loading branch information
joerunde committed Jul 31, 2024
1 parent 5c58ecb commit c605515
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ async def run_server(args, **uvicorn_kwargs) -> None:
logger.info("vLLM API server version %s", VLLM_VERSION)
logger.info("args: %s", args)

shutdown_task = None
async with build_backend(args) as backend:

server = await build_server(
Expand All @@ -387,7 +388,11 @@ def signal_handler() -> None:
await server_task
except asyncio.CancelledError:
logger.info("Gracefully stopping http server")
await server.shutdown()
shutdown_task = server.shutdown()

if shutdown_task:
# NB: Await server shutdown only after the backend context is exited
await shutdown_task


if __name__ == "__main__":
Expand Down

0 comments on commit c605515

Please sign in to comment.