From 806949514ab07a2d7218645022c22962696adf46 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Fri, 2 Aug 2024 10:03:24 -0700
Subject: [PATCH] [ci] set timeout for test_oot_registration.py (#7082)

---
 tests/entrypoints/openai/test_oot_registration.py | 4 ++++
 vllm/worker/worker.py                             | 4 +++-
 vllm/worker/xpu_worker.py                         | 4 +++-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/openai/test_oot_registration.py b/tests/entrypoints/openai/test_oot_registration.py
index dbbda6de1fa0..5272ac4065f1 100644
--- a/tests/entrypoints/openai/test_oot_registration.py
+++ b/tests/entrypoints/openai/test_oot_registration.py
@@ -36,10 +36,12 @@ def test_oot_registration_for_api_server():
     ctx = torch.multiprocessing.get_context()
     server = ctx.Process(target=server_function, args=(port, ))
     server.start()
+    MAX_SERVER_START_WAIT_S = 60
     client = OpenAI(
         base_url=f"http://localhost:{port}/v1",
         api_key="token-abc123",
     )
+    now = time.time()
     while True:
         try:
             completion = client.chat.completions.create(
@@ -57,6 +59,8 @@ def test_oot_registration_for_api_server():
         except OpenAIError as e:
             if "Connection error" in str(e):
                 time.sleep(3)
+                if time.time() - now > MAX_SERVER_START_WAIT_S:
+                    raise RuntimeError("Server did not start in time") from e
             else:
                 raise e
     server.kill()
diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
index f3c379d1aa34..9e2cfff435cf 100644
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -186,7 +186,9 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
         # GPU did not change their memory usage during the profiling.
         peak_memory = self.init_gpu_memory - free_gpu_memory
         assert peak_memory > 0, (
-            "Error in memory profiling. This happens when the GPU memory was "
+            "Error in memory profiling. "
+            f"Initial free memory {self.init_gpu_memory}, current free memory"
+            f" {free_gpu_memory}. This happens when the GPU memory was "
             "not properly cleaned up before initializing the vLLM instance.")
 
         cache_block_size = self.get_cache_block_size_bytes()
diff --git a/vllm/worker/xpu_worker.py b/vllm/worker/xpu_worker.py
index 6a822c2ba3e7..0f22d67c4f25 100644
--- a/vllm/worker/xpu_worker.py
+++ b/vllm/worker/xpu_worker.py
@@ -138,7 +138,9 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
         # GPU did not change their memory usage during the profiling.
         peak_memory = self.init_gpu_memory - free_gpu_memory
         assert peak_memory > 0, (
-            "Error in memory profiling. This happens when the GPU memory was "
+            "Error in memory profiling. "
+            f"Initial free memory {self.init_gpu_memory}, current free memory"
+            f" {free_gpu_memory}. This happens when the GPU memory was "
             "not properly cleaned up before initializing the vLLM instance.")
 
         cache_block_size = self.get_cache_block_size_bytes()
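
Note: below is a minimal, self-contained sketch of the deadline-bounded retry pattern the test change introduces: poll, sleep between attempts, and give up once a fixed startup budget is exhausted. It probes a raw TCP port rather than issuing the OpenAI chat-completion request the real test uses; wait_for_port, its host/port arguments, and the 1-second connect timeout are illustrative assumptions, not part of this patch.

import socket
import time

MAX_SERVER_START_WAIT_S = 60  # same startup budget as the patch
POLL_INTERVAL_S = 3           # same sleep between attempts as the patch


def wait_for_port(host: str, port: int) -> None:
    """Block until (host, port) accepts a TCP connection, or raise
    RuntimeError once the startup budget is exhausted."""
    # Record the deadline once, before the first attempt.
    deadline = time.time() + MAX_SERVER_START_WAIT_S
    while True:
        try:
            with socket.create_connection((host, port), timeout=1):
                return  # server is up and accepting connections
        except OSError as e:
            time.sleep(POLL_INTERVAL_S)
            # Check the deadline after sleeping, as the patch does, so a
            # server that never comes up fails the test instead of
            # hanging CI forever.
            if time.time() > deadline:
                raise RuntimeError("Server did not start in time") from e

Checking the deadline only inside the connection-error branch matches the patch: any other OpenAIError is re-raised immediately, so the timeout only guards the "server not yet listening" case.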
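
The two worker changes only enrich an assert message, but the quantity they report is worth spelling out. The following is a sketch of that bookkeeping, assuming a CUDA device and using the standard torch.cuda.mem_get_info() call, which returns (free, total) bytes; profile_peak_memory and the elided profiling step are illustrative, not vLLM's actual worker code.

import torch


def profile_peak_memory() -> int:
    """Sketch: measure GPU memory consumed by a profiling run as the
    drop in free memory from before to after."""
    init_gpu_memory, _ = torch.cuda.mem_get_info()

    # ... run the profiling forward pass here (elided) ...

    free_gpu_memory, _ = torch.cuda.mem_get_info()
    peak_memory = init_gpu_memory - free_gpu_memory
    # The difference can come out non-positive when the GPU was not
    # properly cleaned up before initialization, e.g. another process
    # released memory between the two measurements; reporting both
    # numbers in the message makes that failure mode debuggable.
    assert peak_memory > 0, (
        "Error in memory profiling. "
        f"Initial free memory {init_gpu_memory}, current free memory"
        f" {free_gpu_memory}.")
    return peak_memory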