diff --git a/llama_index/llms/vllm.py b/llama_index/llms/vllm.py
index f7dce9a5f45f0..5233bd9d05ebc 100644
--- a/llama_index/llms/vllm.py
+++ b/llama_index/llms/vllm.py
@@ -186,6 +186,29 @@ def __init__(
             output_parser=output_parser,
         )
 
+    def __del__(self) -> None:
+        """Best-effort release of GPU resources held by the vLLM engine.
+
+        ``__del__`` may run during interpreter shutdown, when module
+        imports and attribute lookups can fail, and it also runs on a
+        partially-constructed instance if ``__init__`` raised. All errors
+        are therefore swallowed: exceptions escaping ``__del__`` are
+        ignored by CPython and only emit unraisable-exception noise.
+        """
+        try:
+            import torch
+            from vllm.model_executor.parallel_utils.parallel_state import (
+                destroy_model_parallel,
+            )
+
+            destroy_model_parallel()
+            # _client may be absent if __init__ failed before assigning it.
+            if hasattr(self, "_client"):
+                del self._client
+            if torch.cuda.is_available():
+                torch.cuda.synchronize()
+        except Exception:
+            # Never propagate from __del__; cleanup is best effort.
+            pass
+
     @classmethod
     def class_name(cls) -> str:
         return "Vllm"