From 504704c7fcc9ed6a20e6eba444e81f7f51799bee Mon Sep 17 00:00:00 2001 From: Sergey Lyalin Date: Fri, 8 Mar 2024 15:44:10 +0000 Subject: [PATCH] Set position_ids name for optimum-intel based modeling. Fix for model destroy --- vllm/worker/model_runner.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index f8c86cb140f2..d1a07d42e183 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -467,6 +467,7 @@ def callback(m: Matcher) -> bool: position_ids_parameter.append(opset13.parameter(shape=[-1, -1], dtype=np.int64, name="position_ids")) print('CREATED A NEW position_ids PARAMETER') replace_node(mapping[position_ids].get_node(), position_ids_parameter[0]) + position_ids_parameter[0].get_output_tensor(0).set_names({'position_ids'}) print('APPLIED position_ids PARAMETER INSTEAD OF attention_mask-BASED SUB-GRAPH') return True @@ -574,11 +575,12 @@ def load_model(self) -> None: def __del__(self): # Order is important - del self.model.ov_request - del self.model.model - if gc: # when app is being destroyed the module may not be available - gc.collect() - del self.model.ov_node_factory + if hasattr(self.model, 'ov_node_factory'): + del self.model.ov_request + del self.model.model + if gc: # when app is being destroyed the module may not be available + gc.collect() + del self.model.ov_node_factory def set_block_size(self, block_size: int) -> None: self.block_size = block_size