Skip to content

Commit

Permalink
apply review comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
luo-cheng2021 committed Mar 20, 2024
1 parent 1678cd8 commit 5cb452d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
2 changes: 0 additions & 2 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,6 @@ def create_engine_configs(
) -> Tuple[ModelConfig, CacheConfig, ParallelConfig, SchedulerConfig,
DeviceConfig, Optional[LoRAConfig]]:
device_config = DeviceConfig(self.device)
if device_config.device_type == "openvino" and device_config.device.type == "cpu":
self.block_size = 1
model_config = ModelConfig(
self.model, self.tokenizer, self.tokenizer_mode,
self.trust_remote_code, self.download_dir, self.load_format,
Expand Down
12 changes: 9 additions & 3 deletions vllm/executor/openvino_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ def __init__(
self.num_layers = model_config.get_num_layers(parallel_config)
self.num_heads = model_config.get_num_kv_heads(parallel_config)

if device_config.device.type == "cpu":
if cache_config.block_size != 1:
print(f"Warning: CPU only support block_size = 1, current is {cache_config.block_size}, forced to 1.")
cache_config.block_size = 1
self.block_size = cache_config.block_size
self.num_gpu_blocks = cache_config.num_gpu_blocks
self.num_cpu_blocks = cache_config.num_cpu_blocks
Expand Down Expand Up @@ -463,12 +467,12 @@ def __init__(
self.device_config = device_config

# Instantiate the worker and load the model to OpenVINO device.
self._init_worker(OpenVINOCacheEngine.get_cache_dtype(cache_config.cache_dtype, model_config, device_config))
self._init_worker()

# Profile the memory usage and initialize the cache.
self._init_cache()

def _init_worker(self, cache_dtype):
def _init_worker(self):
assert self.parallel_config.world_size == 1, (
"OpenVINO worker only supports single inference device.")

Expand All @@ -478,7 +482,9 @@ def _init_worker(self, cache_dtype):
self.scheduler_config,
self.device_config,
lora_config=self.lora_config,
kv_cache_dtype=cache_dtype,
kv_cache_dtype=OpenVINOCacheEngine.get_cache_dtype(self.cache_config.cache_dtype,
self.model_config,
self.device_config)
)
self.driver_worker.init_model()
self.driver_worker.load_model()
Expand Down

0 comments on commit 5cb452d

Please sign in to comment.