From 569a0cbe8dc123b2ad9070208169915310b7eeb2 Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Tue, 1 Oct 2024 11:43:00 +0200
Subject: [PATCH] fix OPENVINO_DEBUG

---
 modules/llama_cpp_plugin/src/compiled_model.cpp | 6 +++---
 modules/llama_cpp_plugin/src/infer_request.cpp  | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/llama_cpp_plugin/src/compiled_model.cpp b/modules/llama_cpp_plugin/src/compiled_model.cpp
index b53b11363..e56416034 100644
--- a/modules/llama_cpp_plugin/src/compiled_model.cpp
+++ b/modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -27,11 +27,11 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname,
     : ICompiledModel(nullptr, plugin),
       m_gguf_fname(gguf_fname),
       m_num_threads(num_threads) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: loading llama model directly from GGUF... " << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... ");
     llama_model_params mparams = llama_model_default_params();
     mparams.n_gpu_layers = 99;
     m_llama_model_ptr = llama_load_model_from_file(gguf_fname.c_str(), mparams);
-    OPENVINO_DEBUG << "llama_cpp_plugin: llama model loaded successfully from GGUF..." << std::endl;
+    OPENVINO_DEBUG("llama_cpp_plugin: llama model loaded successfully from GGUF...");
     auto input_ids = std::make_shared(ov::element::Type_t::i64, ov::PartialShape({-1, -1}));
     auto fake_convert = std::make_shared(input_ids->output(0), ov::element::Type_t::f32);
@@ -71,7 +71,7 @@ std::shared_ptr<const ov::Model> LlamaCppModel::get_runtime_model() const {
 }
 
 void LlamaCppModel::set_property(const ov::AnyMap& properties) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: attempted to set_property (did nothing)";
+    OPENVINO_DEBUG("llama_cpp_plugin: attempted to set_property (did nothing)");
 }
 
 ov::Any LlamaCppModel::get_property(const std::string& name) const {
diff --git a/modules/llama_cpp_plugin/src/infer_request.cpp b/modules/llama_cpp_plugin/src/infer_request.cpp
index 3eefd56d9..de7c47471 100644
--- a/modules/llama_cpp_plugin/src/infer_request.cpp
+++ b/modules/llama_cpp_plugin/src/infer_request.cpp
@@ -28,7 +28,7 @@ void allocate_tensor_impl(ov::SoPtr<ov::ITensor>& tensor,
 LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
                                                    size_t num_threads)
     : ov::ISyncInferRequest(compiled_model) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: infer request ctor called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: infer request ctor called");
     llama_context_params cparams = llama_context_default_params();
     cparams.n_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
     cparams.n_ctx = 0; // this means that the actual n_ctx will be taken equal to the model's train-time value
@@ -51,7 +51,7 @@ LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr
 
 void LlamaCppSyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
                                                 const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: set_tensors_impl called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: set_tensors_impl called");
 }
 
 void llama_batch_add_reimpl(struct llama_batch& batch,
@@ -131,12 +131,12 @@ void LlamaCppSyncInferRequest::infer() {
     llama_batch_free(batch);
 };
 
 std::vector<ov::ProfilingInfo> LlamaCppSyncInferRequest::get_profiling_info() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: get_profiling_info() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: get_profiling_info() called");
     return std::vector<ov::ProfilingInfo>{};
 };
 
 std::vector<ov::SoPtr<ov::IVariableState>> LlamaCppSyncInferRequest::query_state() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: query_state() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: query_state() called");
     return {std::static_pointer_cast<ov::IVariableState>(std::make_shared<LlamaCppState>(m_llama_ctx))};
 }
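
Note (not part of the patch): the change above suggests that OPENVINO_DEBUG is now a function-style macro that takes the message as an argument, so the old stream-style `OPENVINO_DEBUG << ...` call sites no longer compile when debug logging is enabled. The sketch below is a minimal stand-in illustrating only the usage difference the patch applies; the macro definition, the `[DEBUG]` prefix, and `main()` here are illustrative and are not OpenVINO's actual implementation (the real macro lives in OpenVINO's logging utilities and is compiled out in release builds).

```cpp
#include <iostream>

// Illustrative stand-in for OpenVINO's function-style OPENVINO_DEBUG macro.
#define OPENVINO_DEBUG(MSG) (std::cerr << "[DEBUG] " << (MSG) << '\n')

int main() {
    // Old stream-style usage from the '-' lines; against a function-style
    // macro this form no longer compiles:
    //     OPENVINO_DEBUG << "llama_cpp_plugin: ..." << std::endl;
    //
    // New call-style usage from the '+' lines:
    OPENVINO_DEBUG("llama_cpp_plugin: loading llama model directly from GGUF... ");
    OPENVINO_DEBUG("llama_cpp_plugin: infer request ctor called");
    return 0;
}
```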