[GPU] Fix cache mode and weights path interaction (#27328)

### Details:
- Currently ov::CacheMode::OPTIMIZE_SIZE behaves like ov::CacheMode::OPTIMIZE_SPEED if weights_path is provided. This change fixes that.
- Additionally, after this change, if the cache is saved with OPTIMIZE_SIZE and the user tries to load it with OPTIMIZE_SPEED (or vice versa), import_model() will fail and the workload will behave as it does on the first launch, according to the cache mode set by the user.
- This change also tightens the weights_path value validation: only files with the ".bin" extension will be accepted. However, if the user provides the path to the wrong bin file, the execution will still fail, because there is no way to validate whether the bin file is correct without storing information about it in the cache.

### Tickets:
- 156265

---------

Co-authored-by: Tomasz Krupa <tomasz.krupa@intel.com>
openvinotoolkit · Oct 30, 2024 · a5a0941 · a5a0941
1 parent 118efc8
commit a5a0941
Show file tree

Hide file tree

Showing 6 changed files with 57 additions and 10 deletions.
diff --git a/src/common/util/include/openvino/util/weights_path.hpp b/src/common/util/include/openvino/util/weights_path.hpp
@@ -0,0 +1,15 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/util/common_util.hpp"
+
+namespace ov {
+namespace util {
+
+bool validate_weights_path(std::string& weights_path);  // true iff the path is non-empty and ends with ".bin"
+
+} // namespace util
+} // namespace ov
diff --git a/src/common/util/src/weights_path.cpp b/src/common/util/src/weights_path.cpp
@@ -0,0 +1,14 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include "openvino/util/weights_path.hpp"
+
+// Returns true iff `weights_path` is non-empty and ends with ".bin".
+// Only the text of the path is inspected by this function.
+bool ov::util::validate_weights_path(std::string& weights_path) {
+    const bool has_path = !weights_path.empty();
+    const bool has_bin_suffix = has_path && ov::util::ends_with(weights_path, ".bin");
+    return has_bin_suffix;
+}
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -7,6 +7,7 @@
 #include "openvino/core/type.hpp"
 #include "openvino/runtime/system_conf.hpp"
 #include "openvino/runtime/threading/cpu_streams_info.hpp"
+#include "openvino/util/weights_path.hpp"
 
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/engine.hpp"
@@ -1839,7 +1840,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) {
 
     std::shared_ptr<ov::MappedMemory> mapped_memory = nullptr;
     std::string weights_path = _config.get_property(ov::weights_path);
-    if (!weights_path.empty()) {
+    if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
+        ov::util::validate_weights_path(weights_path)) {
         mapped_memory = ov::load_mmap_object(weights_path);
     }
 

diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -5,6 +5,7 @@
 #include "openvino/runtime/iplugin.hpp"
 #include "openvino/runtime/intel_gpu/properties.hpp"
 #include "openvino/runtime/internal_properties.hpp"
+#include "openvino/util/weights_path.hpp"
 
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
 #include "intel_gpu/runtime/itt.hpp"
@@ -169,14 +170,17 @@ std::shared_ptr<ov::IAsyncInferRequest> CompiledModel::create_infer_request() co
 void CompiledModel::export_model(std::ostream& model) const {
     // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching
     // which requires the weights_path.
-    if (m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
-        m_config.get_property(ov::weights_path).empty())
+    ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode);
+    std::string weights_path = m_config.get_property(ov::weights_path);
+    if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE &&
+        !ov::util::validate_weights_path(weights_path))
         return;
 
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
     OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");
 
     cldnn::BinaryOutputBuffer ob(model);
+    ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
 
     // Inputs
     {

diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -35,6 +35,7 @@
 #include "openvino/runtime/performance_heuristics.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "openvino/util/common_util.hpp"
+#include "openvino/util/weights_path.hpp"
 #include "transformations/common_optimizations/dimension_tracking.hpp"
 #include "transformations/init_node_info.hpp"
 #include "transformations/rt_info/fused_names_attribute.hpp"
@@ -330,8 +331,16 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
 
     cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
 
+    ov::CacheMode cache_mode;
+    ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
+
+    if (cache_mode != config.get_property(ov::cache_mode)) {
+        return nullptr;
+    }
+
+    std::string weights_path = config.get_property(ov::weights_path);
     if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
-        config.get_property(ov::weights_path).empty()) {
+        !ov::util::validate_weights_path(weights_path)) {
         return nullptr;
     }
 

diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -305,12 +305,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::pr
     prim->origin_op_name = op.get_friendly_name();
     prim->origin_op_type_name = op.get_type_name();
 
-    if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
-        auto rt_info = op.get_rt_info();
-        auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
-        if (weightless_cache_attr != rt_info.end()) {
-            data_prim->bin_offset = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().bin_offset;
-            data_prim->original_size = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().original_size;
+    if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) {
+        if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
+            auto rt_info = op.get_rt_info();
+            auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
+            if (weightless_cache_attr != rt_info.end()) {
+                data_prim->bin_offset = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().bin_offset;
+                data_prim->original_size =
+                    weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().original_size;
+            }
         }
     }