Skip to content

Commit

Permalink
[GPU] Fix cache mode and weights path interaction (#27328)
Browse files Browse the repository at this point in the history
### Details:
- Currently ov::CacheMode::OPTIMIZE_SIZE behaves like
ov::CacheMode::OPTIMIZE_SPEED if weights_path is provided. This change
fixes that.
- Additionally, after this change, if the cache is saved with OPTIMIZE_SIZE
and the user tries to load it with OPTIMIZE_SPEED (or vice versa),
import_model() will fail and the workload will behave as it does on the
first launch, according to the cache mode set by the user.
- This change also tightens validation of the weights_path value: only
paths ending with the ".bin" extension are accepted. However, if the user
provides a path to the wrong .bin file, execution will still fail, because
there is no way to validate that the .bin file is correct without storing
information about it in the cache.

### Tickets:
 - 156265

---------

Co-authored-by: Tomasz Krupa <tomasz.krupa@intel.com>
  • Loading branch information
isanghao and tkrupa-intel authored Oct 30, 2024
1 parent 118efc8 commit a5a0941
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 10 deletions.
15 changes: 15 additions & 0 deletions src/common/util/include/openvino/util/weights_path.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/util/common_util.hpp"

namespace ov {
namespace util {

/// @brief Checks that a weights path is usable as a ".bin" weights file path.
///
/// The check is purely textual: the path must be non-empty and must end with
/// the ".bin" suffix. The file is not opened and its existence or contents are
/// not verified.
///
/// @param weights_path Path to validate. Taken by non-const reference, but the
///                     implementation only reads it.
/// @return true if the path is non-empty and ends with ".bin", false otherwise.
bool validate_weights_path(std::string& weights_path);

} // namespace util
} // namespace ov
14 changes: 14 additions & 0 deletions src/common/util/src/weights_path.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//


#include "openvino/util/weights_path.hpp"

// Accepts a weights path only when it is non-empty and carries the ".bin"
// suffix. This is a string-level check; the file itself is never opened or
// inspected here.
bool ov::util::validate_weights_path(std::string& weights_path) {
    const bool has_path = !weights_path.empty();
    // Short-circuit keeps the original evaluation order: the suffix test runs
    // only for a non-empty path.
    return has_path && ov::util::ends_with(weights_path, ".bin");
}
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/graph/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "openvino/core/type.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "openvino/runtime/threading/cpu_streams_info.hpp"
#include "openvino/util/weights_path.hpp"

#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
Expand Down Expand Up @@ -1839,7 +1840,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) {

std::shared_ptr<ov::MappedMemory> mapped_memory = nullptr;
std::string weights_path = _config.get_property(ov::weights_path);
if (!weights_path.empty()) {
if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
ov::util::validate_weights_path(weights_path)) {
mapped_memory = ov::load_mmap_object(weights_path);
}

Expand Down
8 changes: 6 additions & 2 deletions src/plugins/intel_gpu/src/plugin/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "openvino/runtime/iplugin.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include "openvino/runtime/internal_properties.hpp"
#include "openvino/util/weights_path.hpp"

#include "intel_gpu/graph/serialization/binary_buffer.hpp"
#include "intel_gpu/runtime/itt.hpp"
Expand Down Expand Up @@ -169,14 +170,17 @@ std::shared_ptr<ov::IAsyncInferRequest> CompiledModel::create_infer_request() co
void CompiledModel::export_model(std::ostream& model) const {
// If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching
// which requires the weights_path.
if (m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
m_config.get_property(ov::weights_path).empty())
ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode);
std::string weights_path = m_config.get_property(ov::weights_path);
if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE &&
!ov::util::validate_weights_path(weights_path))
return;

OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");

cldnn::BinaryOutputBuffer ob(model);
ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));

// Inputs
{
Expand Down
11 changes: 10 additions & 1 deletion src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "openvino/runtime/performance_heuristics.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/util/common_util.hpp"
#include "openvino/util/weights_path.hpp"
#include "transformations/common_optimizations/dimension_tracking.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/rt_info/fused_names_attribute.hpp"
Expand Down Expand Up @@ -330,8 +331,16 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,

cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());

ov::CacheMode cache_mode;
ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));

if (cache_mode != config.get_property(ov::cache_mode)) {
return nullptr;
}

std::string weights_path = config.get_property(ov::weights_path);
if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
config.get_property(ov::weights_path).empty()) {
!ov::util::validate_weights_path(weights_path)) {
return nullptr;
}

Expand Down
15 changes: 9 additions & 6 deletions src/plugins/intel_gpu/src/plugin/program_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,12 +305,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::pr
prim->origin_op_name = op.get_friendly_name();
prim->origin_op_type_name = op.get_type_name();

if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
auto rt_info = op.get_rt_info();
auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
if (weightless_cache_attr != rt_info.end()) {
data_prim->bin_offset = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().bin_offset;
data_prim->original_size = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().original_size;
if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) {
if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
auto rt_info = op.get_rt_info();
auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
if (weightless_cache_attr != rt_info.end()) {
data_prim->bin_offset = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().bin_offset;
data_prim->original_size =
weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().original_size;
}
}
}

Expand Down

0 comments on commit a5a0941

Please sign in to comment.