diff --git a/src/common/util/include/openvino/util/weights_path.hpp b/src/common/util/include/openvino/util/weights_path.hpp
new file mode 100644
--- /dev/null
+++ b/src/common/util/include/openvino/util/weights_path.hpp
@@ -0,0 +1,22 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+
+namespace ov {
+namespace util {
+
+/**
+ * @brief Checks whether a string is acceptable as a weights file path.
+ *
+ * The check is purely lexical: the file itself is not opened or looked up.
+ *
+ * @param weights_path Path to check.
+ * @return true if the path is non-empty and ends with ".bin", false otherwise.
+ */
+bool validate_weights_path(const std::string& weights_path);
+
+}  // namespace util
+}  // namespace ov
diff --git a/src/common/util/src/weights_path.cpp b/src/common/util/src/weights_path.cpp
new file mode 100644
--- /dev/null
+++ b/src/common/util/src/weights_path.cpp
@@ -0,0 +1,12 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/util/weights_path.hpp"
+
+#include "openvino/util/common_util.hpp"
+
+bool ov::util::validate_weights_path(const std::string& weights_path) {
+    // Lexical check only: a usable weights path is non-empty and carries the ".bin" extension.
+    return !weights_path.empty() && ov::util::ends_with(weights_path, ".bin");
+}
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index 1e2e84043dc82b..07fad4873659cd 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -7,6 +7,7 @@
 #include "openvino/core/type.hpp"
 #include "openvino/runtime/system_conf.hpp"
 #include "openvino/runtime/threading/cpu_streams_info.hpp"
+#include "openvino/util/weights_path.hpp"
 
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/engine.hpp"
@@ -1839,7 +1840,8 @@ void program::load(cldnn::BinaryInputBuffer& ib) {
 
     std::shared_ptr<ov::MappedMemory> mapped_memory = nullptr;
     std::string weights_path = _config.get_property(ov::weights_path);
-    if (!weights_path.empty()) {
+    if (_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
+        ov::util::validate_weights_path(weights_path)) {
         mapped_memory = ov::load_mmap_object(weights_path);
     }
 
diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
index 233bc97c249cd4..527e08f07432ef 100644
--- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
+++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -5,6 +5,7 @@
 #include "openvino/runtime/iplugin.hpp"
 #include "openvino/runtime/intel_gpu/properties.hpp"
 #include "openvino/runtime/internal_properties.hpp"
+#include "openvino/util/weights_path.hpp"
 
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
 #include "intel_gpu/runtime/itt.hpp"
@@ -169,14 +170,17 @@ std::shared_ptr<ov::IAsyncInferRequest> CompiledModel::create_infer_request() co
 void CompiledModel::export_model(std::ostream& model) const {
     // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching
     // which requires the weights_path.
-    if (m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
-        m_config.get_property(ov::weights_path).empty())
+    ov::CacheMode cache_mode = m_config.get_property(ov::cache_mode);
+    std::string weights_path = m_config.get_property(ov::weights_path);
+    if (cache_mode == ov::CacheMode::OPTIMIZE_SIZE &&
+        !ov::util::validate_weights_path(weights_path))
         return;
 
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
     OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");
 
     cldnn::BinaryOutputBuffer ob(model);
+    ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
 
     // Inputs
     {
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index d3d70ec92cd23c..7d010a9b590e2e 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -35,6 +35,7 @@
 #include "openvino/runtime/performance_heuristics.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "openvino/util/common_util.hpp"
+#include "openvino/util/weights_path.hpp"
 #include "transformations/common_optimizations/dimension_tracking.hpp"
 #include "transformations/init_node_info.hpp"
 #include "transformations/rt_info/fused_names_attribute.hpp"
@@ -330,8 +331,16 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
 
     cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
 
+    ov::CacheMode cache_mode;
+    ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
+
+    if (cache_mode != config.get_property(ov::cache_mode)) {
+        return nullptr;
+    }
+
+    std::string weights_path = config.get_property(ov::weights_path);
     if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE &&
-        config.get_property(ov::weights_path).empty()) {
+        !ov::util::validate_weights_path(weights_path)) {
         return nullptr;
     }
 
diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
index 510d715e7ac805..899110872ba633 100644
--- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -305,12 +305,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::pr
     prim->origin_op_name = op.get_friendly_name();
     prim->origin_op_type_name = op.get_type_name();
 
-    if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
-        auto rt_info = op.get_rt_info();
-        auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
-        if (weightless_cache_attr != rt_info.end()) {
-            data_prim->bin_offset = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().bin_offset;
-            data_prim->original_size = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().original_size;
+    if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) {
+        if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
+            auto rt_info = op.get_rt_info();
+            auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
+            if (weightless_cache_attr != rt_info.end()) {
+                data_prim->bin_offset = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().bin_offset;
+                data_prim->original_size =
+                    weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().original_size;
+            }
         }
     }
 