[NPUW] Support submodels caching and stabilize inputs/outputs order of submodels (openvinotoolkit#26788)
smirnov-alexey authored Oct 9, 2024
1 parent 1c01983 commit 9d98a8d
Showing 7 changed files with 40 additions and 5 deletions.
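For orientation, here is a hypothetical usage sketch (not part of this commit) showing how the new option is meant to be consumed. The "NPUW_CACHE_DIR" and "NPU_USE_NPUW" keys appear in this diff; the device name, the "YES" value, the model path, and the cache path are placeholder assumptions.

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    // Route compilation through NPUW and ask it to cache compiled submodels.
    ov::AnyMap props = {{"NPU_USE_NPUW", "YES"},                 // assumed value
                        {"NPUW_CACHE_DIR", "/tmp/npuw_cache"}};  // assumed path
    auto compiled = core.compile_model(model, "NPU", props);
    return 0;
}
```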
@@ -49,6 +49,7 @@ DEFINE_OPT(NPUW_DCOFF_SCALE, bool, false, npuw::partitioning::dcoff_with_scale,
DEFINE_OPT(NPUW_FUNCALL_FOR_ALL, bool, false, npuw::partitioning::funcall_for_all, CompileTime);
DEFINE_OPT(NPUW_PARALLEL_COMPILE, bool, false, npuw::parallel_compilation, CompileTime);
DEFINE_OPT(NPUW_WEIGHTS_BANK, std::string, "", npuw::weights_bank, CompileTime);
+ DEFINE_OPT(NPUW_CACHE_DIR, std::string, "", npuw::cache_dir, CompileTime);
DEFINE_OPT(NPUW_FUNCALL_ASYNC, bool, false, npuw::funcall_async, RunTime);
DEFINE_OPT(NPUW_ACC_CHECK, bool, false, npuw::accuracy::check, RunTime);
DEFINE_OPT(NPUW_ACC_THRESH, double, 0.01, npuw::accuracy::threshold, RunTime);
@@ -45,6 +45,14 @@ static constexpr ov::Property<std::string> submodel_device{"NPUW_SUBMODEL_DEVICE
*/
static constexpr ov::Property<std::string> weights_bank{"NPUW_WEIGHTS_BANK"};

+ /**
+  * @brief
+  * Type: std::string.
+  * Specify a directory where to store cached submodels.
+  * Default value: empty.
+  */
+ static constexpr ov::Property<std::string> cache_dir{"NPUW_CACHE_DIR"};
+
namespace partitioning {
namespace online {
/**
1 change: 1 addition & 0 deletions src/plugins/intel_npu/src/al/src/config/npuw.cpp
@@ -35,6 +35,7 @@ void intel_npu::registerNPUWOptions(OptionsDesc& desc) {
desc.add<NPUW_PARALLEL_COMPILE>();
desc.add<NPUW_FUNCALL_ASYNC>();
desc.add<NPUW_WEIGHTS_BANK>();
+ desc.add<NPUW_CACHE_DIR>();
desc.add<NPUW_ACC_CHECK>();
desc.add<NPUW_ACC_THRESH>();
desc.add<NPUW_ACC_DEVICE>();
12 changes: 10 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -582,12 +582,19 @@ ov::SoPtr<ov::ICompiledModel> ov::npuw::CompiledModel::compile_submodel(const st
const std::string& device) {
auto plugin = get_npuw_plugin();
+ auto core = plugin->get_core();
+
// set exclusive_async_requests in case when model is split
// NOTE(dm): Not sure if it is required for the NPUW plugin, but likely it is
auto& device_config = m_meta_devices[device];

+ const auto& cache_dir = m_cfg.get<::intel_npu::NPUW_CACHE_DIR>();
+ if (!cache_dir.empty()) {
+     LOG_INFO("NPUW will try to utilize CACHE_DIR for " << submodel->get_friendly_name() << " submodel.");
+     device_config.insert(ov::cache_dir(cache_dir));
+ }
+
if (m_compiled_submodels.size() > 1) {
- auto supported_internal_properties =
-     plugin->get_core()->get_property(device, ov::internal::supported_properties);
+ auto supported_internal_properties = core->get_property(device, ov::internal::supported_properties);
if (std::find(supported_internal_properties.begin(),
supported_internal_properties.end(),
ov::internal::exclusive_async_requests) != supported_internal_properties.end()) {
@@ -831,6 +838,7 @@ void ov::npuw::CompiledModel::implement_properties() {
BIND(npuw::parallel_compilation, NPUW_PARALLEL_COMPILE),
BIND(npuw::funcall_async, NPUW_FUNCALL_ASYNC),
BIND(npuw::weights_bank, NPUW_WEIGHTS_BANK),
+ BIND(npuw::cache_dir, NPUW_CACHE_DIR),
BIND(npuw::accuracy::check, NPUW_ACC_CHECK),
BIND(npuw::accuracy::threshold, NPUW_ACC_THRESH),
BIND(npuw::accuracy::reference_device, NPUW_ACC_DEVICE),
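To make the compile_submodel() change above concrete, here is a standalone sketch of the same forwarding step, assuming the option value has already been read from the config (the real code takes it from m_cfg and stores the result in the per-device config held in m_meta_devices):

```cpp
#include <openvino/openvino.hpp>
#include <string>

// Sketch of the new logic: if NPUW_CACHE_DIR is set, pass it down to the target
// device as the standard CACHE_DIR property before the submodel is compiled.
void forward_cache_dir(ov::AnyMap& device_config, const std::string& npuw_cache_dir) {
    if (!npuw_cache_dir.empty()) {
        device_config.insert(ov::cache_dir(npuw_cache_dir));  // adds {"CACHE_DIR", <path>}
    }
    // The submodel is then compiled with this map, e.g.
    //   core->compile_model(submodel, device, device_config);
}
```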
@@ -98,6 +98,12 @@ ov::npuw::Group Group::toGroup() const {
for (auto&& node : content_copy) {
g.all_layers.push_back(node->get_friendly_name());
}
+
+ // Sort layers to stabilize the partitioning
+ std::sort(g.input_layers.begin(), g.input_layers.end());
+ std::sort(g.output_layers.begin(), g.output_layers.end());
+ std::sort(g.all_layers.begin(), g.all_layers.end());
+
g.gflops = 0.0001f; // FIXME: calculate proper flops

if (m_repeated && !isNoFold()) {
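The purpose of the sort above is determinism: the layer-name lists are filled by iterating containers whose order is not guaranteed to be stable, so sorting them makes the resulting partitioning (and each submodel's interface) reproducible across runs, which presumably is what lets cached submodels be reused. A reduced illustration (the container type and names are assumptions, not the plugin's actual data structures):

```cpp
#include <algorithm>
#include <string>
#include <unordered_set>
#include <vector>

// Collect names from an unordered container (iteration order unspecified),
// then sort them so the resulting list is identical on every run.
std::vector<std::string> stable_layer_list(const std::unordered_set<std::string>& layers) {
    std::vector<std::string> out(layers.begin(), layers.end());
    std::sort(out.begin(), out.end());
    return out;
}
```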
@@ -487,12 +487,23 @@ void Partitioner::identifySubgraphs() {
LOG_VERB("Populating _parameters...");
group.sg._parameters.clear();

+ // Stabilize input order - sort layers based on names
+ using PairNodePtr = std::pair<std::shared_ptr<ov::Node>, std::shared_ptr<ov::Node>>;
+ std::vector<PairNodePtr> input_mapping_sorted(input_mapping.begin(), input_mapping.end());
+ std::sort(input_mapping_sorted.begin(),
+           input_mapping_sorted.end(),
+           [](const PairNodePtr& p1, const PairNodePtr& p2) {
+               // Sanity check
+               NPUW_ASSERT(p1.first->get_friendly_name() != p2.first->get_friendly_name());
+               return p1.first->get_friendly_name() < p2.first->get_friendly_name();
+           });
+
// Now (after unknown slices/converts were introduced) params may be referred to
// from multiple places in the model - so may be added multiple times to the
// input mapping. This is a w/a, better they're added only once (TODO).
// This set handles it.
std::set<std::shared_ptr<ov::Node>> unique_params;
- for (auto&& im : input_mapping) {
+ for (auto&& im : input_mapping_sorted) {
LOG_BLOCK();
auto& src_node = im.first;
auto& maybe_param = im.second;
@@ -513,7 +524,7 @@
} else {
// assert is_constant(), there's no other way
}
- } // for(input_mapping)
+ } // for(input_mapping_sorted)

// The same logic for group's final layers: replace their direct
// connections with Result stubs (but remember where these outputs
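The identifySubgraphs() change applies the same idea to subgraph inputs: the (source node, parameter) mapping is copied into a vector and ordered by the source node's friendly name before _parameters is populated, so the input order is stable. A reduced sketch with a small stand-in type instead of ov::Node (an assumption for brevity), including the same uniqueness sanity check:

```cpp
#include <algorithm>
#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Named {                  // stand-in for ov::Node in this sketch
    std::string friendly_name;
};
using NamedPtr = std::shared_ptr<Named>;
using PairPtr = std::pair<NamedPtr, NamedPtr>;  // (source node, parameter)

// Order pairs by the source node's name; names are expected to be unique,
// mirroring the NPUW_ASSERT in the hunk above.
void sort_by_friendly_name(std::vector<PairPtr>& mapping) {
    std::sort(mapping.begin(), mapping.end(), [](const PairPtr& p1, const PairPtr& p2) {
        assert(p1.first->friendly_name != p2.first->friendly_name);
        return p1.first->friendly_name < p2.first->friendly_name;
    });
}
```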
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -618,7 +618,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
if (localProperties.at(useNpuwKey).as<bool>() == true) {
// CACHE_DIR isn't supported with NPU_USE_NPUW
if (localProperties.count(ov::cache_dir.name()) || !_globalConfig.get<CACHE_DIR>().empty()) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW");
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW!");
}
return std::make_shared<ov::npuw::CompiledModel>(model->clone(), shared_from_this(), localProperties);
} else {