[NPUW] Better utilize threads on compilation (openvinotoolkit#26864)

Co-authored-by: Dmitry Matveev <dmitry.matveev@intel.com>
bbielawx · Oct 7, 2024 · 339a956 · 339a956
1 parent 8151e00
commit 339a956
Showing 1 changed file with 18 additions and 6 deletions.
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -323,13 +323,25 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
     std::map<std::size_t, std::string> forced_sub_devices{};
     const std::string fsd_opt = m_cfg.get<::intel_npu::NPUW_SUBMODEL_DEVICE>();
     forced_sub_devices = ::intel_npu ::OptionParser<std::map<std::size_t, std::string>>::parse(fsd_opt);
+
+    // Exclude optimized-out subgraphs and subgraphs replaced by another one from the compilation targets
+    // beforehand - otherwise we might get a head and a repeated block in the same chunk
+    std::vector<std::size_t> idx_subgraph_to_compile;
+    for (std::size_t i = 0u; i < orderedSubgraphs.size(); i++) {
+        if (orderedSubgraphs[i]._optimized_out || m_compiled_submodels[i].replaced_by.value_or(i) != i) {
+            continue;  // do nothing here
+        } else {
+            idx_subgraph_to_compile.push_back(i);
+        }
+    }
+
     // Compile submodels. Some of them can be functions: track which model will be
     // used as function(s): function name -> index of the compiled subgraph
-    auto compile = [&](size_t id) {
+    auto compile = [&](size_t i) {
+        const auto& id = idx_subgraph_to_compile[i];
         const auto& subgraph = orderedSubgraphs[id];
-        if (subgraph._optimized_out) {
-            return;
-        }
+
+        NPUW_ASSERT(!subgraph._optimized_out);
 
         const std::size_t real_id = m_compiled_submodels[id].replaced_by.value_or(id);
         if (!orderedSubgraphs[real_id]._avoid_list.empty()) {
@@ -385,10 +397,10 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
     // Parallel compilation is unstable so is disabled by default.
     const bool par_opt = m_cfg.get<::intel_npu::NPUW_PARALLEL_COMPILE>();
     if (par_opt) {
-        ov::parallel_for(orderedSubgraphs.size(), compile);
+        ov::parallel_for(idx_subgraph_to_compile.size(), compile);
     } else {
         // TODO: Introduce npuw::serial(i, f) instead where f is a _funcall
-        for (std::size_t i = 0u; i < orderedSubgraphs.size(); i++) {
+        for (std::size_t i = 0u; i < idx_subgraph_to_compile.size(); i++) {
             compile(i);
         }
     }