diff --git a/src/inference/dev_api/openvino/runtime/memory_solver.hpp b/src/inference/dev_api/openvino/runtime/memory_solver.hpp
index b2e11d203ef50f..a43e02f7a0b43a 100644
--- a/src/inference/dev_api/openvino/runtime/memory_solver.hpp
+++ b/src/inference/dev_api/openvino/runtime/memory_solver.hpp
@@ -16,6 +16,7 @@
 #include <vector>
 
 #include "openvino/core/except.hpp"
+// #include "global_execution_index.hpp"
 
 namespace ov {
 
@@ -52,6 +53,7 @@ class MemorySolver {
     struct Box {
         /** Execution order index of first use. The data will be produced here. */
         int start;
+        // intel_cpu::GlobalExecutionIndex start;
 
         /**
          * The execution order index of last use. After that data will be released.
@@ -59,6 +61,7 @@ class MemorySolver {
          * end of execution.
          */
         int finish;
+        // intel_cpu::GlobalExecutionIndex finish;
 
         /** Size of data. In abstract unit of measure (byte, simd, cache line, ...) */
         int64_t size;
diff --git a/src/plugins/intel_cpu/src/allocation_context.hpp b/src/plugins/intel_cpu/src/allocation_context.hpp
new file mode 100644
index 00000000000000..8affe814807004
--- /dev/null
+++ b/src/plugins/intel_cpu/src/allocation_context.hpp
@@ -0,0 +1,38 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace ov {
+namespace intel_cpu {
+
+class Node;
+class Edge;
+
+/**
+ * Maps a node to a pair of execution indices.
+ * Graph::RegisterToAllocationContext stores {inputExecIndex, outputExecIndex} for every registered node.
+ */
+using GlobalExecutionIndex = std::unordered_map<std::shared_ptr<Node>, std::pair<int, int>>;
+
+/**
+ * Data collected from a graph by Graph::RegisterToAllocationContext.
+ */
+struct AllocationContext {
+    /** Edges of the registered graph, appended by Graph::RegisterToAllocationContext. */
+    std::vector<std::shared_ptr<Edge>> edges;
+    /** Execution indices of the registered nodes. */
+    GlobalExecutionIndex execIndex;
+    /** Input execution indices of the nodes identified as sync points. */
+    std::vector<size_t> syncPoints;
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index bbee5d937be5d5..9f9fed5421b163 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -4,6 +4,7 @@
 
 #include "compiled_model.h"
 #include "async_infer_request.h"
+#include "graph.h"
 #include "infer_request.h"
 #include "itt.h"
 #include "low_precision/low_precision.hpp"
@@ -19,6 +20,7 @@
 #include "openvino/runtime/threading/cpu_streams_info.hpp"
 #include "openvino/runtime/threading/cpu_message.hpp"
 #include "utils/serialize.hpp"
+#include "memory_control.hpp"
 
 #include "cpu/x64/cpu_isa_traits.hpp"
 #include <cstring>
@@ -52,7 +54,8 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
       m_cfg{cfg},
       m_name{model->get_name()},
       m_loaded_from_cache(loaded_from_cache),
-      m_sub_memory_manager(sub_memory_manager) {
+      m_sub_memory_manager(sub_memory_manager),
+      m_networkMemoryControl(std::make_shared<NetworkMemoryControl>()) {
     m_mutex = std::make_shared<std::mutex>();
     const auto& core = m_plugin->get_core();
     if (!core)
@@ -152,20 +155,26 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
         std::exception_ptr exception;
         auto makeGraph = [&] {
             try {
+                MemoryControl* memoryControl = m_networkMemoryControl->createMemoryControlUnit();
                 GraphContext::Ptr ctx;
                 {
                     std::lock_guard<std::mutex> lock{*m_mutex.get()};
                     auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
                                            ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);
-
                     ctx = std::make_shared<GraphContext>(m_cfg,
                                                          m_socketWeights[socketId],
                                                          isQuantizedFlag,
+                                                         memoryControl,
+                                                         m_networkMemoryControl,
                                                          streamsExecutor,
                                                          m_sub_memory_manager);
                 }
+
                 const std::shared_ptr<const ov::Model> model = m_model;
-                graphLock._graph.CreateGraph(model, ctx);
+                // @todo propagate input / output memory descriptors
+                graphLock._graph.Init(model, ctx);
+                // @todo pass input / output memory
+                graphLock._graph.Activate({}, {}, true);
             } catch (...) {
                 exception = std::current_exception();
             }
@@ -346,7 +355,7 @@ void CompiledModel::release_memory() {
     for (auto&& graph : m_graphs) {
         GraphGuard::Lock graph_lock{graph};
         auto ctx = graph_lock._graph.getGraphContext();
-        ctx->getNetworkMemoryControl()->releaseMemory();
+        m_networkMemoryControl->releaseMemory();
     }
 }
 
diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
index faedf1ae5a744c..cab50971f31a78 100644
--- a/src/plugins/intel_cpu/src/compiled_model.h
+++ b/src/plugins/intel_cpu/src/compiled_model.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -19,6 +20,8 @@
 namespace ov {
 namespace intel_cpu {
 
+class NetworkMemoryControl;
+
 class CompiledModel : public ov::ICompiledModel {
 public:
     typedef std::shared_ptr<CompiledModel> Ptr;
@@ -51,6 +54,10 @@ class CompiledModel : public ov::ICompiledModel {
 
     void release_memory() override;
 
+    std::shared_ptr<NetworkMemoryControl> get_network_memory_control() const {
+        return m_networkMemoryControl;  // instance created in the CompiledModel constructor
+    }
+
 private:
     std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
     friend class SyncInferRequest;
@@ -91,6 +98,7 @@ class CompiledModel : public ov::ICompiledModel {
 
     std::vector<std::shared_ptr<CompiledModel>> m_sub_compiled_models;
     std::shared_ptr<SubMemoryManager> m_sub_memory_manager = nullptr;
+    std::shared_ptr<NetworkMemoryControl> m_networkMemoryControl = nullptr;
     bool m_has_sub_compiled_models = false;
 };
 
diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp
index c314718bb82416..c5d75bc3d16b62 100644
--- a/src/plugins/intel_cpu/src/edge.cpp
+++ b/src/plugins/intel_cpu/src/edge.cpp
@@ -235,7 +235,7 @@ Edge::ReorderStatus Edge::needReorder() {
 }
 
 void Edge::reuse(MemoryPtr ptr) {
-    OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse initialized memory in " + name());
+    OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse uninitialized memory in " + name());
     memoryPtr = ptr;
     changeStatus(Status::Allocated);
 
@@ -292,7 +292,11 @@ std::string Edge::name() const {
 
     std::stringstream result;
 
-    result << parentPtr->getName() << " port " << parent_port << " <-> " << childPtr->getName() << " port " << child_port;
+    result << parentPtr->getName()<< " port " << parent_port
+           << " <-> "
+           << childPtr->getName() << " port " << child_port
+           << " status: "
+           << static_cast<int>(getStatus());
 
     return  result.str();
 }
@@ -441,7 +445,7 @@ void Edge::validate() {
     getChild();
 
     if (status != Status::Allocated || !memoryPtr) {
-        OPENVINO_THROW("Error memory is not allocated!");
+        OPENVINO_THROW("Error memory is not allocated for edge: ", name());
     }
     status = Status::Validated;
 }
diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h
index 29cb8113943cd3..e77a5cecf89aeb 100644
--- a/src/plugins/intel_cpu/src/edge.h
+++ b/src/plugins/intel_cpu/src/edge.h
@@ -82,6 +82,7 @@ class Edge {
     }
 
     std::string name() const;
+    const MemoryDesc& getDesc() const;
 
 private:
     std::weak_ptr<Node> parent;
@@ -99,7 +100,6 @@ class Edge {
     PortDescBaseCPtr getInputPortDesc() const;
     PortDescBaseCPtr getOutputPortDesc() const;
 
-    const MemoryDesc& getDesc() const;
     bool enforceReorder();
 
     void collectConsumers(std::vector<std::shared_ptr<Node>>& result) const;
diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index f3f3a379fc2af7..a552cf616724d4 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -6,6 +6,9 @@
 
 #include <algorithm>
 #include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <iterator>
 #include <limits>
 #include <map>
 #include <memory>
@@ -16,11 +19,14 @@
 #include <utility>
 #include <vector>
 
+#include "allocation_context.hpp"
 #include "edge.h"
+#include "graph_context.h"
 #include "graph_dumper.h"
 #include "graph_optimizer.h"
 #include "infer_request.h"
 #include "itt.h"
+#include "memory_control.hpp"
 #include "memory_desc/cpu_memory_desc_utils.h"
 #include "memory_desc/dnnl_blocked_memory_desc.h"
 #include "node.h"
@@ -350,16 +356,23 @@ static void UseExternalOutputMemory(const std::map<std::size_t, NodePtr>& output
 }
 
 void Graph::Activate(const std::vector<MemoryPtr>& externalInputMemory,
-                               const std::vector<MemoryPtr>& externalOutputMemory) {
-    OPENVINO_ASSERT(status == Status::Initialized, "Invalid graph status");
+                     const std::vector<MemoryPtr>& externalOutputMemory,
+                     bool globalAllocation) {
+    // OPENVINO_ASSERT(status == Status::Initialized, "Invalid graph status");
 
-    const bool hasDynNodes = ProcessDynNodes();
-    const auto syncNodesInds = hasDynNodes ? IdentifySyncPoints(graphNodes) : std::vector<size_t>{};
+    // const bool hasDynNodes = ProcessDynNodes();
+    // const auto syncNodesInds = hasDynNodes ? IdentifySyncPoints(graphNodes) : std::vector<size_t>{};
 
     UseExternalInputMemory(inputNodesMap, externalInputMemory);
     UseExternalOutputMemory(outputNodesMap, externalOutputMemory);
 
-    Allocate(syncNodesInds);
+    // std::tie(m_executableGraphNodes, m_executableSyncNodesInds) = ExtractExecutableNodesAndSyncPoints(syncNodesInds, graphNodes);
+
+    // status = hasDynNodes ? (parallel_get_max_threads() > 1 ? Status::ReadyDynamic : Status::ReadyDynamicSeq)
+    //     : Status::ReadyStatic;
+
+    // CPU_DEBUG_CAP_ENABLE(serialize(*this));
+    Allocate(globalAllocation);
 
     CreatePrimitivesAndExecConstants();
 
@@ -369,22 +382,22 @@ void Graph::Activate(const std::vector<MemoryPtr>& externalInputMemory,
     }
 #endif
 
-    std::tie(m_executableGraphNodes, m_executableSyncNodesInds) = ExtractExecutableNodesAndSyncPoints(syncNodesInds, graphNodes);
-
-    if (hasDynNodes) {
-        status = Status::ReadyDynamic;
-        // Here we use the following heuristic: if the number of sync nodes is less than 10 times of the number of exec
-        // nodes, it does make sense to use Sequential dynamic shapes processing due to the high overheads on context
-        // switching when the dynamic shapes are being processed in parallel and there are a lot of sync points. Also
-        // this rule works for short graphs (usually subgraphs) when the amount of nodes is to low to process them in
-        // parallel.
-        const auto exec2sync = m_executableGraphNodes.size() / m_executableSyncNodesInds.size();
-        if (exec2sync < 10 || parallel_get_max_threads() < 2) {
-            status = Status::ReadyDynamicSeq;
-        }
-    } else {
-        status = Status::ReadyStatic;
-    }
+    // std::tie(m_executableGraphNodes, m_executableSyncNodesInds) = ExtractExecutableNodesAndSyncPoints(syncNodesInds, graphNodes);
+
+    // if (hasDynNodes) {
+    //     status = Status::ReadyDynamic;
+    //     // Here we use the following heuristic: if the number of sync nodes is less than 10 times of the number of exec
+    //     // nodes, it does make sense to use Sequential dynamic shapes processing due to the high overheads on context
+    //     // switching when the dynamic shapes are being processed in parallel and there are a lot of sync points. Also
+    //     // this rule works for short graphs (usually subgraphs) when the amount of nodes is to low to process them in
+    //     // parallel.
+    //     const auto exec2sync = m_executableGraphNodes.size() / m_executableSyncNodesInds.size();
+    //     if (exec2sync < 10 || parallel_get_max_threads() < 2) {
+    //         status = Status::ReadyDynamicSeq;
+    //     }
+    // } else {
+    //     status = Status::ReadyStatic;
+    // }
     CPU_DEBUG_CAP_ENABLE(serialize(*this));
 }
 
@@ -713,213 +726,141 @@ void Graph::ResolveComplexInplaceConflicts() {
     }
 }
 
-static inline bool isConstOutput(EdgePtr edge) {
-    return edge->getParent()->isConstant() && !edge->getChild()->isConstant();
-}
-
-void Graph::AllocateWithReuse(const std::vector<size_t>& syncNodesInds) {
-    edgeClusters edge_clusters = MemoryControl::findEdgeClusters(graphEdges);
-
-    size_t remaining_edge_clusters_count = edge_clusters.size();
-
-    // Resolve special cases:
-    for (size_t i = 0; i < remaining_edge_clusters_count;) {
-        auto &cluster = edge_clusters[i];
-        bool erase = false;
-        for (auto &edge : cluster) {
-            // Remove already allocated edges from the mem reuse algo
-            if (edge->getStatus() == Edge::Status::Allocated) {
-                erase = true;
-                break;
-            }
-
-            // Special allocation for string tensors
-            if (edge->getDesc().getPrecision() == element::string && edge->getStatus() == Edge::Status::NeedAllocation) {
-                StringMemory::StringMemoryBlockPtr memBlcok;
-                if (edge->getParent()->isConstant()) {
-                    if (edge->getParent()->getType() == Type::Input) {
-                        auto constNode = static_cast<node::Input *>(edge->getParent().get());
-                        edge->reuse(std::const_pointer_cast<IMemory>(constNode->getMemoryPtr()));
-                    } else {
-                        edge->externalAllocate(m_context->getWeightsCache());
-                    }
-                    auto stringMemory = dynamic_cast<StringMemory *>(edge->getMemoryPtr().get());
-                    OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '",
-                            edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory.");
-                    memBlcok = stringMemory->getStringMemoryBlockPtr();
-                } else {
-                    auto memory = std::make_shared<StringMemory>(getEngine(), edge->getDesc());
-                    edge->reuse(memory);
-                    memBlcok = memory->getStringMemoryBlockPtr();
-                }
-                for (auto& edge_c : cluster) {
-                    if (edge_c == edge) {
-                        continue;
-                    }
-                    OPENVINO_ASSERT(edge_c->getDesc().getPrecision() == element::string, "All edges in the cluster must be string.");
-                    if (edge_c->getStatus() == Edge::Status::NotAllocated) {
-                        auto memory = std::make_shared<StringMemory>(getEngine(), edge_c->getDesc(), memBlcok);
-                        edge_c->reuse(memory);
-                    } else {
-                        OPENVINO_THROW("[CPU] String tensors allocation in the cluster. Edge between nodes '", edge_c->getParent()->getName(), "' and '",
-                            edge_c->getChild()->getName(), "' has an unexpected status: ", static_cast<int>(edge_c->getStatus()));
-                    }
-                }
-                erase = true;
-                continue;
-            }
-
-            // Special allocation for constants
-            if (edge->getStatus() != Edge::Status::NeedAllocation || !edge->getParent()->isConstant()) {
-                continue;
-            }
+/**
+ * Partitions the edge @p clusters: clusters which cannot be handled by the generic
+ * memory solver algorithm are allocated here and moved to the end of the container.
+ * Such clusters meet one of the following criteria:
+ * - the base edge of the cluster is already Allocated
+ * - the base edge of the cluster has the ov::element::string precision
+ * - the base edge of the cluster is produced by a constant node
+ *
+ * @return the number of clusters left to process (the leading partition of @p clusters)
+ */
+static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
+                                          const GraphContext::CPtr context) {
+    auto allocateStringMemory = [context](const EdgePtr& edge) {
+        if (edge->getParent()->isConstant()) {
             if (edge->getParent()->getType() == Type::Input) {
-                auto constNode = std::static_pointer_cast<node::Input>(edge->getParent());
+                auto constNode = static_cast<node::Input *>(edge->getParent().get());
                 edge->reuse(std::const_pointer_cast<IMemory>(constNode->getMemoryPtr()));
             } else {
-                edge->externalAllocate(m_context->getWeightsCache());
-            }
-            erase = true;
-        }
-
-        if (erase) {
-            std::swap(edge_clusters[i], edge_clusters[remaining_edge_clusters_count - 1]);
-            --remaining_edge_clusters_count;
-        } else {
-            ++i;
-        }
-    }
-
-    // Markup the memory regions
-    std::vector<MemoryRegion> memoryRegions;
-    memoryRegions.reserve(remaining_edge_clusters_count);
-
-    for (size_t i = 0; i < remaining_edge_clusters_count; ++i) {
-        MemoryRegion reg = {std::numeric_limits<int>::max(),
-                            0,
-                            0,
-                            static_cast<int64_t>(i),
-                            MemoryRegion::RegionType::VARIABLE,
-                            MemoryRegion::AllocType::UNKNOWN};
-
-        int64_t boxSize = 0;
-        bool isConst = false, isOutput = false, isInput = false;
-        for (auto &edge : edge_clusters[i]) {
-            int e_start = edge->getParent()->getExecIndex();
-            int e_finish = edge->getChild()->getExecIndex();
-
-            auto&& desc = edge->getDesc();
-
-            if (boxSize != -1 && desc.isDefined()) {
-                int64_t e_size = desc.getCurrentMemSize();  // size in bytes (from the beginning of data to the last element)
-                boxSize = std::max(e_size, boxSize);
-            } else {
-                boxSize = -1;
-            }
-
-            reg.start = std::min(e_start, reg.start);
-            reg.finish = std::max(e_finish, reg.finish);
-
-            auto allocType =
-                desc.getPrecision() == element::string ? MemoryRegion::AllocType::STRING : MemoryRegion::AllocType::POD;
-
-            if (reg.alloc_type != allocType && MemoryRegion::AllocType::UNKNOWN != reg.alloc_type) {
-                OPENVINO_THROW("Different allocation types in the same memory region");
-            }
-            reg.alloc_type = allocType;
-
-            isConst  |= isConstOutput(edge);
-            isOutput |= edge->getChild()->getType() == Type::Output;
-            isInput  |= edge->getParent()->getType() == Type::Input;
-        }
-
-        reg.size = boxSize;
-
-        if (isConst) {
-            reg.type = MemoryRegion::RegionType::CONSTANT;
-        } else if (isInput) {
-            if (isOutput) {
-                reg.type = MemoryRegion::RegionType::IO;
-            } else {
-                reg.type = MemoryRegion::RegionType::INPUT;
+                edge->externalAllocate(context->getWeightsCache());
             }
-        } else if (isOutput) {
-            reg.type = MemoryRegion::RegionType::OUTPUT;
+            auto stringMemory = dynamic_cast<StringMemory *>(edge->getMemoryPtr().get());
+            OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '",
+                            edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory.");
+            return stringMemory->getStringMemoryBlockPtr();
         }
 
-        memoryRegions.push_back(reg);
-    }
+        auto memory = std::make_shared<StringMemory>(context->getEngine(), edge->getDesc());
+        edge->reuse(memory);
+        return memory->getStringMemoryBlockPtr();
+    };
 
-    // special processing of the dynamic output edges
-    auto it = std::remove_if(memoryRegions.begin(), memoryRegions.end(), [&](const MemoryRegion& region) {
-        if (region.size >= 0 || !one_of(region.type, MemoryRegion::RegionType::OUTPUT, MemoryRegion::RegionType::IO)) {
-            return false;
-        }
-        bool result = false;
-        for (auto& edge : edge_clusters[region.id]) {
-            auto child = edge->getChild();
-            if (child->getType() == Type::Output && edge->getStatus() == Edge::Status::NeedAllocation) {
-                auto proxyMemBlock = std::make_shared<ProxyMemoryBlock>();
-                DEBUG_LOG("ProxyMemoryBlock ", proxyMemBlock, " ", this);
-                edge->allocate(proxyMemBlock);
-
-                // Store the output memory blocks.
-                // So that, the infer requests can be able to access them.
-                int count = 0;
-                for (auto& output : outputNodesMap) {
-                    if (output.second == child) {
-                        outputNodesMemBlocksMap[output.first] = proxyMemBlock;
-                        count++;
-                    }
-                }
-                // sometimes there are unused output ports.
-                OPENVINO_ASSERT(count <= 1, "CPU plugin cannot find output node. count ", count);
-                result = true;
-            }
+    auto allocateConstantEdge = [context](const EdgePtr& edge) {
+        if (edge->getParent()->getType() == Type::Input) {
+            auto constNode = std::static_pointer_cast<node::Input>(edge->getParent());
+            edge->reuse(std::const_pointer_cast<IMemory>(constNode->getMemoryPtr()));
+        } else {
+            edge->externalAllocate(context->getWeightsCache());
         }
-        return result;
-    });
-
-    memoryRegions.erase(it, memoryRegions.end());
+    };
 
-    //Set up the memory control subsystem.
-    this->m_pMemoryControl = &(getGraphContext()->getNetworkMemoryControl()->createMemoryControlUnit(syncNodesInds));
-    auto memoryBlocks = m_pMemoryControl->insert(memoryRegions);
+    auto endOfNotAllocatedPartition =
+        std::partition(clusters.begin(), clusters.end(),
+                       [&allocateStringMemory, &allocateConstantEdge, &context](const EdgeCluster& cluster) {
+                           if (cluster.empty()) return false;
+
+                           auto baseEdgeIt = std::find_if(cluster.begin(), cluster.end(), [](const EdgePtr& edge) {
+                               return one_of(edge->getStatus(), Edge::Status::Allocated, Edge::Status::NeedAllocation);
+                           });
+
+                           OPENVINO_ASSERT(baseEdgeIt != cluster.end(), "Unexpected cluster state");
+
+                           // const auto& baseEdge = cluster.front();
+                           const auto& baseEdge = *baseEdgeIt;
+                           // Skip already allocated cluster
+                           if (baseEdge->getStatus() == Edge::Status::Allocated) {
+                               return false;
+                           }
+
+                           // Skip if the baseEdge does not require allocation
+                           if (baseEdge->getStatus() != Edge::Status::NeedAllocation) {
+                               return true;
+                           }
+
+                           // Allocate a string cluster
+                           if (baseEdge->getDesc().getPrecision() == element::string) {
+                               OPENVINO_ASSERT(std::all_of(cluster.begin(), cluster.end(),
+                                                           [](const EdgePtr& edge) {
+                                                               return edge->getDesc().getPrecision() == element::string;
+                                                           }), "All edges in the cluster must be string.");
+                               auto memBlock = allocateStringMemory(baseEdge);
+                               for (auto &edge : cluster) {
+                                   if (edge->getStatus() == Edge::Status::NotAllocated) {
+                                       edge->reuse(std::make_shared<StringMemory>(context->getEngine(), edge->getDesc(), memBlock));
+                                   }
+                               }
+                               return false;
+                           }
+
+                           // Allocate a constant cluster
+                           if (baseEdge->getParent()->isConstant()) {
+                               // @todo can we add some meaningful assert here?
+                               for (auto &edge : cluster) {
+                                   if (edge->getParent()->isConstant() && edge->getStatus() == Edge::Status::NeedAllocation) {
+                                       allocateConstantEdge(edge);
+                                   }
+                               }
+                               return false;
+                           }
+
+                           return true;
+                       });
+
+    return std::distance(clusters.begin(), endOfNotAllocatedPartition);
+}
 
-    // attach all the not yet allocated edges to the memory contol
+static void attachEdgeToMemoryControl(const EdgeClusters& edgeClusters,
+                                      const MemoryControl::MemoryBlockMap& memoryBlocks) {
+    // attach all the not yet allocated edges to the memory control
     for (auto&& item : memoryBlocks) {
         int count = 0;
-        for (auto&& edge : edge_clusters[item.first]) {
+        // std::cout << "Processing cluster: " << item.first << "\n";
+        for (auto&& edge : edgeClusters[item.first]) {
+            // std::cout << "Processing edge: " << edge->name() << "\n";
             if (edge->getStatus() == Edge::Status::NeedAllocation) {
+                // std::cout << "Allocating edge: " << edge->name() << "\n";
+
                 edge->allocate(item.second);
 
                 // TODO: WA for some test (like strided_slice_test) which use tensors with
                 //       shapes {0}. And it is implicitly converted into {1} tensor.
                 //       Zeroing of input data allow pass tests.
-                if (edge->getParent()->type == Type::Input && edge->hasDefinedMaxSize())
+                if (edge->getParent()->getType() == Type::Input && edge->hasDefinedMaxSize())
                     edge->getMemoryPtr()->nullify();
 
                 count++;
             }
         }
-        OPENVINO_ASSERT(count == 1);
+        OPENVINO_ASSERT(count == 1, "Expected exactly one allocation. Actual number of allocations: ", count);
     }
+}
 
-    m_pMemoryControl->allocateMemory();
-
-    // Resolve all other edges with status NotAllocated and in-place
-    for (auto& cluster : edge_clusters) {
+static void resolveInPlaceEdges(const EdgeClusters& clusters) {
+    for (auto& cluster : clusters) {
         for (auto& edge : cluster) {
             if (edge->getStatus() != Edge::Status::NotAllocated) {
                 continue;
             }
+
             std::vector<EdgePtr> edges_to_process;
             edges_to_process.push_back(edge);
-            for (auto next_edge = edge->getSharedEdge(std::nothrow);
-                next_edge;
-                next_edge = next_edge->getSharedEdge(std::nothrow)) {
+            for (auto next_edge = edge->getSharedEdge(std::nothrow); next_edge;
+                 next_edge = next_edge->getSharedEdge(std::nothrow)) {
                 edges_to_process.push_back(next_edge);
             }
+
             std::for_each(edges_to_process.rbegin(), edges_to_process.rend(), [](const EdgePtr& edge) {
                 if (edge->getStatus() == Edge::Status::NotAllocated) {
                     if (edge->inPlace(Edge::LOOK_DOWN)) {
@@ -938,11 +879,37 @@ void Graph::AllocateWithReuse(const std::vector<size_t>& syncNodesInds) {
     }
 }
 
-void Graph::Allocate(const std::vector<size_t>& syncNodesInds) {
-    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::Allocate");
+// Builds the executable node list and its sync points, then sets the graph status accordingly.
+// Returns the sync node indices produced by IdentifySyncPoints (empty when there are no dynamic nodes).
+std::vector<size_t> Graph::CreateExecutionGraph() {
+    const bool hasDynNodes = ProcessDynNodes();
+    auto syncNodesInds = hasDynNodes ? IdentifySyncPoints(graphNodes) : std::vector<size_t>{};
+
+    std::tie(m_executableGraphNodes, m_executableSyncNodesInds) =
+        ExtractExecutableNodesAndSyncPoints(syncNodesInds, graphNodes);
+
+    if (hasDynNodes) {
+        status = Status::ReadyDynamic;
+        // Here we use the following heuristic: if the number of exec nodes is less than 10 times the number of sync
+        // nodes, it does make sense to use Sequential dynamic shapes processing due to the high overheads on context
+        // switching when the dynamic shapes are being processed in parallel and there are a lot of sync points. Also
+        // this rule works for short graphs (usually subgraphs) when the amount of nodes is too low to process them in
+        // parallel.
+        // NOTE(review): exec2sync divides by m_executableSyncNodesInds.size(); presumably non-empty here - confirm
+        const auto exec2sync = m_executableGraphNodes.size() / m_executableSyncNodesInds.size();
+        if (exec2sync < 10 || parallel_get_max_threads() < 2) {
+            status = Status::ReadyDynamicSeq;
+        }
+    } else {
+        status = Status::ReadyStatic;
+    }
+
+    return syncNodesInds;
+}
 
-    //resolve inplace dead end nodes
-    for (const auto& edge : graphEdges) {
+static void ResolveInOutInPlaceEdgesLegacy(const std::vector<EdgePtr>& edges) {
+    for (const auto& edge : edges) {
+        // std::cout << edge->name() << "\n";
         if (edge->getStatus() == Edge::Status::Uninitialized) {
             if (edge->getParent()->getParentEdges().empty() &&
                 one_of(edge->getParent()->getType(), Type::Input, Type::MemoryInput) &&
@@ -955,20 +922,123 @@ void Graph::Allocate(const std::vector<size_t>& syncNodesInds) {
             }
         }
     }
+}
+
+// For uninitialized edges: resolves in-place edges of parentless MemoryInput / childless MemoryOutput nodes.
+static void ResolveInOutInPlaceEdges(const std::vector<EdgePtr>& edges) {
+    for (const auto& edge : edges) {
+        if (edge->getStatus() != Edge::Status::Uninitialized)
+            continue;
+        const auto src = edge->getParent();
+        if (src->getParentEdges().empty() && src->getType() == Type::MemoryInput && edge->inPlace(Edge::LOOK_UP)) {
+            src->resolveInPlaceEdges(Edge::LOOK_UP);
+            continue;
+        }
+        const auto dst = edge->getChild();
+        if (dst->getChildEdges().empty() && dst->getType() == Type::MemoryOutput && edge->inPlace(Edge::LOOK_DOWN))
+            dst->resolveInPlaceEdges(Edge::LOOK_DOWN);
+    }
+}
+
+// Registers the nodes, edges and sync points of this graph into the given allocation context.
+// 'offset' is added to the local node index to form the execution indices; the updated offset is returned.
+int Graph::RegisterToAllocationContext(int offset, AllocationContext& context) {
+    auto syncNodesInds = CreateExecutionGraph();
+    ResolveInOutInPlaceEdges(graphEdges);
+
+    for (size_t i = 0, j = 0; i < graphNodes.size(); i++) {
+        const auto& node = graphNodes[i];
+        const auto inputExecIndex = i + offset;
+        offset = node->registerToAllocationContext(offset, context) - 1;
+        const auto outputExecIndex = i + offset;
+        context.execIndex[node] = {inputExecIndex, outputExecIndex};
+        // 'j' is a cursor over the local sync node indices (presumably ascending - confirm against the producer)
+        if (j < syncNodesInds.size() && syncNodesInds[j] == i) {
+            context.syncPoints.push_back(inputExecIndex);
+            j++;  // move on to the next sync point, otherwise only the first one would ever be registered
+        }
+    }
+
+    context.edges.insert(context.edges.end(), graphEdges.begin(), graphEdges.end());
+    return offset;
+}
+
+// Creates the allocation context (edges, execution indices, sync points) used to allocate the graph memory.
+AllocationContext Graph::CreateAllocationContext(bool global) {
+    AllocationContext context;
+    if (global) {
+        RegisterToAllocationContext(-1, context);
+        return context;
+    }
+
+    // local allocation context. Used for the nodes with inner graph which are not updated yet
+    ResolveInOutInPlaceEdgesLegacy(graphEdges);
+    context.syncPoints = CreateExecutionGraph();
+
+    // in a local context the input and output execution indices of a node both equal its position in graphNodes
+    int position = 0;
+    for (const auto& node : graphNodes) {
+        context.execIndex[node] = {position, position};
+        ++position;
+    }
+
+    context.edges = graphEdges;
 
-    // resolve edges. Define which will be a view on others
-    //   NeedAllocation - real blob
-    //   NotAllocated - view on other blob, peer or in-place
-    for (auto& edge : graphEdges) edge->init();
+    return context;
+}
+
+// Allocates memory for the graph edges through a memory control unit: the shared one taken from
+// the graph context when 'globalAllocation' is set, otherwise a new unit created for this graph.
+// Throws if no memory control unit is available. Does nothing if the unit is already allocated.
+void Graph::Allocate(bool globalAllocation) {
+    // debug override: the FORCE_LOCAL environment variable switches the graph to local allocation
+    if (std::getenv("FORCE_LOCAL"))
+        globalAllocation = false;
+    // Set up the memory control subsystem.
+    auto memoryControl = globalAllocation ? m_context->getMemoryControl()
+                                          : m_context->getNetworkMemoryControl()->createMemoryControlUnit();
+    // the graph context hands out a raw pointer which may be null, so fail loudly instead of dereferencing it
+    if (!memoryControl) {
+        OPENVINO_THROW("Memory control unit is not initialized in graph: ", GetName());
+    }
+
+    if (memoryControl->allocated()) {
+        return;
+    }
+
+    // @todo collect syncNodesInds with respect to global context as well
+    auto allocationContext = CreateAllocationContext(globalAllocation);
+    const auto& edges = allocationContext.edges;
+
+    // resolve edges. Define which will be a view on others
+    //   NeedAllocation - real blob
+    //   NotAllocated - view on other blob, peer or in-place
+    for (auto& edge : edges) edge->init();
+
+    auto edgeClusters = MemoryControl::formEdgeClusters(edges);
 
-    // Allocate memory space for all edges marked with NeedAllocation
-    AllocateWithReuse(syncNodesInds);
+    const size_t remainingEdgeClustersCount = AllocateStringsAndConstants(edgeClusters, m_context);
 
-    // Check all getters. Should work.
-    for (auto& edge : graphEdges) edge->validate();
+    // the first 'remainingEdgeClustersCount' clusters are described as memory regions for the memory control
+    auto memoryRegions = MemoryControl::formMemoryRegions(edgeClusters,
+                                                          remainingEdgeClustersCount,
+                                                          allocationContext.execIndex);
+
+    // dynamic output edges are allocated on proxy memory blocks and removed from the list of regions
+    m_outputNodesMemBlocks = MemoryControl::filterOutDynamicOutputEdges(memoryRegions,
+                                                                        edgeClusters,
+                                                                        outputNodesMap);
+
+    memoryControl->insert(memoryRegions, allocationContext.syncPoints);
+    auto memoryBlocks = memoryControl->solve();
+
+    attachEdgeToMemoryControl(edgeClusters, memoryBlocks);
+    memoryControl->allocateMemory();
+    resolveInPlaceEdges(edgeClusters);
+
+    for (auto& edge : edges) edge->validate();
 }
 
-bool Graph::ProcessDynNodes() {
+bool Graph::ProcessDynNodes() const {
     OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ProcessDynNodes");
 
     const bool containsDynamicNodes = std::any_of(graphNodes.begin(), graphNodes.end(), [](const NodePtr& node) {
@@ -1395,14 +1465,6 @@ void Graph::Infer(SyncInferRequest* request) {
     DEBUG_LOG("Infer graph: ", GetName(), ". Status: ", static_cast<int>(status));
     const int numaId = GetNumaNodeId(m_context);
 
-    if (!m_pMemoryControl) {
-        OPENVINO_THROW("Memory control unit is not initilized in graph: ", GetName());
-    }
-
-    if (!m_pMemoryControl->allocated()) {
-        m_pMemoryControl->allocateMemory();
-    }
-
     switch (status) {
     case Status::ReadyDynamic:
         InferDynamic(request, numaId, UpdateNodes(m_executableGraphNodes));
diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h
index d50ccc152c9186..2e31c9f9243b0c 100644
--- a/src/plugins/intel_cpu/src/graph.h
+++ b/src/plugins/intel_cpu/src/graph.h
@@ -4,17 +4,17 @@
 
 #pragma once
 
+#include "allocation_context.hpp"
 #include "config.h"
 #include "cpu_memory.h"
 #include "nodes/input.h"
-#include "openvino/core/node_vector.hpp"
 #include "openvino/runtime/profiling_info.hpp"
 #include "node.h"
 #include "edge.h"
 #include "graph_context.h"
-#include "memory_control.hpp"
 #include "openvino/runtime/profiling_info.hpp"
 
+#include <algorithm>
 #include <map>
 #include <memory>
 #include <string>
@@ -31,6 +31,19 @@ namespace node {
 class MemoryStateNode;
 } // namespace node
 
+struct MemoryRegion {
+    int start;     // Execution order index of first use.
+    int finish;    // Execution order index of last use. -1 means inf
+    int64_t size;  // size in bytes; negative when the size is not defined in advance
+    int64_t id;    // ID unique for each region
+    // 'type' is the role of the region in the graph, 'alloc_type' is the kind of data it stores
+    enum class RegionType : uint8_t { VARIABLE, CONSTANT, INPUT, OUTPUT, IO } type;
+    enum class AllocType : uint8_t { POD, STRING, UNKNOWN } alloc_type;
+};
+
+using MemoryRegions = std::vector<MemoryRegion>;
+using OutputMemoryBlocks = std::unordered_map<std::size_t, ProxyMemoryBlockPtr>;
+
 class Graph {
 public:
     typedef std::shared_ptr<Graph> Ptr;
@@ -75,6 +88,9 @@ class Graph {
 
     void PushInputData(const std::size_t& index, const ov::SoPtr<ITensor>& input);
     void PullOutputData(std::unordered_map<std::size_t, ov::SoPtr<ITensor>>& output);
+    // @todo pass as part of one of the graph configuration stages
+    // void SetGlobalExecutionIndex() {
+    // }
 
     // Returns Output nodes memory descriptors
     VecMemoryDescs getOutputMemoryDescriptors() const;
@@ -213,12 +229,26 @@ class Graph {
 
     /**
      * Activate execution graph using \p externalInputMemory and \p externalOutputMemory
+     * 'globalAllocation' is a temporary flag indicating that the current graph is participating in
+     * global memory reuse (together with all inner / outer graphs).
+     * The flag should be dropped after all the nodes with inner graphs participate in
+     * global memory reuse by default
      */
     void Activate(const std::vector<MemoryPtr>& externalInputMemory = {},
-                            const std::vector<MemoryPtr>& externalOutputMemory = {});
+                  const std::vector<MemoryPtr>& externalOutputMemory = {},
+                  bool globalAllocation = false);
+
+    MemoryRegions RegisterExternalMemory(const std::vector<MemoryPtr>& externalInputMemory = {},
+                                         const std::vector<MemoryPtr>& externalOutputMemory = {});
+
+    void Allocate(bool globalAllocation = false);
+
+    AllocationContext CreateAllocationContext(bool global);
+
+    int RegisterToAllocationContext(int offset, AllocationContext& context);
 
     const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() const {
-        return outputNodesMemBlocksMap;
+        return m_outputNodesMemBlocks;
     }
 
 protected:
@@ -256,10 +286,10 @@ class Graph {
     void InitOptimalPrimitiveDescriptors();
     void ResolveEdgeConflicts();
     void ResolveComplexInplaceConflicts();
-    bool ProcessDynNodes();
-    void Allocate(const std::vector<size_t>& syncNodesInds);
-    void AllocateWithReuse(const std::vector<size_t>& syncNodesInds);
+    bool ProcessDynNodes() const;
+    void AllocateWithReuse(const std::vector<size_t>& syncNodesInds, GlobalExecutionIndex globalExecIndex);
     void CreatePrimitivesAndExecConstants() const;
+    std::vector<size_t> CreateExecutionGraph();
 
     /**
      * Execute a given \p node within \p request using \p numaId
@@ -300,7 +330,7 @@ class Graph {
     std::map<std::size_t, NodePtr> inputNodesMap;
     std::map<std::size_t, NodePtr> outputNodesMap;
 
-    std::unordered_map<std::size_t, ProxyMemoryBlockPtr> outputNodesMemBlocksMap;
+    OutputMemoryBlocks m_outputNodesMemBlocks;
 
     // these node pointers (from graphNodes) are to avoid regular checking for
     // constantness of nodes in Infer methods and calls of
@@ -310,8 +340,6 @@ class Graph {
 
     GraphContext::CPtr m_context;
     dnnl::stream m_stream;
-
-    MemoryControl* m_pMemoryControl = nullptr;
 };
 
 using GraphPtr = std::shared_ptr<Graph>;
diff --git a/src/plugins/intel_cpu/src/graph_context.cpp b/src/plugins/intel_cpu/src/graph_context.cpp
index e200766fa4791c..e4eb13ed58f53f 100644
--- a/src/plugins/intel_cpu/src/graph_context.cpp
+++ b/src/plugins/intel_cpu/src/graph_context.cpp
@@ -1,7 +1,6 @@
 // Copyright (C) 2018-2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-#include "dnnl_types.h"
 #include "graph_context.h"
 #include "nodes/memory.hpp"
 #include "memory_control.hpp"
@@ -12,6 +11,8 @@ namespace intel_cpu {
 GraphContext::GraphContext(const Config& config,
                            WeightsSharing::Ptr w_cache,
                            bool isGraphQuantized,
+                           MemoryControl* memoryControl,
+                           std::shared_ptr<NetworkMemoryControl> networkMemoryControl,
                            ov::threading::IStreamsExecutor::Ptr streamExecutor,
                            std::shared_ptr<SubMemoryManager> sub_memory_manager)
     : config(config),
@@ -20,7 +21,8 @@ GraphContext::GraphContext(const Config& config,
       streamExecutor(streamExecutor),
       subMemoryManager(sub_memory_manager),
       memoryStatesRegister(std::make_shared<node::MemoryStatesRegister>()),
-      networkMemoryControl(std::make_shared<NetworkMemoryControl>()) {
+      memoryControl(memoryControl),
+      networkMemoryControl(networkMemoryControl) {
     rtParamsCache = std::make_shared<MultiCache>(config.rtCacheCapacity);
     // primitive/executors can be shared across sub-stream
     // but scratch pad cannot be shared.
diff --git a/src/plugins/intel_cpu/src/graph_context.h b/src/plugins/intel_cpu/src/graph_context.h
index db2b126213978c..15f947d17788a9 100644
--- a/src/plugins/intel_cpu/src/graph_context.h
+++ b/src/plugins/intel_cpu/src/graph_context.h
@@ -18,6 +18,7 @@ namespace node {
 class MemoryStatesRegister;
 } // namespace node
 
+class MemoryControl;
 class NetworkMemoryControl;
 
 class GraphContext {
@@ -28,6 +29,8 @@ class GraphContext {
     GraphContext(const Config& config,
                  WeightsSharing::Ptr w_cache,
                  bool isGraphQuantized,
+                 MemoryControl* memoryControl,
+                 std::shared_ptr<NetworkMemoryControl> networkMemoryControl, // obsolete in favor of local memoryControl
                  ov::threading::IStreamsExecutor::Ptr streamExecutor = nullptr,
                  std::shared_ptr<SubMemoryManager> sub_memory_manager = nullptr);
 
@@ -78,6 +81,10 @@ class GraphContext {
         return memoryStatesRegister;
     }
 
+    MemoryControl* getMemoryControl() const {
+        return memoryControl;
+    }
+
     const std::shared_ptr<NetworkMemoryControl>& getNetworkMemoryControl() const {
         return networkMemoryControl;
     }
@@ -103,6 +110,10 @@ class GraphContext {
     int numNumaNodes = 1;
 
     std::shared_ptr<node::MemoryStatesRegister> memoryStatesRegister;
+    MemoryControl* memoryControl;
+    // to be removed in favor of local memoryControl
+    // currently required for the nodes with inner graphs which
+    // do not participate in global memory reuse
     std::shared_ptr<NetworkMemoryControl> networkMemoryControl;
 };
 
diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp
index fab6e99dcf2550..6bcc46153cbf79 100644
--- a/src/plugins/intel_cpu/src/graph_dumper.cpp
+++ b/src/plugins/intel_cpu/src/graph_dumper.cpp
@@ -243,7 +243,6 @@ void serializeToXML(const Graph &graph, const std::string& path) {
 
 void serializeToCout(const Graph &graph) {
     for (const auto& node : graph.GetNodes()) {
-        std::cout << "name: " << node->getName() << " [ ";
         auto nodeDesc = node->getSelectedPrimitiveDescriptor();
         if (nodeDesc) {
             auto& inConfs = nodeDesc->getConfig().inConfs;
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index f0b817dcda859c..2110d837ab7bc9 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -19,6 +19,7 @@
 #include "utils/general_utils.h"
 #include "utils/ngraph_utils.hpp"
 #include "openvino/runtime/threading/cpu_message.hpp"
+#include "memory_control.hpp"
 
 using OvString = ov::element_type_traits<ov::element::string>::value_type;
 
@@ -135,6 +136,15 @@ void SyncInferRequest::infer() {
 
     push_input_data();
 
+    MemoryControl* network_memory_control = m_graph->getGraphContext()->getMemoryControl();
+    if (!network_memory_control) {
+        OPENVINO_THROW("Memory control unit is not initilized for graph: ", m_graph->GetName());
+    }
+
+    if (!network_memory_control->allocated()) {
+        network_memory_control->allocateMemory();
+    }
+
     m_graph->Infer(this);
 
     throw_if_canceled();
diff --git a/src/plugins/intel_cpu/src/memory_control.cpp b/src/plugins/intel_cpu/src/memory_control.cpp
index 0f202c296891c1..3edc3f423b9032 100644
--- a/src/plugins/intel_cpu/src/memory_control.cpp
+++ b/src/plugins/intel_cpu/src/memory_control.cpp
@@ -4,10 +4,15 @@
 
 #include "memory_control.hpp"
 
+#include <cstddef>
+#include <new>
 #include <ov_optional.hpp>
 
+#include "edge.h"
 #include "node.h"
 #include "openvino/runtime/memory_solver.hpp"
+#include "proxy_mem_blk.h"
+#include "utils/general_utils.h"
 
 namespace ov {
 namespace intel_cpu {
@@ -84,7 +89,7 @@ class MemoryBlockWithRelease : public IMemoryBlockObserver {
 class IMemoryManager {
 public:
     virtual ~IMemoryManager() = default;
-    virtual void insert(const MemoryRegion& reg) = 0;
+    virtual void insert(const MemoryRegion& reg, const std::vector<size_t>& syncInds) = 0;
     virtual const MemoryControl::MemoryBlockMap& lastSolution() = 0;
     virtual void allocate() = 0;
     virtual void release() = 0;
@@ -99,7 +104,8 @@ std::shared_ptr<DnnlMemoryBlock> makeDnnlMemoryBlock(Args&&... args) {
 
 class MemoryManagerIO : public IMemoryManager {
 public:
-    void insert(const MemoryRegion& reg) override {
+    void insert(const MemoryRegion& reg, const std::vector<size_t>& syncInds) override {
+        (void) syncInds;
         m_blocks.insert({reg.id, makeDnnlMemoryBlock<MemoryBlockWithReuse>()});
     }
 
@@ -120,7 +126,8 @@ class MemoryManagerIO : public IMemoryManager {
 
 class MemoryManagerStatic : public IMemoryManager {
 public:
-    void insert(const MemoryRegion& reg) override {
+    void insert(const MemoryRegion& reg, const std::vector<size_t>& syncInds) override {
+        (void) syncInds;
         m_boxes.emplace_back(MemorySolver::Box{reg.start, reg.finish, reg.size, reg.id});
     }
 
@@ -167,19 +174,18 @@ class MemoryManagerStatic : public IMemoryManager {
 
 class MemoryManageNonOverlapingSets : public IMemoryManager {
 public:
-    MemoryManageNonOverlapingSets(std::vector<size_t> syncInds) : m_syncInds(std::move(syncInds)) {}
-    void insert(const MemoryRegion& reg) override {
+    void insert(const MemoryRegion& reg, const std::vector<size_t>& syncInds) override {
         MemorySolver::Box box = {reg.start, reg.finish, reg.size, reg.id};
         if (-1 != reg.finish) {
             //We have to extend the lifespan of tensors that are crossing a sync point border in order to save
             //the intermediate computation results from possible loss due to the tensor resize
             auto itr_upper =
-                std::upper_bound(m_syncInds.begin(), m_syncInds.end(), box.finish, [](int y, int x) {
+                std::upper_bound(syncInds.begin(), syncInds.end(), box.finish, [](int y, int x) {
                     return y <= x;
                 });
-            auto itr_lower = std::lower_bound(m_syncInds.begin(), m_syncInds.end(), box.start);
+            auto itr_lower = std::lower_bound(syncInds.begin(), syncInds.end(), box.start);
             if (itr_lower != itr_upper) { // across sections
-                if (itr_upper == m_syncInds.end()) {
+                if (itr_upper == syncInds.end()) {
                     box.finish = -1;
                 } else {
                     box.finish = *itr_upper;
@@ -242,7 +248,6 @@ class MemoryManageNonOverlapingSets : public IMemoryManager {
     std::unordered_map<MemoryControl::MemoryBlockMap::key_type, std::shared_ptr<MemoryBlockWithRelease>>
         m_internalBlocks;
     std::vector<MemorySolver::Box> m_boxes;
-    std::vector<size_t> m_syncInds;
 };
 
 }  // namespace
@@ -256,12 +261,12 @@ class MemoryControl::RegionHandler {
         : m_cond(std::move(cond)),
           m_memManager(std::move(memManager)) {}
 
-    bool insert(const MemoryRegion& reg) {
+    bool insert(const MemoryRegion& reg, const std::vector<size_t>& syncInds) {
         if (!m_cond(reg)) {
             return false;
         }
 
-        m_memManager->insert(reg);
+        m_memManager->insert(reg, syncInds);
         return true;
     }
 
@@ -292,9 +297,8 @@ MemoryControl::RegionHandlerPtr buildHandler(F&& f, Args&&... args) {
 
 }  // namespace
 
-MemoryControl::MemoryControl(std::vector<size_t> syncInds) {
+MemoryControl::MemoryControl() {
     // init handlers
-
     // handler for dynamic tensors
     m_handlers.emplace_back(buildHandler<MemoryManagerStatic>([](const MemoryRegion& reg) {
         if (reg.size < 0 || MemoryRegion::RegionType::VARIABLE != reg.type ||
@@ -311,7 +315,7 @@ MemoryControl::MemoryControl(std::vector<size_t> syncInds) {
             return false;
         }
         return true;
-    }, std::move(syncInds)));
+    }));
 
     //handler for I/O tensors, so far simply individual blocks
     m_handlers.emplace_back(buildHandler<MemoryManagerIO>([](const MemoryRegion& reg) {
@@ -322,22 +326,24 @@ MemoryControl::MemoryControl(std::vector<size_t> syncInds) {
     }));
 }
 
-void MemoryControl::insert(const MemoryRegion& region) {
+void MemoryControl::insert(const MemoryRegion& region, const std::vector<size_t>& syncInds) {
     for (auto&& handler : m_handlers) {
-        if (handler->insert(region)) {
+        if (handler->insert(region, syncInds)) {
             return;
         }
     }
     OPENVINO_THROW("No suitable hanlder was found for the given memory region");
 }
 
-MemoryControl::MemoryBlockMap MemoryControl::insert(const std::vector<MemoryRegion>& regions) {
+void MemoryControl::insert(const std::vector<MemoryRegion>& regions,
+                           const std::vector<size_t>& syncInds) {
     for (auto&& region : regions) {
-        insert(region);
+        insert(region, syncInds);
     }
+}
 
+MemoryControl::MemoryBlockMap MemoryControl::solve() {
     MemoryControl::MemoryBlockMap blocksMap;
-    blocksMap.reserve(regions.size());
 
     for (auto&& handler : m_handlers) {
         auto&& solution = handler->lastSolution();
@@ -364,52 +370,186 @@ void MemoryControl::releaseMemory() {
     m_allocated = false;
 }
 
-edgeClusters MemoryControl::findEdgeClusters(const std::vector<EdgePtr>& graphEdges) {
-    typedef std::unordered_map<EdgePtr, size_t> edge_cluster_idx_map_t;
-
-    edgeClusters edge_clusters;
-    edge_cluster_idx_map_t edge_cluster_indices;
+// /**
+//  * Forms clusters of edges.
+//  * An edge cluster is a collection of edges, so:
+//  * - base edge is an edge with a Memory which other edges point to by means of inplace logic
+//  * - first edge of a cluster is a base edge with a status either NeedAllocation or Allocated
+//  * - rest of the edges in a cluster are NotAllocated ones, since they point to their base edge
+//  */
+// EdgeClusters MemoryControl::flattenEdgeClusters(const EdgeClusters& clusters) {
+// }
+
+/**
+ * Forms clusters of edges.
+ * An edge cluster is a collection of edges, so:
+ * - base edge is an edge with a Memory which other edges point to by means of inplace logic
+ * - first edge of a cluster is a base edge with a status either NeedAllocation or Allocated
+ * - rest of the edges in a cluster are NotAllocated ones, since they point to their base edge
+ */
+EdgeClusters MemoryControl::formEdgeClusters(const std::vector<EdgePtr>& graphEdges) {
+    typedef std::unordered_map<EdgePtr, size_t> EdgeClusterIdxMap;
+    EdgeClusters edgeClusters;
+    EdgeClusterIdxMap edgeClusterIndices;
 
     for (auto& edge : graphEdges) {
-        auto edge_it = edge_cluster_indices.find(edge);
-        if (edge_it != edge_cluster_indices.end())
-            continue;  // edge is visited
+        if (edgeClusterIndices.count(edge))
+            continue; // edge is visited
 
-        size_t cluster_idx = edge_clusters.size();
-        EdgePtr last_shared_edge = nullptr;
+        size_t clusterIdx = edgeClusters.size();
+        EdgePtr lastSharedEdge = nullptr;
 
         // find cluster index
         for (auto shared_edge = edge->getSharedEdge(std::nothrow); shared_edge;
              shared_edge = shared_edge->getSharedEdge(std::nothrow)) {
-            auto shared_edge_it = edge_cluster_indices.find(shared_edge);
-            if (shared_edge_it != edge_cluster_indices.end()) {
-                cluster_idx = shared_edge_it->second;
-                last_shared_edge = shared_edge;
+            auto shared_edge_it = edgeClusterIndices.find(shared_edge);
+            if (shared_edge_it != edgeClusterIndices.end()) {
+                clusterIdx = shared_edge_it->second;
+                lastSharedEdge = shared_edge;
                 break;
             }
         }
 
-        // add shared edges to cluster
-        edge_cluster_indices.emplace(edge, cluster_idx);
+        if (clusterIdx == edgeClusters.size())
+            edgeClusters.emplace_back();  // start empty: addToCluster() below appends the base edge first
 
-        if (cluster_idx == edge_clusters.size())
-            edge_clusters.emplace_back(edgeCluster{edge});
-        else
-            edge_clusters[cluster_idx].emplace(edge);
+        // use recursive approach to ensure that the base edge is placed as a first entry of a cluster
+        std::function<void(EdgePtr)> addToCluster;
+        addToCluster = [&addToCluster, &edgeClusterIndices, &clusterIdx, &edgeClusters, &lastSharedEdge](EdgePtr edge) {
+            if (edge == lastSharedEdge)
+                return;
 
-        for (auto shared_edge = edge->getSharedEdge(std::nothrow); shared_edge != last_shared_edge;
-             shared_edge = shared_edge->getSharedEdge(std::nothrow)) {
-            edge_cluster_indices.emplace(shared_edge, cluster_idx);
-            edge_clusters[cluster_idx].emplace(shared_edge);
+            addToCluster(edge->getSharedEdge(std::nothrow));
+
+            edgeClusterIndices.emplace(edge, clusterIdx);
+            edgeClusters[clusterIdx].push_back(edge);
+        };
+
+        addToCluster(edge);
+    }
+
+    return edgeClusters;
+}
+
+static inline bool isConstOutput(const EdgePtr& edge) {  // true when a constant parent feeds a non-constant child
+    return edge->getParent()->isConstant() && !edge->getChild()->isConstant();
+}
+
+// Describes the first 'remaining' clusters as memory regions. The lifetime of a region spans from the earliest
+// output execution index of the edge parents to the latest input execution index of the edge children (both
+// looked up in 'globalExecIndex'). The size is the largest edge size, or -1 if any edge descriptor is undefined.
+MemoryRegions MemoryControl::formMemoryRegions(const EdgeClusters& clusters,
+                                               size_t remaining,
+                                               const GlobalExecutionIndex& globalExecIndex) {
+    // Markup the memory regions
+    MemoryRegions memoryRegions;
+    memoryRegions.reserve(remaining);
+
+    for (size_t i = 0; i < remaining; ++i) {
+        MemoryRegion reg = {std::numeric_limits<int>::max(),
+                            0,
+                            0,
+                            static_cast<int64_t>(i),
+                            MemoryRegion::RegionType::VARIABLE,
+                            MemoryRegion::AllocType::UNKNOWN};
+
+        int64_t boxSize = 0;
+        bool isConst = false, isOutput = false, isInput = false;
+        // accumulate the lifetime, size and kind of the region over all the edges of the cluster
+        for (auto &edge : clusters[i]) {
+            const auto& parent = edge->getParent();
+            const auto& child = edge->getChild();
+
+            // an edge lives from the output execution index of its parent to the input execution index of its child
+            int e_start = globalExecIndex.at(parent).second;
+            int e_finish = globalExecIndex.at(child).first;
+
+            auto&& desc = edge->getDesc();
+
+            // a single undefined descriptor makes the size of the whole region unknown (-1)
+            if (boxSize != -1 && desc.isDefined()) {
+                int64_t e_size = desc.getCurrentMemSize();  // size in bytes (from the beginning of data to the last element)
+                boxSize = std::max(e_size, boxSize);
+            } else {
+                boxSize = -1;
+            }
+
+            reg.start = std::min(e_start, reg.start);
+            reg.finish = std::max(e_finish, reg.finish);
+
+            auto allocType =
+                desc.getPrecision() == element::string ? MemoryRegion::AllocType::STRING : MemoryRegion::AllocType::POD;
+
+            if (reg.alloc_type != allocType && MemoryRegion::AllocType::UNKNOWN != reg.alloc_type) {
+                OPENVINO_THROW("Different allocation types in the same memory region");
+            }
+            reg.alloc_type = allocType;
+
+            isConst  |= isConstOutput(edge);
+            isOutput |= child->getType() == Type::Output;
+            isInput  |= parent->getType() == Type::Input;
+        }
+
+        reg.size = boxSize;
+
+        if (isConst) {
+            reg.type = MemoryRegion::RegionType::CONSTANT;
+        } else if (isInput) {
+            if (isOutput) {
+                reg.type = MemoryRegion::RegionType::IO;
+            } else {
+                reg.type = MemoryRegion::RegionType::INPUT;
+            }
+        } else if (isOutput) {
+            reg.type = MemoryRegion::RegionType::OUTPUT;
         }
+
+        memoryRegions.push_back(reg);
     }
 
-    return edge_clusters;
+    return memoryRegions;
+}
+
+// Drops dynamic output regions from 'memoryRegions', allocating their edges on proxy blocks returned per output index.
+OutputMemoryBlocks MemoryControl::filterOutDynamicOutputEdges(MemoryRegions& memoryRegions,
+                                                              const EdgeClusters& clusters,
+                                                              const std::map<std::size_t, NodePtr>& outputNodes) {
+    OutputMemoryBlocks outputMemBlocks;
+    memoryRegions.erase(std::remove_if(memoryRegions.begin(), memoryRegions.end(), [&](const MemoryRegion& region) {
+        if (region.size >= 0 || !one_of(region.type, MemoryRegion::RegionType::OUTPUT, MemoryRegion::RegionType::IO)) {
+            return false;
+        }
+        bool result = false;
+        for (auto& edge : clusters[region.id]) {
+            auto child = edge->getChild();
+            if (child->getType() == Type::Output && edge->getStatus() == Edge::Status::NeedAllocation) {
+                auto proxyMemBlock = std::make_shared<ProxyMemoryBlock>();
+                DEBUG_LOG("ProxyMemoryBlock ", proxyMemBlock);
+                edge->allocate(proxyMemBlock);
+
+                // Store the output memory blocks.
+                // So that, the infer requests can be able to access them.
+                int count = 0;
+                for (auto& output : outputNodes) {
+                    if (output.second == child) {
+                        outputMemBlocks[output.first] = proxyMemBlock;
+                        count++;
+                    }
+                }
+                // sometimes there are unused output ports.
+                OPENVINO_ASSERT(count <= 1, "CPU plugin cannot find output node. count ", count);
+                result = true;
+            }
+        }
+        return result;
+    }), memoryRegions.end());
+
+    return outputMemBlocks;
 }
 
-MemoryControl& NetworkMemoryControl::createMemoryControlUnit(std::vector<size_t> syncInds) {
-    m_controlUnits.emplace_back(std::unique_ptr<MemoryControl>(new MemoryControl(syncInds)));
-    return *(m_controlUnits.back());
+MemoryControl* NetworkMemoryControl::createMemoryControlUnit() {
+    m_controlUnits.emplace_back(std::unique_ptr<MemoryControl>(new MemoryControl()));
+    return m_controlUnits.back().get();  // non-owning: the unit stays owned by m_controlUnits
 }
 
 void NetworkMemoryControl::allocateMemory() {
@@ -425,4 +565,4 @@ void NetworkMemoryControl::releaseMemory() {
 }
 
 }  // namespace intel_cpu
-}  // namespace ov
\ No newline at end of file
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/memory_control.hpp b/src/plugins/intel_cpu/src/memory_control.hpp
index ce4dc90890f3fa..fc38cf8df2ccb0 100644
--- a/src/plugins/intel_cpu/src/memory_control.hpp
+++ b/src/plugins/intel_cpu/src/memory_control.hpp
@@ -5,22 +5,15 @@
 #pragma once
 
 #include "edge.h"
+#include "graph.h"
+#include "node.h"
+#include "proxy_mem_blk.h"
 
 namespace ov {
 namespace intel_cpu {
 
-using edgeCluster = std::unordered_set<EdgePtr>;
-using edgeClusters = std::vector<edgeCluster>;
-
-struct MemoryRegion {
-    int start;     // Execution order index of first use.
-    int finish;    // Execution order index of last use. -1 means inf
-    int64_t size;  // size in bytes
-    int64_t id;    // ID unique for each region
-
-    enum class RegionType : uint8_t { VARIABLE, CONSTANT, INPUT, OUTPUT, IO } type;
-    enum class AllocType : uint8_t { POD, STRING, UNKNOWN } alloc_type;
-};
+using EdgeCluster = std::vector<EdgePtr>;
+using EdgeClusters = std::vector<EdgeCluster>;
 
 class MemoryControl {
 public:
@@ -30,9 +23,16 @@ class MemoryControl {
     using MemoryBlockMap = std::unordered_map<decltype(MemoryRegion::id), MemoryBlockPtr>;
 
 public:
-    static edgeClusters findEdgeClusters(const std::vector<EdgePtr>& graphEdges);
+    static EdgeClusters formEdgeClusters(const std::vector<EdgePtr>& graphEdges);
+    static MemoryRegions formMemoryRegions(const EdgeClusters& clusters, size_t remaining, const GlobalExecutionIndex& globalExecIndex);
+    static OutputMemoryBlocks filterOutDynamicOutputEdges(MemoryRegions& memoryRegions,
+                                                                const EdgeClusters& clusters,
+                                                                const std::map<std::size_t, NodePtr>& outputNodes);
+
+    void insert(const MemoryRegions& regions,
+                const std::vector<size_t>& syncInds);
 
-    MemoryBlockMap insert(const std::vector<MemoryRegion>& regions);
+    MemoryBlockMap solve();
 
     bool allocated() const {
         return m_allocated;
@@ -42,13 +42,12 @@ class MemoryControl {
     void releaseMemory();
 
 private:
-    explicit MemoryControl(std::vector<size_t> syncInds);
-    void insert(const MemoryRegion& region);
+    explicit MemoryControl();
+    void insert(const MemoryRegion& region, const std::vector<size_t>& syncInds);
 
     friend class NetworkMemoryControl;
 
 private:
-    std::vector<size_t> m_syncInds;
     std::vector<RegionHandlerPtr> m_handlers;
     bool m_allocated = false;
 };
@@ -56,7 +55,8 @@ class MemoryControl {
 class NetworkMemoryControl {
 public:
     NetworkMemoryControl() = default;
-    MemoryControl& createMemoryControlUnit(std::vector<size_t> syncInds);
+    // @todo return std::reference_wrapper instead?
+    MemoryControl* createMemoryControlUnit();
 
     void allocateMemory();
     void releaseMemory();
@@ -69,4 +69,4 @@ class NetworkMemoryControl {
 };
 
 }  // namespace intel_cpu
-}  // namespace ov
\ No newline at end of file
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp
index 7c23d55fc4147a..2c592a26bac7c9 100644
--- a/src/plugins/intel_cpu/src/node.cpp
+++ b/src/plugins/intel_cpu/src/node.cpp
@@ -1152,6 +1152,10 @@ bool Node::isConstant() {
     return getConstantType() == ConstantType::Const;
 }
 
+bool Node::isConstantInput() {
+    return isConstant() && getType() == Type::Input;
+}
+
 void Node::updateConstantType() {
     if (constant == ConstantType::StrictNoConst)
         return;
diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h
index 948bd6999ce27a..4b437d94b52f83 100644
--- a/src/plugins/intel_cpu/src/node.h
+++ b/src/plugins/intel_cpu/src/node.h
@@ -6,6 +6,7 @@
 
 #include <common/utils.hpp>
 #include <oneapi/dnnl/dnnl.hpp>
+#include "allocation_context.hpp"
 #include "cpu_memory.h"
 #include "cpu_shape.h"
 #include "cpu_types.h"
@@ -43,6 +44,7 @@ using NodePtr = std::shared_ptr<Node>;
 using NodeConstPtr = std::shared_ptr<const Node>;
 using NodeWeakPtr = std::weak_ptr<Node>;
 
+
 class PortConfigurator {
 public:
     PortConfigurator(ov::intel_cpu::LayoutType blockedDescType, ov::element::Type prc, const Shape& shape,
@@ -278,6 +280,7 @@ class Node {
     ConstantType getConstantType() const;
     void updateConstantType();
     bool isConstant();
+    bool isConstantInput();
 
     // return type int supports return -1 in overloading when channel axis doesn't exist
     virtual int getFusingAxis() const {
@@ -482,6 +485,25 @@ class Node {
     int getExecIndex() const {
         return execIndex;
     }
+    // For nodes with subgraphs: meant to equal the number of (contiguous) internal nodes - 1.
+    // NOTE(review): the earlier wording also promised 0 for all other nodes, but this default
+    // returns 1 - confirm which of the two is intended.
+    virtual int getExecIndexOffset() const {
+        return 1;
+    }
+
+    // Does nothing by default. NOTE(review): presumably meant to be overridden by nodes that
+    // contribute entries to globalExecIndexStorage - confirm, no override is visible here.
+    virtual void updateGlobalFlattenExecIndex(std::unordered_map<NodePtr, size_t>& globalExecIndexStorage) {
+        (void) globalExecIndexStorage;
+    }
+
+    // Default implementation: the allocation context is left untouched and the next offset
+    // (offset + 1) is returned. Overridden by Composite and LoRA.
+    virtual int registerToAllocationContext(int offset, AllocationContext& context) {
+        (void) context;
+        return offset + 1;
+    }
 
     const std::string & getTypeStr() const {
         return typeStr;
diff --git a/src/plugins/intel_cpu/src/nodes/composite.cpp b/src/plugins/intel_cpu/src/nodes/composite.cpp
index a1ceabd6942db1..d2d8ee66ad6323 100644
--- a/src/plugins/intel_cpu/src/nodes/composite.cpp
+++ b/src/plugins/intel_cpu/src/nodes/composite.cpp
@@ -4,6 +4,7 @@
 
 #include "composite.h"
 
+#include "compiled_model.h"
 #include "nodes/input.h"
 #include "cpu_memory.h"
 #include "transformations/cpu_opset/common/op/submodel.hpp"
@@ -75,23 +76,46 @@ void Composite::selectOptimalPrimitiveDescriptor() {
 
 // @todo add ascii diagramm for memory mapping / reuse
 void Composite::createPrimitive() {
-    OPENVINO_ASSERT(getOriginalInputsNumber() == m_graph.GetInputNodesMap().size(),
-                    "Number of node inputs must be equal the number of inner graph's inputs");
+    // OPENVINO_ASSERT(getOriginalInputsNumber() == m_graph.GetInputNodesMap().size(),
+    //                 "Number of node inputs must be equal the number of inner graph's inputs");
 
-    std::vector<MemoryPtr> inputMemory;
-    for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
-        inputMemory.emplace_back(getSrcMemoryAtPort(i));
-    }
+    // std::vector<MemoryPtr> inputMemory;
+    // for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
+    //     inputMemory.emplace_back(getSrcMemoryAtPort(i));
+    // }
 
-    OPENVINO_ASSERT(getOriginalOutputsNumber() == m_graph.GetOutputNodesMap().size(),
-                    "Number of node outputs must be equal the number of inner graph's outputs");
+    // OPENVINO_ASSERT(getOriginalOutputsNumber() == m_graph.GetOutputNodesMap().size(),
+    //                 "Number of node outputs must be equal the number of inner graph's outputs");
 
-    std::vector<MemoryPtr> outputMemory;
-    for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
-        outputMemory.emplace_back(getDstMemoryAtPort(i));
+    // std::vector<MemoryPtr> outputMemory;
+    // for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
+    //     outputMemory.emplace_back(getDstMemoryAtPort(i));
+    // }
+
+    // m_graph.Activate(inputMemory, outputMemory);
+    m_graph.Activate({}, {}, true);
+}
+
+// Shares inner-graph boundary edges with this node's outer edges, then registers the inner graph.
+int Composite::registerToAllocationContext(int offset, AllocationContext& context) {
+    for (size_t i = 0; i < getParentEdges().size(); i++) {
+        auto parentEdge = getParentEdgeAt(i);
+        auto inputEdges = m_graph.GetInputNodesMap().at(i)->getChildEdgesAtPort(0);
+        for (const auto& inputEdge : inputEdges) {
+            OPENVINO_ASSERT(inputEdge->getStatus() == Edge::Status::Uninitialized,
+                            "Expected Uninitialized state for edge: ", inputEdge->name());
+            inputEdge->sharedMemFrom(parentEdge);
+        }
+    }
+    // per output port, not per child edge: several consumers of one port must not overrun the outputs map
+    for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
+        auto outputEdge = m_graph.GetOutputNodesMap().at(i)->getParentEdgeAt(0);
+        OPENVINO_ASSERT(outputEdge->getStatus() == Edge::Status::Uninitialized,
+                        "Expected Uninitialized state for edge: ", outputEdge->name());
+        outputEdge->sharedMemFrom(getChildEdgeAt(i));
     }
 
-    m_graph.Activate(inputMemory, outputMemory);
+    return m_graph.RegisterToAllocationContext(offset, context);
 }
 
 void Composite::execute(dnnl::stream) {
diff --git a/src/plugins/intel_cpu/src/nodes/composite.h b/src/plugins/intel_cpu/src/nodes/composite.h
index 9f18a2ba68b769..a80157f624003b 100644
--- a/src/plugins/intel_cpu/src/nodes/composite.h
+++ b/src/plugins/intel_cpu/src/nodes/composite.h
@@ -4,7 +4,9 @@
 
 #pragma once
 
+#include <cstddef>
 #include <memory>
+#include <numeric>
 
 #include "graph.h"
 #include "node.h"
@@ -41,6 +43,8 @@ class Composite : public Node {
     void execute(dnnl::stream) override;
     void executeDynamicImpl(dnnl::stream strm) override;
 
+    int registerToAllocationContext(int offset, AllocationContext& context) override;
+
     const Graph& graph() const {
         return m_graph;
     }
diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp
index 1f650bd8c5de17..5a2b56fac51e07 100644
--- a/src/plugins/intel_cpu/src/nodes/input.cpp
+++ b/src/plugins/intel_cpu/src/nodes/input.cpp
@@ -543,6 +543,36 @@ void Input::initSupportedPdFromMemDesc() {
     supportedPrimitiveDescriptors.emplace_back(std::move(config), impl_desc_type::unknown);
 }
 
+// Resolves the in-place edges of an Input node: every affected edge is allocated on the memory
+// block of the edge that terminates its sharing chain, i.e. the edge reached by following
+// Edge::getSharedEdge(std::nothrow) until it returns null (the edge itself if it shares nothing).
+//   LOOK_UP   - handles the child edges at port 0
+//   LOOK_DOWN - handles every parent edge
+void Input::resolveInPlaceEdges(Edge::LOOK look) {
+    // One helper for both directions; the shared edge is queried once per step.
+    const auto allocateOnSharedBlock = [](const EdgePtr& edge) {
+        EdgePtr root = edge;
+        while (auto next = root->getSharedEdge(std::nothrow)) {
+            root = next;
+        }
+
+        // NOTE(review): the root edge is assumed to already own memory; getMemoryPtr() is not checked.
+        edge->allocate(root->getMemoryPtr()->getMemoryBlock());
+    };
+
+    if (look & Edge::LOOK_UP) {
+        for (const auto& edge : getChildEdgesAtPort(0)) {
+            allocateOnSharedBlock(edge);
+        }
+    }
+
+    if (look & Edge::LOOK_DOWN) {
+        for (size_t i = 0; i < getParentEdges().size(); i++) {
+            allocateOnSharedBlock(getParentEdgeAt(i));
+        }
+    }
+}
+
 }   // namespace node
 }   // namespace intel_cpu
 }   // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h
index 4d7febb17ad4b7..a943419aed06d1 100644
--- a/src/plugins/intel_cpu/src/nodes/input.h
+++ b/src/plugins/intel_cpu/src/nodes/input.h
@@ -56,6 +56,7 @@ class Input : public Node {
     void selectOptimalPrimitiveDescriptor() override;
     void createPrimitive() override;
     bool created() const override;
+    void resolveInPlaceEdges(Edge::LOOK look) override;
 
     void withMeanImage();
     MemoryCPtr getMemoryPtr() const;
diff --git a/src/plugins/intel_cpu/src/nodes/lora.cpp b/src/plugins/intel_cpu/src/nodes/lora.cpp
index 2c69bc347b6139..3c0ae5087bae3d 100644
--- a/src/plugins/intel_cpu/src/nodes/lora.cpp
+++ b/src/plugins/intel_cpu/src/nodes/lora.cpp
@@ -80,21 +80,46 @@ void LoRA::selectOptimalPrimitiveDescriptor() {
     selectPrimitiveDescriptorByIndex(0);
 }
 
+// Ties the boundary edges of the inner graph to this node's outer edges (each inner edge shares
+// memory from the corresponding outer edge), then registers the inner graph starting at `offset`.
+// Returns whatever m_graph.RegisterToAllocationContext() returns.
+int LoRA::registerToAllocationContext(int offset, AllocationContext& context) {
+    for (size_t i = 0; i < getParentEdges().size(); i++) {
+        auto parentEdge = getParentEdgeAt(i);
+        auto inputEdges = m_graph.GetInputNodesMap().at(i)->getChildEdgesAtPort(0);
+        for (const auto& inputEdge : inputEdges) {
+            OPENVINO_ASSERT(inputEdge->getStatus() == Edge::Status::Uninitialized,
+                            "Expected Uninitialized Edge instead of: ", static_cast<int>(inputEdge->getStatus()));
+            inputEdge->sharedMemFrom(parentEdge);
+        }
+    }
+
+    // Walk output ports rather than child edges: a port with several consumers yields more child
+    // edges than the inner graph has outputs, and GetOutputNodesMap().at(i) would then fail.
+    for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
+        auto childEdge = getChildEdgeAt(i);
+        auto outputEdge = m_graph.GetOutputNodesMap().at(i)->getParentEdgeAt(0);
+        outputEdge->sharedMemFrom(childEdge);
+    }
+    return m_graph.RegisterToAllocationContext(offset, context);
+}
+
 // @todo add ascii diagram for memory mapping / reuse
 void LoRA::createPrimitive() {
-    CPU_NODE_ASSERT(getOriginalInputsNumber() == m_graph.GetInputNodesMap().size(),
-                    "Number of node inputs must be equal the number of inner graph's inputs");
+    // CPU_NODE_ASSERT(getOriginalInputsNumber() == m_graph.GetInputNodesMap().size(),
+    //                 "Number of node inputs must be equal the number of inner graph's inputs");
 
-    std::vector<MemoryPtr> inputMemory;
-    for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
-        inputMemory.emplace_back(getSrcMemoryAtPort(i));
-    }
+    // std::vector<MemoryPtr> inputMemory;
+    // for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
+    //     inputMemory.emplace_back(getSrcMemoryAtPort(i));
+    // }
 
-    CPU_NODE_ASSERT(getOriginalOutputsNumber() == m_graph.GetOutputNodesMap().size(),
-                    "Number of node outputs must be equal the number of inner graph's outputs");
+    // CPU_NODE_ASSERT(getOriginalOutputsNumber() == m_graph.GetOutputNodesMap().size(),
+    //                 "Number of node outputs must be equal the number of inner graph's outputs");
 
-    std::vector<MemoryPtr> outputMemory{getDstMemoryAtPort(0)};
-    m_graph.Activate(inputMemory, outputMemory);
+    // std::vector<MemoryPtr> outputMemory{getDstMemoryAtPort(0)};
+    // m_graph.Activate(inputMemory, outputMemory);
+    m_graph.Activate({}, {}, true);
 }
 
 void LoRA::execute(dnnl::stream) {
diff --git a/src/plugins/intel_cpu/src/nodes/lora.h b/src/plugins/intel_cpu/src/nodes/lora.h
index 89a1bc15c2bf17..acba8d949d2e82 100644
--- a/src/plugins/intel_cpu/src/nodes/lora.h
+++ b/src/plugins/intel_cpu/src/nodes/lora.h
@@ -27,6 +27,7 @@ class LoRA : public Node {
 
     void getSupportedDescriptors() override{};
     void selectOptimalPrimitiveDescriptor() override;
+    int registerToAllocationContext(int offset, AllocationContext& context) override;
     void createPrimitive() override;
     void execute(dnnl::stream) override;
     void executeDynamicImpl(dnnl::stream strm) override;
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 5c88772eeedabc..e330cad845837c 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -521,7 +521,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
     Config::ModelType modelType = getModelType(model);
     conf.readProperties(config, modelType);
 
-    auto context = std::make_shared<GraphContext>(conf, fake_w_cache, false);
+    auto context = std::make_shared<GraphContext>(conf, fake_w_cache, false, nullptr, nullptr);
 
     auto supported = ov::get_supported_nodes(
         model,
diff --git a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake
index 057869a864d87b..9d7fa9f9d9a365 100644
--- a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake
+++ b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake
@@ -96,7 +96,8 @@ endif()
 endfunction()
 
 if(ENABLE_CPU_SPECIFIC_TARGET_PER_TEST)
-  create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src ov_cpu_func_subgraph)
+  # NOTE(review): previously ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src; narrowed to src/common below - confirm this is not a local debugging leftover
+  create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common ov_cpu_func_subgraph)
   create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests ov_cpu_func_slt)
 endif()
 
diff --git a/src/plugins/intel_cpu/tests/unit/graph/inplace_resolve_io.cpp b/src/plugins/intel_cpu/tests/unit/graph/inplace_resolve_io.cpp
index a41cb4c4300d42..96733ec115319a 100644
--- a/src/plugins/intel_cpu/tests/unit/graph/inplace_resolve_io.cpp
+++ b/src/plugins/intel_cpu/tests/unit/graph/inplace_resolve_io.cpp
@@ -6,6 +6,7 @@
 #include "dummy_node.hpp"
 #include "graph.h"
 
+#include "memory_control.hpp"
 #include "nodes/input.h"
 #include "nodes/concat.h"
 #include "nodes/rnn.h"
@@ -42,7 +43,11 @@ class InplaceResolveIOCPUTestBase : public ::testing::Test {
     std::shared_ptr<Graph> create_graph(const std::vector<ov::PartialShape>& input_shapes, const size_t num_consumers = 1) {
         Config conf;
         conf.rtCacheCapacity = 100;
-        const auto context = std::make_shared<const GraphContext>(conf, nullptr, false);
+        const auto context = std::make_shared<const GraphContext>(conf,
+                                                                  nullptr,
+                                                                  false,
+                                                                  networkMemoryControl->createMemoryControlUnit(),
+                                                                  networkMemoryControl);
 
         std::shared_ptr<Graph> graph = std::shared_ptr<Graph>(new Graph());
 
@@ -88,6 +93,7 @@ class InplaceResolveIOCPUTestBase : public ::testing::Test {
     std::vector<NodePtr> nodes;
     std::vector<EdgePtr> edges;
     std::unordered_set<NodePtr> nodesSet;
+    std::shared_ptr<NetworkMemoryControl> networkMemoryControl = std::make_shared<NetworkMemoryControl>();
 };
 
 class RNNConcatCPUTest : public InplaceResolveIOCPUTestBase {
diff --git a/src/plugins/intel_cpu/tests/unit/graph/memory_state.cpp b/src/plugins/intel_cpu/tests/unit/graph/memory_state.cpp
index 5b9468ffc35e6f..02a5940965fb6e 100644
--- a/src/plugins/intel_cpu/tests/unit/graph/memory_state.cpp
+++ b/src/plugins/intel_cpu/tests/unit/graph/memory_state.cpp
@@ -6,6 +6,7 @@
 #include "dummy_node.hpp"
 
 #include "graph.h"
+#include "memory_control.hpp"
 #include "nodes/memory.hpp"
 #include "nodes/softmax.h"
 #include "nodes/shapeof.h"
@@ -82,7 +83,8 @@ TEST(MemStateGraphTest, smoke_Check_Memory_Modification_Guard) {
 
         Config conf;
         conf.rtCacheCapacity = 0;
-        auto context = std::make_shared<GraphContext>(conf, nullptr, false);
+        std::shared_ptr<NetworkMemoryControl> networkMemoryControl = std::make_shared<NetworkMemoryControl>();
+        auto context = std::make_shared<GraphContext>(conf, nullptr, false, networkMemoryControl->createMemoryControlUnit(), networkMemoryControl);
 
         auto input_node = std::make_shared<node::Input>(param, context);
         auto memory_input = std::make_shared<node::MemoryInput>(read, context);
@@ -281,7 +283,12 @@ TEST(MemStateGraphTest, smoke_ShapeOf_no_Inplace_Conflicts) {
 
     Config conf;
     conf.rtCacheCapacity = 0;
-    auto context = std::make_shared<GraphContext>(conf, nullptr, false);
+    std::shared_ptr<NetworkMemoryControl> networkMemoryControl = std::make_shared<NetworkMemoryControl>();
+    auto context = std::make_shared<GraphContext>(conf,
+                                                  nullptr,
+                                                  false,
+                                                  networkMemoryControl->createMemoryControlUnit(),
+                                                  networkMemoryControl);
 
     auto input_node = std::make_shared<node::Input>(param, context);
     auto memory_input = std::make_shared<node::MemoryInput>(read, context);
diff --git a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp
index 003aca979398fb..71bf2dc340855e 100644
--- a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp
@@ -9,6 +9,7 @@
 #include "common_test_utils/node_builders/constant.hpp"
 #include "dummy_node.hpp"
 #include "graph.h"
+#include "memory_control.hpp"
 #include "nodes/input.h"
 #include "nodes/reorder.h"
 #include "nodes/reshape.h"
@@ -76,7 +77,7 @@ class MergeTransposeReorderCPUTest : public testing::WithParamInterface<MergeTra
                         "MergeTransposeReorderCPUTest doesn't support shape", shape,
                         ". Only 4D and 3D shapes are supported");
         Config conf;
-        m_context = std::make_shared<GraphContext>(conf, nullptr, false);
+        m_context = std::make_shared<GraphContext>(conf, nullptr, false, networkMemoryControl->createMemoryControlUnit(), networkMemoryControl);
         const auto replication_result = CreateModelAndReplicate(shape,
                                                                 params.firstNodeLayout,
                                                                 params.firstNodeInplaceDirection,
@@ -173,6 +174,7 @@ class MergeTransposeReorderCPUTest : public testing::WithParamInterface<MergeTra
 
     std::shared_ptr<GraphContext> m_context;
     std::unique_ptr<Graph> m_graph;
+    std::shared_ptr<NetworkMemoryControl> networkMemoryControl = std::make_shared<NetworkMemoryControl>();
 };  // class MergeTransposeReorderCPUTest
 
 /*
@@ -335,7 +337,8 @@ TEST(MergeTransposeReorder, smoke_InplaceConflict) {
     */
     Config conf;
     conf.rtCacheCapacity = 100;
-    auto context = std::make_shared<GraphContext>(conf, nullptr, false);
+    std::shared_ptr<NetworkMemoryControl> networkMemoryControl = std::make_shared<NetworkMemoryControl>();
+    auto context = std::make_shared<GraphContext>(conf, nullptr, false, networkMemoryControl->createMemoryControlUnit(), networkMemoryControl);
 
     std::unique_ptr<Graph> graph = std::unique_ptr<Graph>(new Graph());
 
diff --git a/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp b/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp
index b44194a3d5806c..8e510f31f8066c 100644
--- a/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp
@@ -5,6 +5,7 @@
 
 #include "dummy_node.hpp"
 #include "graph.h"
+#include "memory_control.hpp"
 #include "nodes/input.h"
 #include "nodes/concat.h"
 #include "openvino/op/concat.hpp"
@@ -43,7 +44,12 @@ TEST(ResolveEdgeConflictsCPUTest, smoke_Run_ResolveEdgeConflicts) {
     */
     Config conf;
     conf.rtCacheCapacity = 100;
-    auto context = std::make_shared<GraphContext>(conf, nullptr, false);
+    std::shared_ptr<NetworkMemoryControl> networkMemoryControl = std::make_shared<NetworkMemoryControl>();
+    auto context = std::make_shared<GraphContext>(conf,
+                                                  nullptr,
+                                                  false,
+                                                  networkMemoryControl->createMemoryControlUnit(),
+                                                  networkMemoryControl);
     const dnnl::engine cpuEngine = context->getEngine();
 
     std::unique_ptr<Graph> graph = std::unique_ptr<Graph>(new Graph());
@@ -104,7 +110,8 @@ TEST(ResolveEdgeConflictsCPUTest2, smoke_Run_ResolveEdgeConflicts2) {
     */
     Config conf;
     conf.rtCacheCapacity = 100;
-    auto context = std::make_shared<GraphContext>(conf, nullptr, false);
+    std::shared_ptr<NetworkMemoryControl> networkMemoryControl = std::make_shared<NetworkMemoryControl>();
+    auto context = std::make_shared<GraphContext>(conf, nullptr, false, networkMemoryControl->createMemoryControlUnit(), networkMemoryControl);
 
     std::unique_ptr<Graph> graph = std::unique_ptr<Graph>(new Graph());
 
diff --git a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp
index ea2994759e7036..63a44f5bea7075 100644
--- a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp
@@ -14,6 +14,7 @@
 #include <dnnl.hpp>
 
 #include "common_test_utils/common_utils.hpp"
+#include "memory_control.hpp"
 #include "nodes/input.h"
 
 using namespace ov::intel_cpu;
@@ -108,7 +109,9 @@ class ReorderCPUTestGraph {
         conf.rtCacheCapacity = 100;
         auto context = std::make_shared<GraphContext>(conf,
                                                       std::make_shared<WeightsSharing>(),
-                                                      false);
+                                                      false,
+                                                      networkMemoryControl->createMemoryControlUnit(),
+                                                      networkMemoryControl);
         const dnnl::engine cpuEngine = context->getEngine();
 
         inputNode = std::make_shared<ov::intel_cpu::node::Input>(inputDesc.clone(),
@@ -152,6 +155,7 @@ class ReorderCPUTestGraph {
     std::shared_ptr<ov::intel_cpu::Edge> parentEdge;
     std::shared_ptr<ov::intel_cpu::Edge> childEdge;
     ov::element::Type prec;
+    std::shared_ptr<NetworkMemoryControl> networkMemoryControl = std::make_shared<NetworkMemoryControl>();
 };
 
 }// namespace ReorderCPUTest