diff --git a/ydb/core/mind/hive/balancer.cpp b/ydb/core/mind/hive/balancer.cpp index b5376ce43736..de9a3d3f8d15 100644 --- a/ydb/core/mind/hive/balancer.cpp +++ b/ydb/core/mind/hive/balancer.cpp @@ -303,17 +303,18 @@ class THiveBalancer : public NActors::TActorBootstrapped, public } BLOG_TRACE("Balancer selected tablet " << tablet->ToString()); THive::TBestNodeResult result = Hive->FindBestNode(*tablet); - if (result.BestNode != nullptr && result.BestNode != tablet->Node) { - if (Hive->IsTabletMoveExpedient(*tablet, *result.BestNode)) { + if (std::holds_alternative(result)) { + TNodeInfo* node = std::get(result); + if (node != tablet->Node && Hive->IsTabletMoveExpedient(*tablet, *node)) { tablet->MakeBalancerDecision(now); tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart ++KickInFlight; ++Movements; BLOG_D("Balancer moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues() << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues - << " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues); - Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, result.BestNode->Id)); - Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id)); + << " to node " << node->Id << " " << node->ResourceValues); + Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, node->Id)); + Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), node->Id)); UpdateProgress(); } } diff --git a/ydb/core/mind/hive/drain.cpp b/ydb/core/mind/hive/drain.cpp index 9b1de009e649..34d8ec23dbe1 100644 --- a/ydb/core/mind/hive/drain.cpp +++ b/ydb/core/mind/hive/drain.cpp @@ -69,21 +69,28 @@ class THiveDrain : public NActors::TActorBootstrapped, public ISubAc TTabletInfo* tablet = Hive->FindTablet(tabletId); if (tablet != nullptr && tablet->IsAlive() && tablet->NodeId == NodeId) { THive::TBestNodeResult 
result = Hive->FindBestNode(*tablet); - if (result.BestNode != nullptr) { + if (std::holds_alternative(result)) { + TNodeInfo* node = std::get(result); tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart ++KickInFlight; ++Movements; BLOG_D("Drain " << SelfId() << " moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues() << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues - << " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues); + << " to node " << node->Id << " " << node->ResourceValues); Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_EXECUTED].Increment(1); - Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, result.BestNode->Id)); - Hive->Execute(Hive->CreateRestartTablet(tabletId, result.BestNode->Id)); + Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, node->Id)); + Hive->Execute(Hive->CreateRestartTablet(tabletId, node->Id)); } else { - Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_FAILED].Increment(1); - BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " " << tablet->GetResourceValues() - << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues); + if (std::holds_alternative(result)) { + Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_FAILED].Increment(1); + BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " " << tablet->GetResourceValues() + << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues); + } else if (std::holds_alternative(result)){ + BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " and will try again later"); + Hive->WaitToMoveTablets(SelfId()); + return; + } } } ++NextKick; @@ -209,6 +216,7 @@ class THiveDrain : public NActors::TActorBootstrapped, public ISubAc 
hFunc(TEvTabletPipe::TEvClientConnected, Handle); hFunc(TEvTabletPipe::TEvClientDestroyed, Handle); cFunc(TEvents::TSystem::Wakeup, Timeout); + cFunc(TEvPrivate::EvCanMoveTablets, KickNextTablet); } } }; diff --git a/ydb/core/mind/hive/fill.cpp b/ydb/core/mind/hive/fill.cpp index 8f8d996c7a28..a4fd3750d5b6 100644 --- a/ydb/core/mind/hive/fill.cpp +++ b/ydb/core/mind/hive/fill.cpp @@ -55,17 +55,18 @@ class THiveFill : public NActors::TActorBootstrapped, public ISubActo TTabletInfo* tablet = Hive->FindTablet(tabletId); if (tablet != nullptr && tablet->IsAlive() && tablet->NodeId != NodeId) { THive::TBestNodeResult result = Hive->FindBestNode(*tablet); - if (result.BestNode != nullptr) { - if (result.BestNode->Id == NodeId) { + if (std::holds_alternative(result)) { + TNodeInfo* node = std::get(result); + if (node->Id == NodeId) { tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart ++KickInFlight; ++Movements; BLOG_D("Fill " << SelfId() << " moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues() << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues - << " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues); + << " to node " << node->Id << " " << node->ResourceValues); Hive->TabletCounters->Cumulative()[NHive::COUNTER_FILL_EXECUTED].Increment(1); - Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, result.BestNode->Id)); - Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id), ctx); + Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, node->Id)); + Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), node->Id), ctx); } } } diff --git a/ydb/core/mind/hive/hive_events.h b/ydb/core/mind/hive/hive_events.h index a644b793a7cb..c4ccd20c311b 100644 --- a/ydb/core/mind/hive/hive_events.h +++ b/ydb/core/mind/hive/hive_events.h @@ 
-27,6 +27,7 @@ struct TEvPrivate { EvProcessIncomingEvent, EvRefreshStorageInfo, EvLogTabletMoves, + EvCanMoveTablets, EvStartStorageBalancer, EvRestartCancelled, EvProcessStorageBalancer, @@ -95,6 +96,8 @@ struct TEvPrivate { struct TEvLogTabletMoves : TEventLocal {}; + struct TEvCanMoveTablets : TEventLocal {}; + struct TEvStartStorageBalancer : TEventLocal { TStorageBalancerSettings Settings; diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index 0b253a44cb3a..ba06e2d43ff9 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -226,17 +226,17 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec continue; } if (tablet->IsReadyToStart(now)) { - TBestNodeResult bestNodeResult = FindBestNode(*tablet); - if (bestNodeResult.BestNode != nullptr) { - if (tablet->InitiateStart(bestNodeResult.BestNode)) { + TBestNodeResult bestNodeResult = FindBestNode(*tablet); + if (std::holds_alternative(bestNodeResult)) { + if (tablet->InitiateStart(std::get(bestNodeResult))) { ++tabletsStarted; continue; } } else { - if (!bestNodeResult.TryToContinue) { + if (std::holds_alternative(bestNodeResult)) { delayedTablets.push_back(record); break; - } else { + } else if (std::holds_alternative(bestNodeResult)) { for (const TActorId actorToNotify : tablet->ActorsToNotifyOnRestart) { sideEffects.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay")); } @@ -790,12 +790,17 @@ void THive::Handle(TEvPrivate::TEvKickTablet::TPtr &ev) { BLOG_D("THive::Handle::TEvKickTablet TabletId=" << tabletId); TBestNodeResult result = FindBestNode(*tablet); - if (result.BestNode == nullptr) { - Execute(CreateRestartTablet(tabletId)); - } else if (result.BestNode != tablet->Node) { - if (IsTabletMoveExpedient(*tablet, *result.BestNode)) { + if (std::holds_alternative(result)) { + if (tablet->Node == nullptr || !tablet->Node->IsAllowedToRunTablet(*tablet)) { 
Execute(CreateRestartTablet(tabletId)); } + } else if (std::holds_alternative(result)) { + TNodeInfo* node = std::get(result); + if (node != tablet->Node && IsTabletMoveExpedient(*tablet, *node)) { + Execute(CreateRestartTablet(tabletId)); + } + } else { + Execute(CreateRestartTablet(tabletId)); } } @@ -1162,12 +1167,12 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { if (node != nullptr) { if (node->IsAlive() && node->IsAllowedToRunTablet(tablet) && node->IsAbleToScheduleTablet() && node->IsAbleToRunTablet(tablet)) { BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " choose node " << node->Id << " because of preferred node"); - return TBestNodeResult(*node); + return node; } if (node->Freeze) { BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " preferred to freezed node " << node->Id); tablet.BootState = TStringBuilder() << "Preferred to freezed node " << node->Id; - return TBestNodeResult(true); + return TNoNodeFound(); } } } @@ -1289,7 +1294,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { thereAreNodesWithManyStarts = true; if (GetBootStrategy() == NKikimrConfig::THiveConfig::HIVE_BOOT_STRATEGY_BALANCED) { tablet.BootState = BootStateTooManyStarting; - return TBestNodeResult(false); + return TTooManyTabletsStarting(); } } } else { @@ -1308,7 +1313,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " selected nodes count " << selectedNodes.size()); if (selectedNodes.empty() && thereAreNodesWithManyStarts) { BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " all available nodes are booting too many tablets"); - return TBestNodeResult(false); + return TTooManyTabletsStarting(); } TNodeInfo* selectedNode = nullptr; @@ -1335,7 +1340,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { if (selectedNode != nullptr) { BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " selected node " << selectedNode->Id); 
tablet.BootState = BootStateStarting; - return TBestNodeResult(*selectedNode); + return selectedNode; } else { BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " no node was selected"); @@ -1343,45 +1348,45 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { if (tablet.IsFollower() && debugState.LeaderNotRunning) { tablet.BootState = BootStateLeaderNotRunning; - return TBestNodeResult(true); + return TNoNodeFound(); } if (debugState.NodesDead == nodesLeft) { tablet.BootState = BootStateAllNodesAreDead; - return TBestNodeResult(true); + return TNoNodeFound(); } nodesLeft -= debugState.NodesDead; if (debugState.NodesDown == nodesLeft) { tablet.BootState = BootStateAllNodesAreDeadOrDown; - return TBestNodeResult(true); + return TNoNodeFound(); } nodesLeft -= debugState.NodesDown; if (debugState.NodesNotAllowed + debugState.NodesInDatacentersNotAllowed == nodesLeft) { tablet.BootState = BootStateNoNodesAllowedToRun; - return TBestNodeResult(true); + return TNoNodeFound(); } nodesLeft -= debugState.NodesNotAllowed; nodesLeft -= debugState.NodesInDatacentersNotAllowed; if (debugState.NodesWithSomeoneFromOurFamily == nodesLeft) { tablet.BootState = BootStateWeFilledAllAvailableNodes; - return TBestNodeResult(true); + return TNoNodeFound(); } nodesLeft -= debugState.NodesWithSomeoneFromOurFamily; if (debugState.NodesWithoutDomain == nodesLeft) { tablet.BootState = TStringBuilder() << "Can't find domain " << tablet.GetNodeFilter().GetEffectiveAllowedDomains(); - return TBestNodeResult(true); + return TNoNodeFound(); } nodesLeft -= debugState.NodesWithoutDomain; if (tablet.IsFollower() && debugState.NodesFilledWithDatacenterFollowers == nodesLeft) { tablet.BootState = BootStateNotEnoughDatacenters; - return TBestNodeResult(true); + return TNoNodeFound(); } if (debugState.NodesWithoutResources == nodesLeft) { tablet.BootState = BootStateNotEnoughResources; - return TBestNodeResult(true); + return TNoNodeFound(); } if (debugState.NodesWithoutLocation 
== nodesLeft) { tablet.BootState = BootStateNodesLocationUnknown; - return TBestNodeResult(true); + return TNoNodeFound(); } TStringBuilder state; @@ -1421,7 +1426,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) { } tablet.BootState = state; - return TBestNodeResult(true); + return TNoNodeFound(); } } @@ -2471,6 +2476,12 @@ bool THive::StopSubActor(TSubActorId subActorId) { return false; } +void THive::WaitToMoveTablets(TActorId actor) { + if (std::find(ActorsWaitingToMoveTablets.begin(), ActorsWaitingToMoveTablets.end(), actor) == ActorsWaitingToMoveTablets.end()) { + ActorsWaitingToMoveTablets.push_back(actor); + } +} + bool THive::IsValidMetrics(const NKikimrTabletBase::TMetrics& metrics) { return IsValidMetricsCPU(metrics) || IsValidMetricsMemory(metrics) || IsValidMetricsNetwork(metrics); } diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index 587219a41ef0..e3d47bbb202b 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -408,6 +408,7 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar TSequenceGenerator Sequencer; TOwnershipKeeper Keeper; TEventPriorityQueue EventQueue{*this}; + std::vector ActorsWaitingToMoveTablets; struct TPendingCreateTablet { NKikimrHive::TEvCreateTablet CreateTablet; @@ -582,20 +583,9 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar void RestartBSControllerPipe(); void RestartRootHivePipe(); - struct TBestNodeResult { - TNodeInfo* BestNode; - bool TryToContinue; - - TBestNodeResult(TNodeInfo& bestNode) - : BestNode(&bestNode) - , TryToContinue(true) - {} - - TBestNodeResult(bool tryToContinue) - : BestNode(nullptr) - , TryToContinue(tryToContinue) - {} - }; + struct TNoNodeFound {}; + struct TTooManyTabletsStarting {}; + using TBestNodeResult = std::variant; TBestNodeResult FindBestNode(const TTabletInfo& tablet); @@ -627,7 +617,7 @@ class THive : public TActor, public TTabletExecutedFlat, public 
THiveShar TTabletInfo& GetTablet(TTabletId tabletId, TFollowerId followerId); TTabletInfo* FindTablet(TTabletId tabletId, TFollowerId followerId); TTabletInfo* FindTablet(const TFullTabletId& tabletId) { return FindTablet(tabletId.first, tabletId.second); } - TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId); +TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId); TStoragePoolInfo& GetStoragePool(const TString& name); TStoragePoolInfo* FindStoragePool(const TString& name); TDomainInfo* FindDomain(TSubDomainKey key); @@ -976,6 +966,7 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar THiveStats GetStats() const; void RemoveSubActor(ISubActor* subActor); bool StopSubActor(TSubActorId subActorId); + void WaitToMoveTablets(TActorId actor); const NKikimrLocal::TLocalConfig &GetLocalConfig() const { return LocalConfig; } NKikimrTabletBase::TMetrics GetDefaultResourceValuesForObject(TFullObjectId objectId); NKikimrTabletBase::TMetrics GetDefaultResourceValuesForTabletType(TTabletTypes::EType type); diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index d8eec27f09bd..e3e79d95a1b0 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -903,17 +903,15 @@ Y_UNIT_TEST_SUITE(THiveTest) { } } - Y_UNIT_TEST(TestDrain) { - const int NUM_NODES = 3; + void TestDrain(TTestBasicRuntime& runtime) { + const int numNodes = runtime.GetNodeCount(); const int NUM_TABLETS = 100; - TTestBasicRuntime runtime(NUM_NODES, false); - Setup(runtime, true); const ui64 hiveTablet = MakeDefaultHiveID(0); - const ui64 testerTablet = MakeDefaultHiveID(1); + const ui64 testerTablet = MakeTabletID(1); CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); { TDispatchOptions options; - options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES); + options.FinalEvents.emplace_back(TEvLocal::EvStatus, numNodes); 
runtime.DispatchEvents(options); } TTabletTypes::EType tabletType = TTabletTypes::Dummy; @@ -970,6 +968,20 @@ Y_UNIT_TEST_SUITE(THiveTest) { UNIT_ASSERT_VALUES_EQUAL(tabletStates[NKikimrWhiteboard::TTabletStateInfo::Dead], drainMovements); } + Y_UNIT_TEST(TestDrain) { + TTestBasicRuntime runtime(3, false); + Setup(runtime, true); + TestDrain(runtime); + } + + Y_UNIT_TEST(TestDrainWithMaxTabletsScheduled) { + TTestBasicRuntime runtime(3, false); + Setup(runtime, true, 2, [](TAppPrepare& app) { + app.HiveConfig.SetMaxTabletsScheduled(1); + }); + TestDrain(runtime); + } + Y_UNIT_TEST(TestCreateSubHiveCreateTablet) { TTestBasicRuntime runtime(1, false); Setup(runtime, true); diff --git a/ydb/core/mind/hive/tx__update_tablet_status.cpp b/ydb/core/mind/hive/tx__update_tablet_status.cpp index 11d16d56e18e..6b8874dfe438 100644 --- a/ydb/core/mind/hive/tx__update_tablet_status.cpp +++ b/ydb/core/mind/hive/tx__update_tablet_status.cpp @@ -93,6 +93,10 @@ class TTxUpdateTabletStatus : public TTransactionBase { SideEffects.Send(actor, new TEvPrivate::TEvRestartComplete({TabletId, FollowerId}, "OK")); } tablet->ActorsToNotifyOnRestart.clear(); + for (const TActorId& actor : Self->ActorsWaitingToMoveTablets) { + SideEffects.Send(actor, new TEvPrivate::TEvCanMoveTablets()); + } + Self->ActorsWaitingToMoveTablets.clear(); if (tablet->GetLeader().IsDeleting()) { tablet->SendStopTablet(SideEffects); return true;