Skip to content

Commit

Permalink
correctly handle hitting MaxTabletsScheduled in drain (ydb-platform#5845)
Browse files Browse the repository at this point in the history
  • Loading branch information
vporyadke committed Jun 26, 2024
1 parent 4ed949f commit 35849c4
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 62 deletions.
11 changes: 6 additions & 5 deletions ydb/core/mind/hive/balancer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,17 +303,18 @@ class THiveBalancer : public NActors::TActorBootstrapped<THiveBalancer>, public
}
BLOG_TRACE("Balancer selected tablet " << tablet->ToString());
THive::TBestNodeResult result = Hive->FindBestNode(*tablet);
if (result.BestNode != nullptr && result.BestNode != tablet->Node) {
if (Hive->IsTabletMoveExpedient(*tablet, *result.BestNode)) {
if (std::holds_alternative<TNodeInfo*>(result)) {
TNodeInfo* node = std::get<TNodeInfo*>(result);
if (node != tablet->Node && Hive->IsTabletMoveExpedient(*tablet, *node)) {
tablet->MakeBalancerDecision(now);
tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart
++KickInFlight;
++Movements;
BLOG_D("Balancer moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues
<< " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues);
Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, result.BestNode->Id));
Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id));
<< " to node " << node->Id << " " << node->ResourceValues);
Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, node->Id));
Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), node->Id));
UpdateProgress();
}
}
Expand Down
22 changes: 15 additions & 7 deletions ydb/core/mind/hive/drain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,21 +69,28 @@ class THiveDrain : public NActors::TActorBootstrapped<THiveDrain>, public ISubAc
TTabletInfo* tablet = Hive->FindTablet(tabletId);
if (tablet != nullptr && tablet->IsAlive() && tablet->NodeId == NodeId) {
THive::TBestNodeResult result = Hive->FindBestNode(*tablet);
if (result.BestNode != nullptr) {
if (std::holds_alternative<TNodeInfo*>(result)) {
TNodeInfo* node = std::get<TNodeInfo*>(result);
tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart
++KickInFlight;
++Movements;
BLOG_D("Drain " << SelfId() << " moving tablet "
<< tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues
<< " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues);
<< " to node " << node->Id << " " << node->ResourceValues);
Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_EXECUTED].Increment(1);
Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, result.BestNode->Id));
Hive->Execute(Hive->CreateRestartTablet(tabletId, result.BestNode->Id));
Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, node->Id));
Hive->Execute(Hive->CreateRestartTablet(tabletId, node->Id));
} else {
Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_FAILED].Increment(1);
BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues);
if (std::holds_alternative<THive::TNoNodeFound>(result)) {
Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_FAILED].Increment(1);
BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues);
} else if (std::holds_alternative<THive::TTooManyTabletsStarting>(result)){
BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " and will try again later");
Hive->WaitToMoveTablets(SelfId());
return;
}
}
}
++NextKick;
Expand Down Expand Up @@ -209,6 +216,7 @@ class THiveDrain : public NActors::TActorBootstrapped<THiveDrain>, public ISubAc
hFunc(TEvTabletPipe::TEvClientConnected, Handle);
hFunc(TEvTabletPipe::TEvClientDestroyed, Handle);
cFunc(TEvents::TSystem::Wakeup, Timeout);
cFunc(TEvPrivate::EvCanMoveTablets, KickNextTablet);
}
}
};
Expand Down
11 changes: 6 additions & 5 deletions ydb/core/mind/hive/fill.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,18 @@ class THiveFill : public NActors::TActorBootstrapped<THiveFill>, public ISubActo
TTabletInfo* tablet = Hive->FindTablet(tabletId);
if (tablet != nullptr && tablet->IsAlive() && tablet->NodeId != NodeId) {
THive::TBestNodeResult result = Hive->FindBestNode(*tablet);
if (result.BestNode != nullptr) {
if (result.BestNode->Id == NodeId) {
if (std::holds_alternative<TNodeInfo*>(result)) {
TNodeInfo* node = std::get<TNodeInfo*>(result);
if (node->Id == NodeId) {
tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart
++KickInFlight;
++Movements;
BLOG_D("Fill " << SelfId() << " moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues
<< " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues);
<< " to node " << node->Id << " " << node->ResourceValues);
Hive->TabletCounters->Cumulative()[NHive::COUNTER_FILL_EXECUTED].Increment(1);
Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, result.BestNode->Id));
Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id), ctx);
Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, node->Id));
Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), node->Id), ctx);
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions ydb/core/mind/hive/hive_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct TEvPrivate {
EvProcessIncomingEvent,
EvRefreshStorageInfo,
EvLogTabletMoves,
EvCanMoveTablets,
EvStartStorageBalancer,
EvRestartCancelled,
EvProcessStorageBalancer,
Expand Down Expand Up @@ -95,6 +96,8 @@ struct TEvPrivate {

struct TEvLogTabletMoves : TEventLocal<TEvLogTabletMoves, EvLogTabletMoves> {};

// Payload-free local event sent to every actor registered through
// THive::WaitToMoveTablets; the drain actor handles it by calling KickNextTablet.
struct TEvCanMoveTablets : TEventLocal<TEvCanMoveTablets, EvCanMoveTablets> {};

struct TEvStartStorageBalancer : TEventLocal<TEvStartStorageBalancer, EvStartStorageBalancer> {
TStorageBalancerSettings Settings;

Expand Down
59 changes: 35 additions & 24 deletions ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,17 +226,17 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec
continue;
}
if (tablet->IsReadyToStart(now)) {
TBestNodeResult bestNodeResult = FindBestNode(*tablet);
if (bestNodeResult.BestNode != nullptr) {
if (tablet->InitiateStart(bestNodeResult.BestNode)) {
TBestNodeResult bestNodeResult = FindBestNode(*tablet, record);
if (std::holds_alternative<TNodeInfo*>(bestNodeResult)) {
if (tablet->InitiateStart(std::get<TNodeInfo*>(bestNodeResult))) {
++tabletsStarted;
continue;
}
} else {
if (!bestNodeResult.TryToContinue) {
if (std::holds_alternative<TTooManyTabletsStarting>(bestNodeResult)) {
delayedTablets.push_back(record);
break;
} else {
} else if (std::holds_alternative<TNoNodeFound>(bestNodeResult)) {
for (const TActorId actorToNotify : tablet->ActorsToNotifyOnRestart) {
sideEffects.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay"));
}
Expand Down Expand Up @@ -790,12 +790,17 @@ void THive::Handle(TEvPrivate::TEvKickTablet::TPtr &ev) {

BLOG_D("THive::Handle::TEvKickTablet TabletId=" << tabletId);
TBestNodeResult result = FindBestNode(*tablet);
if (result.BestNode == nullptr) {
Execute(CreateRestartTablet(tabletId));
} else if (result.BestNode != tablet->Node) {
if (IsTabletMoveExpedient(*tablet, *result.BestNode)) {
if (std::holds_alternative<TTooManyTabletsStarting>(result)) {
if (tablet->Node == nullptr || !tablet->Node->IsAllowedToRunTablet(*tablet)) {
Execute(CreateRestartTablet(tabletId));
}
} else if (std::holds_alternative<TNodeInfo*>(result)) {
TNodeInfo* node = std::get<TNodeInfo*>(result);
if (node != tablet->Node && IsTabletMoveExpedient(*tablet, *node)) {
Execute(CreateRestartTablet(tabletId));
}
} else {
Execute(CreateRestartTablet(tabletId));
}
}

Expand Down Expand Up @@ -1162,12 +1167,12 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
if (node != nullptr) {
if (node->IsAlive() && node->IsAllowedToRunTablet(tablet) && node->IsAbleToScheduleTablet() && node->IsAbleToRunTablet(tablet)) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " choose node " << node->Id << " because of preferred node");
return TBestNodeResult(*node);
return node;
}
if (node->Freeze) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " preferred to freezed node " << node->Id);
tablet.BootState = TStringBuilder() << "Preferred to freezed node " << node->Id;
return TBestNodeResult(true);
return TNoNodeFound();
}
}
}
Expand Down Expand Up @@ -1289,7 +1294,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
thereAreNodesWithManyStarts = true;
if (GetBootStrategy() == NKikimrConfig::THiveConfig::HIVE_BOOT_STRATEGY_BALANCED) {
tablet.BootState = BootStateTooManyStarting;
return TBestNodeResult(false);
return TTooManyTabletsStarting();
}
}
} else {
Expand All @@ -1308,7 +1313,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " selected nodes count " << selectedNodes.size());
if (selectedNodes.empty() && thereAreNodesWithManyStarts) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " all available nodes are booting too many tablets");
return TBestNodeResult(false);
return TTooManyTabletsStarting();
}

TNodeInfo* selectedNode = nullptr;
Expand All @@ -1335,53 +1340,53 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
if (selectedNode != nullptr) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " selected node " << selectedNode->Id);
tablet.BootState = BootStateStarting;
return TBestNodeResult(*selectedNode);
return selectedNode;
} else {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " no node was selected");

ui32 nodesLeft = Nodes.size();

if (tablet.IsFollower() && debugState.LeaderNotRunning) {
tablet.BootState = BootStateLeaderNotRunning;
return TBestNodeResult(true);
return TNoNodeFound();
}
if (debugState.NodesDead == nodesLeft) {
tablet.BootState = BootStateAllNodesAreDead;
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesDead;
if (debugState.NodesDown == nodesLeft) {
tablet.BootState = BootStateAllNodesAreDeadOrDown;
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesDown;
if (debugState.NodesNotAllowed + debugState.NodesInDatacentersNotAllowed == nodesLeft) {
tablet.BootState = BootStateNoNodesAllowedToRun;
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesNotAllowed;
nodesLeft -= debugState.NodesInDatacentersNotAllowed;
if (debugState.NodesWithSomeoneFromOurFamily == nodesLeft) {
tablet.BootState = BootStateWeFilledAllAvailableNodes;
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesWithSomeoneFromOurFamily;
if (debugState.NodesWithoutDomain == nodesLeft) {
tablet.BootState = TStringBuilder() << "Can't find domain " << tablet.GetNodeFilter().GetEffectiveAllowedDomains();
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesWithoutDomain;
if (tablet.IsFollower() && debugState.NodesFilledWithDatacenterFollowers == nodesLeft) {
tablet.BootState = BootStateNotEnoughDatacenters;
return TBestNodeResult(true);
return TNoNodeFound();
}
if (debugState.NodesWithoutResources == nodesLeft) {
tablet.BootState = BootStateNotEnoughResources;
return TBestNodeResult(true);
return TNoNodeFound();
}
if (debugState.NodesWithoutLocation == nodesLeft) {
tablet.BootState = BootStateNodesLocationUnknown;
return TBestNodeResult(true);
return TNoNodeFound();
}

TStringBuilder state;
Expand Down Expand Up @@ -1421,7 +1426,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
}
tablet.BootState = state;

return TBestNodeResult(true);
return TNoNodeFound();
}
}

Expand Down Expand Up @@ -2471,6 +2476,12 @@ bool THive::StopSubActor(TSubActorId subActorId) {
return false;
}

// Adds `actor` to ActorsWaitingToMoveTablets unless it is already present,
// so each waiting actor appears in the list at most once.
void THive::WaitToMoveTablets(TActorId actor) {
    const auto waitersEnd = ActorsWaitingToMoveTablets.end();
    const bool alreadyWaiting =
        std::find(ActorsWaitingToMoveTablets.begin(), waitersEnd, actor) != waitersEnd;
    if (alreadyWaiting) {
        return; // duplicate registration, nothing to do
    }
    ActorsWaitingToMoveTablets.push_back(actor);
}

// Returns true when at least one of the CPU, memory or network checks accepts
// `metrics`. Checks run in that order and stop at the first one that succeeds.
bool THive::IsValidMetrics(const NKikimrTabletBase::TMetrics& metrics) {
    if (IsValidMetricsCPU(metrics)) {
        return true;
    }
    if (IsValidMetricsMemory(metrics)) {
        return true;
    }
    return IsValidMetricsNetwork(metrics);
}
Expand Down
21 changes: 6 additions & 15 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
TSequenceGenerator Sequencer;
TOwnershipKeeper Keeper;
TEventPriorityQueue<THive> EventQueue{*this};
std::vector<TActorId> ActorsWaitingToMoveTablets;

struct TPendingCreateTablet {
NKikimrHive::TEvCreateTablet CreateTablet;
Expand Down Expand Up @@ -582,20 +583,9 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
void RestartBSControllerPipe();
void RestartRootHivePipe();

// Result of FindBestNode expressed as a node pointer plus a flag.
// BestNode is non-null only when constructed from a node reference.
// TryToContinue == false makes the boot-queue loop put the tablet into the
// delayed list and stop iterating.
struct TBestNodeResult {
TNodeInfo* BestNode;
bool TryToContinue;

// A node was chosen: store its address and allow the caller to continue.
TBestNodeResult(TNodeInfo& bestNode)
: BestNode(&bestNode)
, TryToContinue(true)
{}

// No node was chosen: BestNode stays null, the flag is taken from the caller.
TBestNodeResult(bool tryToContinue)
: BestNode(nullptr)
, TryToContinue(tryToContinue)
{}
};
// Tag returned by FindBestNode when no node was selected for the tablet
// (the reason is recorded in tablet.BootState).
struct TNoNodeFound {};
// Tag returned by FindBestNode when candidate nodes are already booting too many
// tablets; the boot queue delays the tablet and the drain actor waits and retries.
struct TTooManyTabletsStarting {};
// Outcome of FindBestNode: the selected node, or one of the tag types above.
using TBestNodeResult = std::variant<TNodeInfo*, TNoNodeFound, TTooManyTabletsStarting>;

TBestNodeResult FindBestNode(const TTabletInfo& tablet);

Expand Down Expand Up @@ -627,7 +617,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
TTabletInfo& GetTablet(TTabletId tabletId, TFollowerId followerId);
TTabletInfo* FindTablet(TTabletId tabletId, TFollowerId followerId);
TTabletInfo* FindTablet(const TFullTabletId& tabletId) { return FindTablet(tabletId.first, tabletId.second); }
TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId);
TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId);
TStoragePoolInfo& GetStoragePool(const TString& name);
TStoragePoolInfo* FindStoragePool(const TString& name);
TDomainInfo* FindDomain(TSubDomainKey key);
Expand Down Expand Up @@ -976,6 +966,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
THiveStats GetStats() const;
void RemoveSubActor(ISubActor* subActor);
bool StopSubActor(TSubActorId subActorId);
void WaitToMoveTablets(TActorId actor);
const NKikimrLocal::TLocalConfig &GetLocalConfig() const { return LocalConfig; }
NKikimrTabletBase::TMetrics GetDefaultResourceValuesForObject(TFullObjectId objectId);
NKikimrTabletBase::TMetrics GetDefaultResourceValuesForTabletType(TTabletTypes::EType type);
Expand Down
24 changes: 18 additions & 6 deletions ydb/core/mind/hive/hive_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -903,17 +903,15 @@ Y_UNIT_TEST_SUITE(THiveTest) {
}
}

Y_UNIT_TEST(TestDrain) {
const int NUM_NODES = 3;
void TestDrain(TTestBasicRuntime& runtime) {
const int numNodes = runtime.GetNodeCount();
const int NUM_TABLETS = 100;
TTestBasicRuntime runtime(NUM_NODES, false);
Setup(runtime, true);
const ui64 hiveTablet = MakeDefaultHiveID(0);
const ui64 testerTablet = MakeDefaultHiveID(1);
const ui64 testerTablet = MakeTabletID(1);
CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
{
TDispatchOptions options;
options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES);
options.FinalEvents.emplace_back(TEvLocal::EvStatus, numNodes);
runtime.DispatchEvents(options);
}
TTabletTypes::EType tabletType = TTabletTypes::Dummy;
Expand Down Expand Up @@ -970,6 +968,20 @@ Y_UNIT_TEST_SUITE(THiveTest) {
UNIT_ASSERT_VALUES_EQUAL(tabletStates[NKikimrWhiteboard::TTabletStateInfo::Dead], drainMovements);
}

// Runs the shared drain scenario on a three-node runtime with the default Hive configuration.
Y_UNIT_TEST(TestDrain) {
    const int nodeCount = 3;
    TTestBasicRuntime testRuntime(nodeCount, false);
    Setup(testRuntime, true);
    TestDrain(testRuntime);
}

// Runs the shared drain scenario on a three-node runtime with the Hive config
// option MaxTabletsScheduled set to 1.
Y_UNIT_TEST(TestDrainWithMaxTabletsScheduled) {
    const int nodeCount = 3;
    TTestBasicRuntime testRuntime(nodeCount, false);
    auto limitScheduledTablets = [](TAppPrepare& app) {
        app.HiveConfig.SetMaxTabletsScheduled(1);
    };
    Setup(testRuntime, true, 2, limitScheduledTablets);
    TestDrain(testRuntime);
}

Y_UNIT_TEST(TestCreateSubHiveCreateTablet) {
TTestBasicRuntime runtime(1, false);
Setup(runtime, true);
Expand Down
4 changes: 4 additions & 0 deletions ydb/core/mind/hive/tx__update_tablet_status.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ class TTxUpdateTabletStatus : public TTransactionBase<THive> {
SideEffects.Send(actor, new TEvPrivate::TEvRestartComplete({TabletId, FollowerId}, "OK"));
}
tablet->ActorsToNotifyOnRestart.clear();
for (const TActorId& actor : Self->ActorsWaitingToMoveTablets) {
SideEffects.Send(actor, new TEvPrivate::TEvCanMoveTablets());
}
Self->ActorsWaitingToMoveTablets.clear();
if (tablet->GetLeader().IsDeleting()) {
tablet->SendStopTablet(SideEffects);
return true;
Expand Down

0 comments on commit 35849c4

Please sign in to comment.