Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

backport fixes related to tablet drain #6008

Merged
merged 3 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ydb/core/driver_lib/run/run.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1684,8 +1684,8 @@ void TKikimrRunner::KikimrStop(bool graceful) {
}

auto stillOnline = drainProgress->GetOnlineTabletsEstimate();
if (stillOnline) {
Cerr << "Drain completed, but " << stillOnline << " tablet(s) are online." << Endl;
if (stillOnline > 0) {
Cerr << "Drain completed, but " << *stillOnline << " tablet(s) are online." << Endl;
}
}

Expand Down
11 changes: 6 additions & 5 deletions ydb/core/mind/hive/balancer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,17 +303,18 @@ class THiveBalancer : public NActors::TActorBootstrapped<THiveBalancer>, public
}
BLOG_TRACE("Balancer selected tablet " << tablet->ToString());
THive::TBestNodeResult result = Hive->FindBestNode(*tablet);
if (result.BestNode != nullptr && result.BestNode != tablet->Node) {
if (Hive->IsTabletMoveExpedient(*tablet, *result.BestNode)) {
if (std::holds_alternative<TNodeInfo*>(result)) {
TNodeInfo* node = std::get<TNodeInfo*>(result);
if (node != tablet->Node && Hive->IsTabletMoveExpedient(*tablet, *node)) {
tablet->MakeBalancerDecision(now);
tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart
++KickInFlight;
++Movements;
BLOG_D("Balancer moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues
<< " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues);
Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, result.BestNode->Id));
Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id));
<< " to node " << node->Id << " " << node->ResourceValues);
Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, node->Id));
Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), node->Id));
UpdateProgress();
}
}
Expand Down
22 changes: 15 additions & 7 deletions ydb/core/mind/hive/drain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,21 +69,28 @@ class THiveDrain : public NActors::TActorBootstrapped<THiveDrain>, public ISubAc
TTabletInfo* tablet = Hive->FindTablet(tabletId);
if (tablet != nullptr && tablet->IsAlive() && tablet->NodeId == NodeId) {
THive::TBestNodeResult result = Hive->FindBestNode(*tablet);
if (result.BestNode != nullptr) {
if (std::holds_alternative<TNodeInfo*>(result)) {
TNodeInfo* node = std::get<TNodeInfo*>(result);
tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart
++KickInFlight;
++Movements;
BLOG_D("Drain " << SelfId() << " moving tablet "
<< tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues
<< " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues);
<< " to node " << node->Id << " " << node->ResourceValues);
Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_EXECUTED].Increment(1);
Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, result.BestNode->Id));
Hive->Execute(Hive->CreateRestartTablet(tabletId, result.BestNode->Id));
Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, node->Id));
Hive->Execute(Hive->CreateRestartTablet(tabletId, node->Id));
} else {
Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_FAILED].Increment(1);
BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues);
if (std::holds_alternative<THive::TNoNodeFound>(result)) {
Hive->TabletCounters->Cumulative()[NHive::COUNTER_DRAIN_FAILED].Increment(1);
BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues);
} else if (std::holds_alternative<THive::TTooManyTabletsStarting>(result)){
BLOG_D("Drain " << SelfId() << " could not move tablet " << tablet->ToString() << " and will try again later");
Hive->WaitToMoveTablets(SelfId());
return;
}
}
}
++NextKick;
Expand Down Expand Up @@ -209,6 +216,7 @@ class THiveDrain : public NActors::TActorBootstrapped<THiveDrain>, public ISubAc
hFunc(TEvTabletPipe::TEvClientConnected, Handle);
hFunc(TEvTabletPipe::TEvClientDestroyed, Handle);
cFunc(TEvents::TSystem::Wakeup, Timeout);
cFunc(TEvPrivate::EvCanMoveTablets, KickNextTablet);
}
}
};
Expand Down
11 changes: 6 additions & 5 deletions ydb/core/mind/hive/fill.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,18 @@ class THiveFill : public NActors::TActorBootstrapped<THiveFill>, public ISubActo
TTabletInfo* tablet = Hive->FindTablet(tabletId);
if (tablet != nullptr && tablet->IsAlive() && tablet->NodeId != NodeId) {
THive::TBestNodeResult result = Hive->FindBestNode(*tablet);
if (result.BestNode != nullptr) {
if (result.BestNode->Id == NodeId) {
if (std::holds_alternative<TNodeInfo*>(result)) {
TNodeInfo* node = std::get<TNodeInfo*>(result);
if (node->Id == NodeId) {
tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart
++KickInFlight;
++Movements;
BLOG_D("Fill " << SelfId() << " moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
<< " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues
<< " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues);
<< " to node " << node->Id << " " << node->ResourceValues);
Hive->TabletCounters->Cumulative()[NHive::COUNTER_FILL_EXECUTED].Increment(1);
Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, result.BestNode->Id));
Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id), ctx);
Hive->RecordTabletMove(THive::TTabletMoveInfo(TInstant::Now(), *tablet, tablet->Node->Id, node->Id));
Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), node->Id), ctx);
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions ydb/core/mind/hive/hive_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct TEvPrivate {
EvProcessIncomingEvent,
EvRefreshStorageInfo,
EvLogTabletMoves,
EvCanMoveTablets,
EvStartStorageBalancer,
EvRestartCancelled,
EvProcessStorageBalancer,
Expand Down Expand Up @@ -95,6 +96,8 @@ struct TEvPrivate {

// Payload-less event built on TEventLocal and tagged with the EvLogTabletMoves id.
struct TEvLogTabletMoves : TEventLocal<TEvLogTabletMoves, EvLogTabletMoves> {};

// Payload-less event built on TEventLocal and tagged with the EvCanMoveTablets id.
// THiveDrain's state function maps EvCanMoveTablets to KickNextTablet, so a drain that
// registered itself through THive::WaitToMoveTablets and returned resumes kicking tablets
// once it receives this event.
// NOTE(review): the code that sends this event is not visible here — confirm the sender.
struct TEvCanMoveTablets : TEventLocal<TEvCanMoveTablets, EvCanMoveTablets> {};

struct TEvStartStorageBalancer : TEventLocal<TEvStartStorageBalancer, EvStartStorageBalancer> {
TStorageBalancerSettings Settings;

Expand Down
57 changes: 34 additions & 23 deletions ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,16 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec
}
if (tablet->IsReadyToStart(now)) {
TBestNodeResult bestNodeResult = FindBestNode(*tablet);
if (bestNodeResult.BestNode != nullptr) {
if (tablet->InitiateStart(bestNodeResult.BestNode)) {
if (std::holds_alternative<TNodeInfo*>(bestNodeResult)) {
if (tablet->InitiateStart(std::get<TNodeInfo*>(bestNodeResult))) {
++tabletsStarted;
continue;
}
} else {
if (!bestNodeResult.TryToContinue) {
if (std::holds_alternative<TTooManyTabletsStarting>(bestNodeResult)) {
delayedTablets.push_back(record);
break;
} else {
} else if (std::holds_alternative<TNoNodeFound>(bestNodeResult)) {
for (const TActorId actorToNotify : tablet->ActorsToNotifyOnRestart) {
sideEffects.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay"));
}
Expand Down Expand Up @@ -790,12 +790,17 @@ void THive::Handle(TEvPrivate::TEvKickTablet::TPtr &ev) {

BLOG_D("THive::Handle::TEvKickTablet TabletId=" << tabletId);
TBestNodeResult result = FindBestNode(*tablet);
if (result.BestNode == nullptr) {
Execute(CreateRestartTablet(tabletId));
} else if (result.BestNode != tablet->Node) {
if (IsTabletMoveExpedient(*tablet, *result.BestNode)) {
if (std::holds_alternative<TTooManyTabletsStarting>(result)) {
if (tablet->Node == nullptr || !tablet->Node->IsAllowedToRunTablet(*tablet)) {
Execute(CreateRestartTablet(tabletId));
}
} else if (std::holds_alternative<TNodeInfo*>(result)) {
TNodeInfo* node = std::get<TNodeInfo*>(result);
if (node != tablet->Node && IsTabletMoveExpedient(*tablet, *node)) {
Execute(CreateRestartTablet(tabletId));
}
} else {
Execute(CreateRestartTablet(tabletId));
}
}

Expand Down Expand Up @@ -1162,12 +1167,12 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
if (node != nullptr) {
if (node->IsAlive() && node->IsAllowedToRunTablet(tablet) && node->IsAbleToScheduleTablet() && node->IsAbleToRunTablet(tablet)) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " choose node " << node->Id << " because of preferred node");
return TBestNodeResult(*node);
return node;
}
if (node->Freeze) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " preferred to freezed node " << node->Id);
tablet.BootState = TStringBuilder() << "Preferred to freezed node " << node->Id;
return TBestNodeResult(true);
return TNoNodeFound();
}
}
}
Expand Down Expand Up @@ -1289,7 +1294,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
thereAreNodesWithManyStarts = true;
if (GetBootStrategy() == NKikimrConfig::THiveConfig::HIVE_BOOT_STRATEGY_BALANCED) {
tablet.BootState = BootStateTooManyStarting;
return TBestNodeResult(false);
return TTooManyTabletsStarting();
}
}
} else {
Expand All @@ -1308,7 +1313,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " selected nodes count " << selectedNodes.size());
if (selectedNodes.empty() && thereAreNodesWithManyStarts) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " all available nodes are booting too many tablets");
return TBestNodeResult(false);
return TTooManyTabletsStarting();
}

TNodeInfo* selectedNode = nullptr;
Expand All @@ -1335,53 +1340,53 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
if (selectedNode != nullptr) {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " selected node " << selectedNode->Id);
tablet.BootState = BootStateStarting;
return TBestNodeResult(*selectedNode);
return selectedNode;
} else {
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " no node was selected");

ui32 nodesLeft = Nodes.size();

if (tablet.IsFollower() && debugState.LeaderNotRunning) {
tablet.BootState = BootStateLeaderNotRunning;
return TBestNodeResult(true);
return TNoNodeFound();
}
if (debugState.NodesDead == nodesLeft) {
tablet.BootState = BootStateAllNodesAreDead;
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesDead;
if (debugState.NodesDown == nodesLeft) {
tablet.BootState = BootStateAllNodesAreDeadOrDown;
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesDown;
if (debugState.NodesNotAllowed + debugState.NodesInDatacentersNotAllowed == nodesLeft) {
tablet.BootState = BootStateNoNodesAllowedToRun;
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesNotAllowed;
nodesLeft -= debugState.NodesInDatacentersNotAllowed;
if (debugState.NodesWithSomeoneFromOurFamily == nodesLeft) {
tablet.BootState = BootStateWeFilledAllAvailableNodes;
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesWithSomeoneFromOurFamily;
if (debugState.NodesWithoutDomain == nodesLeft) {
tablet.BootState = TStringBuilder() << "Can't find domain " << tablet.GetNodeFilter().GetEffectiveAllowedDomains();
return TBestNodeResult(true);
return TNoNodeFound();
}
nodesLeft -= debugState.NodesWithoutDomain;
if (tablet.IsFollower() && debugState.NodesFilledWithDatacenterFollowers == nodesLeft) {
tablet.BootState = BootStateNotEnoughDatacenters;
return TBestNodeResult(true);
return TNoNodeFound();
}
if (debugState.NodesWithoutResources == nodesLeft) {
tablet.BootState = BootStateNotEnoughResources;
return TBestNodeResult(true);
return TNoNodeFound();
}
if (debugState.NodesWithoutLocation == nodesLeft) {
tablet.BootState = BootStateNodesLocationUnknown;
return TBestNodeResult(true);
return TNoNodeFound();
}

TStringBuilder state;
Expand Down Expand Up @@ -1421,7 +1426,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
}
tablet.BootState = state;

return TBestNodeResult(true);
return TNoNodeFound();
}
}

Expand Down Expand Up @@ -2471,6 +2476,12 @@ bool THive::StopSubActor(TSubActorId subActorId) {
return false;
}

// Adds `actor` to ActorsWaitingToMoveTablets unless it is already present, so each
// waiting actor appears in the list at most once.
// NOTE(review): presumably the listed actors are later told they may move tablets again
// (TEvPrivate::EvCanMoveTablets) — the notifying code is not visible here; confirm.
void THive::WaitToMoveTablets(TActorId actor) {
    const auto listEnd = ActorsWaitingToMoveTablets.end();
    const auto existing = std::find(ActorsWaitingToMoveTablets.begin(), listEnd, actor);
    if (existing != listEnd) {
        return; // already registered, nothing to add
    }
    ActorsWaitingToMoveTablets.push_back(actor);
}

// Returns true as soon as one of the CPU, memory or network validity checks accepts
// `metrics`; the checks are tried in that order and later ones are skipped after a hit.
bool THive::IsValidMetrics(const NKikimrTabletBase::TMetrics& metrics) {
    if (IsValidMetricsCPU(metrics)) {
        return true;
    }
    if (IsValidMetricsMemory(metrics)) {
        return true;
    }
    return IsValidMetricsNetwork(metrics);
}
Expand Down
21 changes: 6 additions & 15 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
TSequenceGenerator Sequencer;
TOwnershipKeeper Keeper;
TEventPriorityQueue<THive> EventQueue{*this};
std::vector<TActorId> ActorsWaitingToMoveTablets;

struct TPendingCreateTablet {
NKikimrHive::TEvCreateTablet CreateTablet;
Expand Down Expand Up @@ -582,20 +583,9 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
void RestartBSControllerPipe();
void RestartRootHivePipe();

// Struct form of the FindBestNode result: a node pointer plus a flag.
// BestNode is non-null only when constructed from a node reference.
// The boot-queue loop reads TryToContinue when BestNode is null: false makes it push the
// record to the delayed list and stop, true makes it notify the waiting actors instead.
struct TBestNodeResult {
TNodeInfo* BestNode;
bool TryToContinue;

// A node was chosen: store its address and set TryToContinue to true.
TBestNodeResult(TNodeInfo& bestNode)
: BestNode(&bestNode)
, TryToContinue(true)
{}

// No node was chosen: BestNode is nullptr and the caller supplies the flag.
TBestNodeResult(bool tryToContinue)
: BestNode(nullptr)
, TryToContinue(tryToContinue)
{}
};
// Tag returned by FindBestNode when it could not select any node for the tablet.
struct TNoNodeFound {};
// Tag returned by FindBestNode when the candidate nodes are already booting too many tablets.
struct TTooManyTabletsStarting {};
// Result of FindBestNode: the selected node, or one of the two tags saying why there is none.
// Callers distinguish the cases with std::holds_alternative / std::get.
using TBestNodeResult = std::variant<TNodeInfo*, TNoNodeFound, TTooManyTabletsStarting>;

TBestNodeResult FindBestNode(const TTabletInfo& tablet);

Expand Down Expand Up @@ -627,7 +617,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
TTabletInfo& GetTablet(TTabletId tabletId, TFollowerId followerId);
TTabletInfo* FindTablet(TTabletId tabletId, TFollowerId followerId);
TTabletInfo* FindTablet(const TFullTabletId& tabletId) { return FindTablet(tabletId.first, tabletId.second); }
TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId);
TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId);
TStoragePoolInfo& GetStoragePool(const TString& name);
TStoragePoolInfo* FindStoragePool(const TString& name);
TDomainInfo* FindDomain(TSubDomainKey key);
Expand Down Expand Up @@ -976,6 +966,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
THiveStats GetStats() const;
void RemoveSubActor(ISubActor* subActor);
bool StopSubActor(TSubActorId subActorId);
void WaitToMoveTablets(TActorId actor);
const NKikimrLocal::TLocalConfig &GetLocalConfig() const { return LocalConfig; }
NKikimrTabletBase::TMetrics GetDefaultResourceValuesForObject(TFullObjectId objectId);
NKikimrTabletBase::TMetrics GetDefaultResourceValuesForTabletType(TTabletTypes::EType type);
Expand Down
22 changes: 17 additions & 5 deletions ydb/core/mind/hive/hive_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -903,17 +903,15 @@ Y_UNIT_TEST_SUITE(THiveTest) {
}
}

Y_UNIT_TEST(TestDrain) {
const int NUM_NODES = 3;
void TestDrain(TTestBasicRuntime& runtime) {
const int numNodes = runtime.GetNodeCount();
const int NUM_TABLETS = 100;
TTestBasicRuntime runtime(NUM_NODES, false);
Setup(runtime, true);
const ui64 hiveTablet = MakeDefaultHiveID(0);
const ui64 testerTablet = MakeDefaultHiveID(1);
CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
{
TDispatchOptions options;
options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES);
options.FinalEvents.emplace_back(TEvLocal::EvStatus, numNodes);
runtime.DispatchEvents(options);
}
TTabletTypes::EType tabletType = TTabletTypes::Dummy;
Expand Down Expand Up @@ -970,6 +968,20 @@ Y_UNIT_TEST_SUITE(THiveTest) {
UNIT_ASSERT_VALUES_EQUAL(tabletStates[NKikimrWhiteboard::TTabletStateInfo::Dead], drainMovements);
}

// Runs the shared TestDrain(runtime) scenario on a three-node runtime with default setup.
Y_UNIT_TEST(TestDrain) {
    const int nodeCount = 3;
    TTestBasicRuntime runtime(nodeCount, false);
    Setup(runtime, true);
    TestDrain(runtime);
}

// Runs the shared TestDrain(runtime) scenario on a three-node runtime whose Hive config
// has MaxTabletsScheduled set to 1.
Y_UNIT_TEST(TestDrainWithMaxTabletsScheduled) {
    const int nodeCount = 3;
    TTestBasicRuntime runtime(nodeCount, false);
    // Applied to the app during Setup to cap the number of scheduled tablets.
    const auto limitScheduledTablets = [](TAppPrepare& app) {
        app.HiveConfig.SetMaxTabletsScheduled(1);
    };
    Setup(runtime, true, 2, limitScheduledTablets);
    TestDrain(runtime);
}

Y_UNIT_TEST(TestCreateSubHiveCreateTablet) {
TTestBasicRuntime runtime(1, false);
Setup(runtime, true);
Expand Down
Loading
Loading