Skip to content

Commit

Permalink
improve node deletion in hive (ydb-platform#7218) (ydb-platform#10393)
Browse files Browse the repository at this point in the history
  • Loading branch information
vporyadke authored and uzhastik committed Oct 24, 2024
1 parent 3488e9d commit 8d559a6
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 6 deletions.
10 changes: 8 additions & 2 deletions ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <ydb/core/cms/console/console.h>
#include <ydb/core/cms/console/configs_dispatcher.h>
#include <ydb/core/protos/counters_hive.pb.h>
#include <ydb/core/protos/node_broker.pb.h>
#include <ydb/core/util/tuples.h>
#include <ydb/library/yverify_stream/yverify_stream.h>
#include <ydb/library/actors/interconnect/interconnect.h>
Expand Down Expand Up @@ -96,10 +97,12 @@ void THive::RestartPipeTx(ui64 tabletId) {

bool THive::TryToDeleteNode(TNodeInfo* node) {
if (node->CanBeDeleted()) {
BLOG_I("TryToDeleteNode(" << node->Id << "): deleting");
DeleteNode(node->Id);
return true;
}
if (!node->DeletionScheduled) {
BLOG_D("TryToDeleteNode(" << node->Id << "): waiting " << GetNodeDeletePeriod());
Schedule(GetNodeDeletePeriod(), new TEvPrivate::TEvDeleteNode(node->Id));
node->DeletionScheduled = true;
}
Expand Down Expand Up @@ -987,8 +990,9 @@ void THive::OnActivateExecutor(const TActorContext&) {
BuildLocalConfig();
ClusterConfig = AppData()->HiveConfig;
SpreadNeighbours = ClusterConfig.GetSpreadNeighbours();
NodeBrokerEpoch = TDuration::MicroSeconds(NKikimrNodeBroker::TConfig().GetEpochDuration());
Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()),
new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest(NKikimrConsole::TConfigItem::HiveConfigItem));
new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({NKikimrConsole::TConfigItem::HiveConfigItem, NKikimrConsole::TConfigItem::NodeBrokerConfigItem}));
Execute(CreateInitScheme());
if (!ResponsivenessPinger) {
ResponsivenessPinger = new TTabletResponsivenessPinger(TabletCounters->Simple()[NHive::COUNTER_RESPONSE_TIME_USEC], TDuration::Seconds(1));
Expand Down Expand Up @@ -2208,7 +2212,9 @@ void THive::Handle(TEvHive::TEvInitiateTabletExternalBoot::TPtr& ev) {
// Handles NConsole::TEvConsole::TEvConfigNotificationRequest.
// Copies the Hive section of the received config into ClusterConfig, refreshes
// NodeBrokerEpoch from the node broker section (GetEpochDuration() is passed to
// TDuration::MicroSeconds), calls BuildCurrentConfig() and sends a
// TEvConfigNotificationResponse back to the sender.
void THive::Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) {
const NKikimrConsole::TConfigNotificationRequest& record = ev->Get()->Record;
ClusterConfig = record.GetConfig().GetHiveConfig();
// NOTE(review): this text is a rendered diff without +/- markers. The one-line
// BLOG_D directly below appears to be the pre-change statement, and the two-line
// BLOG_D further down appears to be its replacement — confirm against the real file.
BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString());
NodeBrokerEpoch = TDuration::MicroSeconds(record.GetConfig().GetNodeBrokerConfig().GetEpochDuration());
BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString()
<< "; " << record.GetConfig().GetNodeBrokerConfig().ShortDebugString());
BuildCurrentConfig();
// The response is built from the received record and reuses the request's cookie.
Send(ev->Sender, new NConsole::TEvConsole::TEvConfigNotificationResponse(record), 0, ev->Cookie);
}
Expand Down
7 changes: 6 additions & 1 deletion ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar

NKikimrConfig::THiveConfig ClusterConfig;
NKikimrConfig::THiveConfig DatabaseConfig;
TDuration NodeBrokerEpoch;
std::unordered_map<TTabletTypes::EType, NKikimrConfig::THiveTabletLimit> TabletLimit; // built from CurrentConfig
std::unordered_map<TTabletTypes::EType, NKikimrHive::TDataCentersPreference> DefaultDataCentersPreference;
std::unordered_map<TDataCenterId, std::unordered_set<TNodeId>> RegisteredDataCenterNodes;
Expand Down Expand Up @@ -739,7 +740,11 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId
}

// Returns the node delete period as a TDuration.
// NOTE(review): this text is a rendered diff without +/- markers. The unconditional
// return directly below appears to be the removed pre-change line, and the if/else
// after it appears to be its replacement — confirm against the real header.
TDuration GetNodeDeletePeriod() const {
return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod());
if (CurrentConfig.HasNodeDeletePeriod()) {
// NodeDeletePeriod is set in CurrentConfig: interpret its value as seconds.
return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod());
} else {
// NodeDeletePeriod is not set in CurrentConfig: use NodeBrokerEpoch instead.
return NodeBrokerEpoch;
}
}

ui64 GetDrainInflight() const {
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/mind/hive/tx__load_everything.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,9 +338,9 @@ class TTxLoadEverything : public TTransactionBase<THive> {
// it's safe to call here, because there are no tablets on the node yet
node.BecomeDisconnected();
}
if (node.CanBeDeleted()) {
if (Self->TryToDeleteNode(&node)) {
// node is deleted from hashmap
db.Table<Schema::Node>().Key(nodeId).Delete();
Self->Nodes.erase(nodeId);
} else if (node.IsUnknown() && node.LocationAcquired) {
Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id);
}
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/protos/config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1458,7 +1458,7 @@ message THiveConfig {
optional double MinPeriodBetweenReassign = 33 [default = 300.0]; // seconds
optional double TabletRestartWatchPeriod = 34 [default = 3600.0]; // seconds
optional double NodeRestartWatchPeriod = 35 [default = 3600.0]; // seconds
optional uint64 NodeDeletePeriod = 36 [default = 14400]; // seconds
optional uint64 NodeDeletePeriod = 36 [default = 3600]; // seconds
repeated THiveTabletLimit DefaultTabletLimit = 37;
repeated THiveTabletPreference DefaultTabletPreference = 38;
optional uint64 SystemTabletCategoryId = 39 [default = 1];
Expand Down

0 comments on commit 8d559a6

Please sign in to comment.