Skip to content

Commit

Permalink
storage balancer info in hive ui & sensors KIKIMR-2190 (ydb-platform#…
Browse files (browse the repository at this point in the history)
  • Loading branch information
vporyadke authored and shnikd committed Feb 6, 2024
1 parent 189f854 commit 9d8fecd
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 5 deletions.
5 changes: 4 additions & 1 deletion ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2382,7 +2382,9 @@ void THive::Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr&) {
auto& [stats, pool] = *std::max_element(poolStats.begin(), poolStats.end(), [](const TPoolStat& lhs, const TPoolStat& rhs) {
return lhs.first.Scatter < rhs.first.Scatter;
});
if (stats.Scatter > GetMinStorageScatterToBalance()) {
StorageScatter = stats.Scatter;
TabletCounters->Simple()[NHive::COUNTER_STORAGE_SCATTER].Set(StorageScatter * 100);
if (StorageScatter > GetMinStorageScatterToBalance()) {
BLOG_D("Storage Scatter = " << stats.Scatter << " in pool " << pool.Name << ", starting StorageBalancer");
ui64 numReassigns = 1;
auto it = pool.Groups.find(stats.MaxUsageGroupId);
Expand All @@ -2395,6 +2397,7 @@ void THive::Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr&) {
}
StartHiveStorageBalancer({
.NumReassigns = numReassigns,
.MaxInFlight = GetStorageBalancerInflight(),
.StoragePool = pool.Name
});
}
Expand Down
6 changes: 6 additions & 0 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
friend class TTxTabletOwnersReply;
friend class TTxRequestTabletOwners;
friend class TTxUpdateTabletsObject;
friend class TTxUpdateTabletGroups;

friend class TDeleteTabletActor;

Expand Down Expand Up @@ -327,6 +328,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
ui32 DataCenters = 1;
ui32 RegisteredDataCenters = 1;
TObjectDistributions ObjectDistributions;
double StorageScatter = 0;

bool AreWeRootHive() const { return RootHiveId == HiveId; }
bool AreWeSubDomainHive() const { return RootHiveId != HiveId; }
Expand Down Expand Up @@ -925,6 +927,10 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
return CurrentConfig.GetMinStorageScatterToBalance();
}

// Returns the StorageBalancerInflight setting from the current Hive config.
// The storage balancer trigger passes this value as MaxInFlight when it
// starts a storage balancer run.
ui64 GetStorageBalancerInflight() const {
    const ui64 maxInFlight = CurrentConfig.GetStorageBalancerInflight();
    return maxInFlight;
}

static void ActualizeRestartStatistics(google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier);
static ui64 GetRestartsPerPeriod(const google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier);
static bool IsSystemTablet(TTabletTypes::EType type);
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/hive/hive_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2786,6 +2786,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
Setup(runtime, true, 2, [](TAppPrepare& app) {
app.HiveConfig.SetMinPeriodBetweenReassign(0);
app.HiveConfig.SetStorageInfoRefreshFrequency(200);
app.HiveConfig.SetMinStorageScatterToBalance(0.5);
});
const ui64 hiveTablet = MakeDefaultHiveID(0);
const ui64 testerTablet = MakeDefaultHiveID(1);
Expand Down
15 changes: 15 additions & 0 deletions ydb/core/mind/hive/monitoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,12 @@ class TTxMonEvent_Settings : public TTransactionBase<THive> {
UpdateConfig(db, "MaxWarmUpPeriod");
UpdateConfig(db, "WarmUpEnabled");
UpdateConfig(db, "ObjectImbalanceToBalance");
UpdateConfig(db, "ChannelBalanceStrategy");
UpdateConfig(db, "MaxChannelHistorySize");
UpdateConfig(db, "StorageInfoRefreshFrequency");
UpdateConfig(db, "MinStorageScatterToBalance");
UpdateConfig(db, "MinGroupUsageToBalance");
UpdateConfig(db, "StorageBalancerInflight");

if (params.contains("BalancerIgnoreTabletTypes")) {
TVector<TString> tabletTypeNames = SplitString(params.Get("BalancerIgnoreTabletTypes"), ";");
Expand Down Expand Up @@ -1111,6 +1117,12 @@ class TTxMonEvent_Settings : public TTransactionBase<THive> {
ShowConfig(out, "MaxWarmUpPeriod");
ShowConfig(out, "WarmUpEnabled");
ShowConfig(out, "ObjectImbalanceToBalance");
ShowConfig(out, "ChannelBalanceStrategy");
ShowConfig(out, "MaxChannelHistorySize");
ShowConfig(out, "StorageInfoRefreshFrequency");
ShowConfig(out, "MinStorageScatterToBalance");
ShowConfig(out, "MinGroupUsageToBalance");
ShowConfig(out, "StorageBalancerInflight");
ShowConfigForBalancerIgnoreTabletTypes(out);

out << "<div class='row' style='margin-top:40px'>";
Expand Down Expand Up @@ -1405,6 +1417,7 @@ class TTxMonEvent_Landing : public TTransactionBase<THive> {
out << "<tr><td>Network</td><td id='resourceScatterNetwork'></td></tr>";
out << "<tr><td>MaxUsage</td><td id='maxUsage'></td></tr>";
out << "<tr><td>Imbalance</td><td id='objectImbalance'></td></tr>";
out << "<tr><td>Storage</td><td id='storageScatter'></td></tr>";
out << "</table></div>";
out << "<div style='min-width:220px'><table class='simple-table3'>";
out << "<tr><th>Balancer</th><th style='min-width:50px'>Runs</th><th style='min-width:50px'>Moves</th>";
Expand Down Expand Up @@ -1896,6 +1909,7 @@ function fillDataShort(result) {
$('#waitQueue').html(result.WaitQueueSize);
$('#maxUsage').html(result.MaxUsage);
$('#objectImbalance').html(result.ObjectImbalance);
$('#storageScatter').html(result.StorageScatter);

$('#resourceTotalCounter').html(result.ResourceTotal.Counter);
$('#resourceTotalCPU').html(result.ResourceTotal.CPU);
Expand Down Expand Up @@ -2205,6 +2219,7 @@ class TTxMonEvent_LandingData : public TTransactionBase<THive> {
jsonData["ScatterHtml"]["Memory"] = std::get<NMetrics::EResource::Memory>(scatterHtml);
jsonData["ScatterHtml"]["Network"] = std::get<NMetrics::EResource::Network>(scatterHtml);
jsonData["ObjectImbalance"] = GetValueWithColoredGlyph(Self->ObjectDistributions.GetMaxImbalance(), Self->GetObjectImbalanceToBalance());
jsonData["StorageScatter"] = GetValueWithColoredGlyph(Self->StorageScatter, Self->GetMinStorageScatterToBalance());
jsonData["WarmUp"] = Self->WarmUp;

if (Cgi.Get("nodes") == "1") {
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/hive/tx__update_tablet_groups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ class TTxUpdateTabletGroups : public TTransactionBase<THive> {
// Use best effort to kill currently running tablet
SideEffects.Register(CreateTabletKiller(TabletId, /* nodeId */ 0, tablet->KnownGeneration));
}
SideEffects.Callback([counters = Self->TabletCounters] { counters->Cumulative()[NHive::COUNTER_TABLETS_STORAGE_REASSIGNED].Increment(1); });
}
if (needToIncreaseGeneration) {
tablet->IncreaseGeneration();
Expand Down
9 changes: 5 additions & 4 deletions ydb/core/protos/config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1375,9 +1375,9 @@ message THiveConfig {
}

enum EHiveChannelBalanceStrategy {
HIVE_CHANNEL_BALANCE_STRATEGY_HEAVIEST = 1;
HIVE_CHANNEL_BALANCE_STRATEGY_RANDOM = 2;
HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM = 3;
HIVE_CHANNEL_BALANCE_STRATEGY_HEAVIEST = 0;
HIVE_CHANNEL_BALANCE_STRATEGY_RANDOM = 1;
HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM = 2;
}

enum EHiveNodeSelectStrategy {
Expand Down Expand Up @@ -1460,8 +1460,9 @@ message THiveConfig {
optional EHiveChannelBalanceStrategy ChannelBalanceStrategy = 68 [default = HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM];
optional uint64 MaxChannelHistorySize = 69 [default = 200];
optional uint64 StorageInfoRefreshFrequency = 70 [default = 600000]; // send a query to BSC every x milliseconds
optional double MinStorageScatterToBalance = 71 [default = 0.5];
optional double MinStorageScatterToBalance = 71 [default = 999]; // storage balancer trigger is disabled by default
optional double MinGroupUsageToBalance = 72 [default = 0.1];
optional uint64 StorageBalancerInflight = 73 [default = 1];
}

message TColumnShardConfig {
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/protos/counters_hive.proto
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ enum ESimpleCounters {
COUNTER_BALANCE_OBJECT_IMBALANCE = 18 [(CounterOpts) = {Name: "BalanceObjectImbalance"}];
COUNTER_IMBALANCED_OBJECTS = 19 [(CounterOpts) = {Name: "ImbalancedObjects"}];
COUNTER_WORST_OBJECT_VARIANCE = 20 [(CounterOpts) = {Name: "WorstObjectVariance"}];
COUNTER_STORAGE_SCATTER = 21 [(CounterOpts) = {Name: "StorageScatter"}];
}

enum ECumulativeCounters {
Expand All @@ -44,6 +45,7 @@ enum ECumulativeCounters {
COUNTER_SUGGESTED_SCALE_UP = 10 [(CounterOpts) = {Name: "SuggestedScaleUp"}];
COUNTER_SUGGESTED_SCALE_DOWN = 11 [(CounterOpts) = {Name: "SuggestedScaleDown"}];
COUNTER_STORAGE_BALANCER_EXECUTED = 12 [(CounterOpts) = {Name: "StorageBalancerExecuted"}];
COUNTER_TABLETS_STORAGE_REASSIGNED = 13 [(CounterOpts) = {Name: "TabletsStorageReassigned"}];
}

enum EPercentileCounters {
Expand Down

0 comments on commit 9d8fecd

Please sign in to comment.