From 2bc3b26322a3b09979c9ec347fadc915ca344783 Mon Sep 17 00:00:00 2001 From: vporyadke Date: Tue, 23 Jul 2024 14:05:02 +0300 Subject: [PATCH] deprioritise system tablets in balancer (#6840) --- ydb/core/mind/hive/balancer.cpp | 51 ++++++++++++++++++----------- ydb/core/mind/hive/balancer.h | 2 +- ydb/core/mind/hive/hive_impl.h | 4 +++ ydb/core/mind/hive/hive_impl_ut.cpp | 4 +-- ydb/core/mind/hive/monitoring.cpp | 2 ++ ydb/core/protos/config.proto | 1 + 6 files changed, 42 insertions(+), 22 deletions(-) diff --git a/ydb/core/mind/hive/balancer.cpp b/ydb/core/mind/hive/balancer.cpp index 637a15f9753b..620f502eead6 100644 --- a/ydb/core/mind/hive/balancer.cpp +++ b/ydb/core/mind/hive/balancer.cpp @@ -62,17 +62,17 @@ void BalanceNodes } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); // weighted random shuffle std::vector weights; - weights.reserve(tablets.size()); - for (auto it = tablets.begin(); it != tablets.end(); ++it) { + weights.reserve(last - first); + for (auto it = first; it != last; ++it) { weights.emplace_back((*it)->GetWeight(resourceToBalance)); } - auto itT = tablets.begin(); + auto itT = first; auto itW = weights.begin(); - while (itT != tablets.end() && itW != weights.end()) { + while (itT != last && itW != weights.end()) { auto idx = std::discrete_distribution(itW, weights.end())(randGen); if (idx != 0) { std::iter_swap(itT, std::next(itT, idx)); @@ -84,32 +84,32 @@ void BalanceTablets -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { - std::sort(tablets.begin(), tablets.end(), [resourceToBalance](const TTabletInfo* a, const TTabletInfo* b) -> bool { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { + std::sort(first, last, [resourceToBalance](const TTabletInfo* a, const TTabletInfo* b) -> bool { return a->GetWeight(resourceToBalance) > b->GetWeight(resourceToBalance); }); } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); - std::shuffle(tablets.begin(), tablets.end(), randGen); + std::shuffle(first, last, randGen); } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); std::vector> weights; - weights.reserve(tablets.size()); - for (TTabletInfo* tablet : tablets) { - double weight = tablet->GetWeight(resourceToBalance); - weights.emplace_back(weight * randGen(), tablet); + weights.reserve(last - first); + for (auto it = first; it != last; ++it) { + double weight = (*it)->GetWeight(resourceToBalance); + weights.emplace_back(weight * randGen(), *it); } std::sort(weights.begin(), weights.end(), [](const auto& a, const auto& b) -> bool { return a.first > b.first; }); for (size_t n = 0; n < weights.size(); ++n) { - tablets[n] = weights[n].second; + first[n] = weights[n].second; } } @@ -252,18 +252,31 @@ class THiveBalancer : public NActors::TActorBootstrapped, public } BLOG_TRACE("Balancer on node " << node->Id << ": " << tablets.size() << "/" << nodeTablets.size() << " tablets are suitable for balancing"); if (!tablets.empty()) { + // avoid moving system tablets if possible + std::vector::iterator partitionIt; + if (Hive->GetLessSystemTabletsMoves()) { + partitionIt = std::partition(tablets.begin(), tablets.end(), [](TTabletInfo* tablet) { + return !THive::IsSystemTablet(tablet->GetTabletType()); + }); + } else { + partitionIt = tablets.end(); + } switch (Hive->GetTabletBalanceStrategy()) { case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; } Tablets.clear(); diff --git a/ydb/core/mind/hive/balancer.h b/ydb/core/mind/hive/balancer.h index 62289c4f2401..e5ee1a02820d 100644 --- a/ydb/core/mind/hive/balancer.h +++ b/ydb/core/mind/hive/balancer.h @@ -10,7 +10,7 @@ template& nodes, EResourceToBalance resourceTobalance); template -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance); +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance); template void BalanceChannels(std::vector& channels, NKikimrConfig::THiveConfig::EHiveStorageBalanceStrategy metricToBalance); diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index 53d712ce9646..b89c491937e2 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -938,6 +938,10 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId return CurrentConfig.GetNodeUsageRangeToKick(); } + bool GetLessSystemTabletsMoves() const { + return CurrentConfig.GetLessSystemTabletsMoves(); + } + static void ActualizeRestartStatistics(google::protobuf::RepeatedField& restartTimestamps, ui64 barrier); static ui64 GetRestartsPerPeriod(const google::protobuf::RepeatedField& restartTimestamps, ui64 barrier); static bool IsSystemTablet(TTabletTypes::EType type); diff --git a/ydb/core/mind/hive/hive_impl_ut.cpp b/ydb/core/mind/hive/hive_impl_ut.cpp index 0f71b8d31503..bf540540a7e0 100644 --- a/ydb/core/mind/hive/hive_impl_ut.cpp +++ b/ydb/core/mind/hive/hive_impl_ut.cpp @@ -109,7 +109,7 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { auto CheckSpeedAndDistribution = []( std::unordered_map& allTablets, - std::function&, EResourceToBalance)> func, + std::function::iterator, std::vector::iterator, EResourceToBalance)> func, EResourceToBalance resource) -> void { std::vector tablets; @@ -119,7 +119,7 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { TProfileTimer timer; - func(tablets, resource); + func(tablets.begin(), tablets.end(), resource); double passed = timer.Get().SecondsFloat(); diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp index e790bc74ef36..2a74db07926b 100644 --- a/ydb/core/mind/hive/monitoring.cpp +++ b/ydb/core/mind/hive/monitoring.cpp @@ -836,6 +836,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr UpdateConfig(db, "MinStorageScatterToBalance", configUpdates); UpdateConfig(db, "MinGroupUsageToBalance", configUpdates); UpdateConfig(db, "StorageBalancerInflight", configUpdates); + UpdateConfig(db, "LessSystemTabletsMoves", configUpdates); if (params.contains("BalancerIgnoreTabletTypes")) { auto value = params.Get("BalancerIgnoreTabletTypes"); @@ -1182,6 +1183,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr ShowConfig(out, "MinStorageScatterToBalance"); ShowConfig(out, "MinGroupUsageToBalance"); ShowConfig(out, "StorageBalancerInflight"); + ShowConfig(out, "LessSystemTabletsMoves"); ShowConfigForBalancerIgnoreTabletTypes(out); out << "
"; diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index bcd4edba42fb..2cd991d479c1 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1470,6 +1470,7 @@ message THiveConfig { optional uint64 StorageBalancerInflight = 73 [default = 1]; optional bool EnableDestroyOperations = 74 [default = false]; optional double NodeUsageRangeToKick = 75 [default = 0.2]; + optional bool LessSystemTabletsMoves = 77 [default = true]; } message TBlobCacheConfig {