diff --git a/ydb/core/mind/hive/balancer.cpp b/ydb/core/mind/hive/balancer.cpp index b5376ce43736..39d12b2a8eb4 100644 --- a/ydb/core/mind/hive/balancer.cpp +++ b/ydb/core/mind/hive/balancer.cpp @@ -243,7 +243,9 @@ class THiveBalancer : public NActors::TActorBootstrapped, public std::vector tablets; tablets.reserve(nodeTablets.size()); for (TTabletInfo* tablet : nodeTablets) { - if (tablet->IsGoodForBalancer(now) && (!Settings.FilterObjectId || tablet->GetObjectId() == *Settings.FilterObjectId)) { + if (tablet->IsGoodForBalancer(now) && + (!Settings.FilterObjectId || tablet->GetObjectId() == *Settings.FilterObjectId) && + /* collect only tablets for which HasAllowedMetric() accepts the resource being balanced */ tablet->HasAllowedMetric(Settings.ResourceToBalance)) { tablet->UpdateWeight(); tablets.emplace_back(tablet); } diff --git a/ydb/core/mind/hive/hive.h b/ydb/core/mind/hive/hive.h index 690618ea6eee..2a3713424e4c 100644 --- a/ydb/core/mind/hive/hive.h +++ b/ydb/core/mind/hive/hive.h @@ -98,7 +98,7 @@ constexpr std::size_t EBalancerTypeSize = static_cast(EBalancerType TString EBalancerTypeName(EBalancerType value); enum class EResourceToBalance { - Dominant, + ComputeResources, /* TTabletInfo::ExtractResourceUsage returns max(normValues) for this value */ Counter, CPU, Memory, @@ -292,7 +292,7 @@ struct TBalancerSettings { bool RecheckOnFinish = false; ui64 MaxInFlight = 1; const std::vector FilterNodeIds = {}; - EResourceToBalance ResourceToBalance = EResourceToBalance::Dominant; + EResourceToBalance ResourceToBalance = EResourceToBalance::ComputeResources; std::optional FilterObjectId; }; diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index 5e3dffea4cb3..fe7bd3b24142 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -2373,7 +2373,7 @@ void THive::Handle(TEvPrivate::TEvProcessTabletBalancer::TPtr&) { case EResourceToBalance::Network: balancerType = EBalancerType::ScatterNetwork; break; - case EResourceToBalance::Dominant: + case EResourceToBalance::ComputeResources: balancerType = EBalancerType::Scatter; break; } diff --git 
a/ydb/core/mind/hive/node_info.cpp b/ydb/core/mind/hive/node_info.cpp index a0cb8c5e4afa..fbe913bcb7d1 100644 --- a/ydb/core/mind/hive/node_info.cpp +++ b/ydb/core/mind/hive/node_info.cpp @@ -222,7 +222,7 @@ i32 TNodeInfo::GetPriorityForTablet(const TTabletInfo& tablet) const { bool TNodeInfo::IsAbleToRunTablet(const TTabletInfo& tablet, TTabletDebugState* debugState) const { if (tablet.IsAliveOnLocal(Local)) { - return !IsOverloaded(); + /* a tablet already alive on this Local is refused only when the node is overloaded and the tablet has CPU, Memory or Network among its allowed metric ids */ return !(IsOverloaded() && tablet.HasAllowedMetric(EResourceToBalance::ComputeResources)); } if (tablet.IsLeader()) { const TLeaderTabletInfo& leader = tablet.AsLeader(); @@ -280,7 +280,7 @@ bool TNodeInfo::IsAbleToRunTablet(const TTabletInfo& tablet, TTabletDebugState* } } - if (tablet.IsAlive() && IsOverloaded()) { + if (tablet.IsAlive() && IsOverloaded() && tablet.HasAllowedMetric(EResourceToBalance::ComputeResources)) { // we don't move already running tablet to another overloaded node if (debugState) { debugState->NodesWithoutResources++; @@ -434,7 +434,7 @@ double TNodeInfo::GetNodeUsageForTablet(const TTabletInfo& tablet) const { double TNodeInfo::GetNodeUsage(const TResourceNormalizedValues& normValues, EResourceToBalance resource) const { double usage = TTabletInfo::ExtractResourceUsage(normValues, resource); - if (resource == EResourceToBalance::Dominant && AveragedNodeTotalUsage.IsValueStable()) { + if (resource == EResourceToBalance::ComputeResources && AveragedNodeTotalUsage.IsValueStable()) { usage = std::max(usage, AveragedNodeTotalUsage.GetValue()); } return usage; diff --git a/ydb/core/mind/hive/node_info.h b/ydb/core/mind/hive/node_info.h index 34fa1a28661d..961116babce5 100644 --- a/ydb/core/mind/hive/node_info.h +++ b/ydb/core/mind/hive/node_info.h @@ -243,9 +243,9 @@ struct TNodeInfo { } double GetNodeUsageForTablet(const TTabletInfo& tablet) const; - double GetNodeUsage(EResourceToBalance resource = EResourceToBalance::Dominant) const; + double GetNodeUsage(EResourceToBalance resource = 
EResourceToBalance::ComputeResources) const; double GetNodeUsage(const TResourceNormalizedValues& normValues, - EResourceToBalance resource = EResourceToBalance::Dominant) const; + EResourceToBalance resource = EResourceToBalance::ComputeResources) const; ui64 GetTabletsRunningByType(TTabletTypes::EType tabletType) const; diff --git a/ydb/core/mind/hive/tablet_info.cpp b/ydb/core/mind/hive/tablet_info.cpp index 4815df10663a..88facc69ba09 100644 --- a/ydb/core/mind/hive/tablet_info.cpp +++ b/ydb/core/mind/hive/tablet_info.cpp @@ -310,12 +310,37 @@ const TVector& TTabletInfo::GetTabletAllowedMetricIds() const { return Hive.GetTabletTypeAllowedMetricIds(GetLeader().Type); } +/* Reports whether allowedMetricIds contains the TMetrics field number(s) matching resource: CPU, Memory and Network each test their own field; ComputeResources is true if any of those three is present; Counter is always true. */ bool TTabletInfo::HasAllowedMetric(const TVector& allowedMetricIds, EResourceToBalance resource) { + switch (resource) { + case EResourceToBalance::ComputeResources: { + auto isComputeMetric = [](i64 metricId) { + return metricId == NKikimrTabletBase::TMetrics::kCPUFieldNumber || + metricId == NKikimrTabletBase::TMetrics::kMemoryFieldNumber || + metricId == NKikimrTabletBase::TMetrics::kNetworkFieldNumber; + }; + return AnyOf(allowedMetricIds.begin(), allowedMetricIds.end(), isComputeMetric); + } + case EResourceToBalance::Counter: + return true; /* Counter is not looked up in allowedMetricIds */ + case EResourceToBalance::CPU: + return Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kCPUFieldNumber) != allowedMetricIds.end(); + case EResourceToBalance::Memory: + return Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kMemoryFieldNumber) != allowedMetricIds.end(); + case EResourceToBalance::Network: + return Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kNetworkFieldNumber) != allowedMetricIds.end(); + } /* NOTE(review): no return follows the switch; this presumably relies on every EResourceToBalance enumerator being handled above - confirm against the full enum */ +} + +/* Overload that runs the same check against this tablet's GetTabletAllowedMetricIds(). */ bool TTabletInfo::HasAllowedMetric(EResourceToBalance resource) const { + return HasAllowedMetric(GetTabletAllowedMetricIds(), resource); +} + void TTabletInfo::UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics) { TInstant now = TActivationContext::Now(); const TVector& allowedMetricIds(GetTabletAllowedMetricIds()); 
auto before = ResourceValues; auto maximum = GetResourceMaximumValues(); - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kCPUFieldNumber) != allowedMetricIds.end()) { + if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU)) { /* i.e. kCPUFieldNumber is present in allowedMetricIds */ if (metrics.HasCPU()) { if (metrics.GetCPU() > static_cast(std::get(maximum))) { BLOG_W("Ignoring too high CPU metric (" << metrics.GetCPU() << ") for tablet " << ToString()); @@ -325,7 +350,7 @@ void TTabletInfo::UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics } } } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kMemoryFieldNumber) != allowedMetricIds.end()) { + if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory)) { if (metrics.HasMemory()) { if (metrics.GetMemory() > static_cast(std::get(maximum))) { BLOG_W("Ignoring too high Memory metric (" << metrics.GetMemory() << ") for tablet " << ToString()); @@ -335,7 +360,7 @@ void TTabletInfo::UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics } } } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kNetworkFieldNumber) != allowedMetricIds.end()) { + if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network)) { if (metrics.HasNetwork()) { if (metrics.GetNetwork() > static_cast(std::get(maximum))) { BLOG_W("Ignoring too high Network metric (" << metrics.GetNetwork() << ") for tablet " << ToString()); @@ -396,13 +421,13 @@ TResourceRawValues TTabletInfo::GetResourceMaximumValues() const { } i64 TTabletInfo::GetCounterValue(const NKikimrTabletBase::TMetrics& metrics, const TVector& allowedMetricIds) { - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kCPUFieldNumber) != allowedMetricIds.end() && THive::IsValidMetricsCPU(metrics)) { + if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) && THive::IsValidMetricsCPU(metrics)) { return 0; } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kMemoryFieldNumber) != allowedMetricIds.end() && THive::IsValidMetricsMemory(metrics)) { + if 
(HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) && THive::IsValidMetricsMemory(metrics)) { return 0; } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kNetworkFieldNumber) != allowedMetricIds.end() && THive::IsValidMetricsNetwork(metrics)) { + if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) && THive::IsValidMetricsNetwork(metrics)) { return 0; } return 1; @@ -414,13 +439,13 @@ void TTabletInfo::FilterRawValues(TResourceRawValues& values) const { if (metrics.GetCounter() == 0) { std::get(values) = 0; } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kCPUFieldNumber) == allowedMetricIds.end() || !THive::IsValidMetricsCPU(metrics)) { + if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) || !THive::IsValidMetricsCPU(metrics)) { std::get(values) = 0; } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kMemoryFieldNumber) == allowedMetricIds.end() || !THive::IsValidMetricsMemory(metrics)) { + if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) || !THive::IsValidMetricsMemory(metrics)) { std::get(values) = 0; } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kNetworkFieldNumber) == allowedMetricIds.end() || !THive::IsValidMetricsNetwork(metrics)) { + if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) || !THive::IsValidMetricsNetwork(metrics)) { std::get(values) = 0; } } @@ -431,13 +456,13 @@ void TTabletInfo::FilterRawValues(TResourceNormalizedValues& values) const { if (metrics.GetCounter() == 0) { std::get(values) = 0; } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kCPUFieldNumber) == allowedMetricIds.end() || !THive::IsValidMetricsCPU(metrics)) { + if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) || !THive::IsValidMetricsCPU(metrics)) { std::get(values) = 0; } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kMemoryFieldNumber) == allowedMetricIds.end() || !THive::IsValidMetricsMemory(metrics)) { + if 
(!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) || !THive::IsValidMetricsMemory(metrics)) { std::get(values) = 0; } - if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kNetworkFieldNumber) == allowedMetricIds.end() || !THive::IsValidMetricsNetwork(metrics)) { + if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) || !THive::IsValidMetricsNetwork(metrics)) { std::get(values) = 0; } } diff --git a/ydb/core/mind/hive/tablet_info.h b/ydb/core/mind/hive/tablet_info.h index cf22c6c9193f..d37f40563e16 100644 --- a/ydb/core/mind/hive/tablet_info.h +++ b/ydb/core/mind/hive/tablet_info.h @@ -227,6 +227,8 @@ struct TTabletInfo { void BecomeUnknown(TNodeInfo* node); bool Kick(); const TVector& GetTabletAllowedMetricIds() const; + /* True when allowedMetricIds contains the metric field(s) matching resource; Counter is always accepted. */ static bool HasAllowedMetric(const TVector& allowedMetricIds, EResourceToBalance resource); + /* Same check against this tablet's GetTabletAllowedMetricIds(). */ bool HasAllowedMetric(EResourceToBalance resource) const; void UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics); TResourceRawValues GetResourceCurrentValues() const; @@ -237,18 +239,18 @@ struct TTabletInfo { void ActualizeCounter(); template - static double GetUsage(const ResourcesType& current, const ResourcesType& maximum, EResourceToBalance resource = EResourceToBalance::Dominant) { + static double GetUsage(const ResourcesType& current, const ResourcesType& maximum, EResourceToBalance resource = EResourceToBalance::ComputeResources) { auto normValues = NormalizeRawValues(current, maximum); return ExtractResourceUsage(normValues, resource); } - static double ExtractResourceUsage(const TResourceNormalizedValues& normValues, EResourceToBalance resource = EResourceToBalance::Dominant) { + static double ExtractResourceUsage(const TResourceNormalizedValues& normValues, EResourceToBalance resource = EResourceToBalance::ComputeResources) { switch (resource) { case EResourceToBalance::CPU: return std::get(normValues); case EResourceToBalance::Memory: return std::get(normValues); case 
EResourceToBalance::Network: return std::get(normValues); case EResourceToBalance::Counter: return std::get(normValues); - case EResourceToBalance::Dominant: return max(normValues); + case EResourceToBalance::ComputeResources: return max(normValues); } }