diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index 5a47a0e697fc..28a0b6ebc20d 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -75,21 +75,23 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() { failedByPDisk |= {Topology.get(), slot->GetShortVDiskId()}; } } - auto deriveStatus = [&](const auto& failed) { - auto& checker = *Topology->QuorumChecker; - if (!failed.GetNumSetItems()) { // all disks of group are operational - return NKikimrBlobStorage::TGroupStatus::FULL; - } else if (!checker.CheckFailModelForGroup(failed)) { // fail model exceeded - return NKikimrBlobStorage::TGroupStatus::DISINTEGRATED; - } else if (checker.IsDegraded(failed)) { // group degraded - return NKikimrBlobStorage::TGroupStatus::DEGRADED; - } else if (failed.GetNumSetItems()) { // group partially available, but not degraded - return NKikimrBlobStorage::TGroupStatus::PARTIAL; - } else { - Y_ABORT("unexpected case"); - } - }; - Status.MakeWorst(deriveStatus(failed), deriveStatus(failed | failedByPDisk)); + Status.MakeWorst(DeriveStatus(Topology.get(), failed), DeriveStatus(Topology.get(), failed | failedByPDisk)); + } +} + +NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology, + const TBlobStorageGroupInfo::TGroupVDisks& failed) { + auto& checker = *topology->QuorumChecker; + if (!failed.GetNumSetItems()) { // all disks of group are operational + return NKikimrBlobStorage::TGroupStatus::FULL; + } else if (!checker.CheckFailModelForGroup(failed)) { // fail model exceeded + return NKikimrBlobStorage::TGroupStatus::DISINTEGRATED; + } else if (checker.IsDegraded(failed)) { // group degraded + return NKikimrBlobStorage::TGroupStatus::DEGRADED; + } else if (failed.GetNumSetItems()) { // group partially available, but not degraded + return NKikimrBlobStorage::TGroupStatus::PARTIAL; + } else { + Y_ABORT("unexpected case"); } } diff --git a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp index b75a881623b6..cb77e008429e 100644 --- a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp +++ b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp @@ -568,6 +568,7 @@ namespace NKikimr::NBsController { if (const auto& s = Self.StorageConfig; s.HasBlobStorageConfig()) { if (const auto& bsConfig = s.GetBlobStorageConfig(); bsConfig.HasServiceSet()) { const auto& ss = bsConfig.GetServiceSet(); + const TMonotonic mono = TActivationContext::Monotonic(); for (const auto& group : ss.GetGroups()) { auto *x = pb->AddGroup(); x->SetGroupId(group.GetGroupID()); @@ -581,6 +582,45 @@ namespace NKikimr::NBsController { } } } + + TStringStream err; + auto info = TBlobStorageGroupInfo::Parse(group, nullptr, &err); + Y_VERIFY_DEBUG_S(info, "failed to parse static group, error# " << err.Str()); + if (info) { + const auto *topology = &info->GetTopology(); + + TBlobStorageGroupInfo::TGroupVDisks failed(topology); + TBlobStorageGroupInfo::TGroupVDisks failedByPDisk(topology); + + ui32 realmIdx = 0; + for (const auto& realm : group.GetRings()) { + ui32 domainIdx = 0; + for (const auto& domain : realm.GetFailDomains()) { + ui32 vdiskIdx = 0; + for (const auto& location : domain.GetVDiskLocations()) { + const TVSlotId vslotId(location.GetNodeID(), location.GetPDiskID(), location.GetVDiskSlotID()); + const TVDiskIdShort vdiskId(realmIdx, domainIdx, vdiskIdx); + + if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end()) { + if (mono <= it->second.ReadySince) { // VDisk can't be treated as READY one + failed |= {topology, vdiskId}; + } else if (const TPDiskInfo *pdisk = PDisks.Find(vslotId.ComprisingPDiskId()); !pdisk || !pdisk->HasGoodExpectedStatus()) { + failedByPDisk |= {topology, vdiskId}; + } + } else { + failed |= {topology, vdiskId}; + } + + ++vdiskIdx; + } + ++domainIdx; + } + ++realmIdx; + } + + x->SetOperatingStatus(DeriveStatus(topology, failed)); + x->SetExpectedStatus(DeriveStatus(topology, failed | failedByPDisk)); + } } } } diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index c6444e536e1d..adb21035673c 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -82,6 +82,9 @@ class TBlobStorageController : public TActor, public TTa using TVSlotReadyTimestampQ = std::list>; + // VDisk will be considered READY during this period after reporting its READY state + static constexpr TDuration ReadyStablePeriod = TDuration::Seconds(15); + class TVSlotInfo : public TIndirectReferable { public: using Table = Schema::VSlot; @@ -121,9 +124,6 @@ class TBlobStorageController : public TActor, public TTa TVSlotReadyTimestampQ& VSlotReadyTimestampQ; TVSlotReadyTimestampQ::iterator VSlotReadyTimestampIter; - // VDisk will be considered READY during this period after reporting its READY state - static constexpr TDuration ReadyStablePeriod = TDuration::Seconds(15); - public: NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING; bool IsReady = false; @@ -2237,6 +2237,7 @@ class TBlobStorageController : public TActor, public TTa std::optional VDiskMetrics; NKikimrBlobStorage::EVDiskStatus VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; + TMonotonic ReadySince = TMonotonic::Max(); // when IsReady becomes true for this disk; Max() in non-READY state TStaticVSlotInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk) : VDiskId(VDiskIDFromVDiskID(vdisk.GetVDiskID())) @@ -2306,6 +2307,9 @@ class TBlobStorageController : public TActor, public TTa const TGroupInfo& group, const TVSlotFinder& finder); static void SerializeGroupInfo(NKikimrBlobStorage::TGroupInfo *group, const TGroupInfo& groupInfo, const TString& storagePoolName, const TMaybe& scopeId); + + static NKikimrBlobStorage::TGroupStatus::E DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology, + const TBlobStorageGroupInfo::TGroupVDisks& failed); }; } //NBsController diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp index d2d47f3f6d5c..95e7accfdcf3 100644 --- a/ydb/core/mind/bscontroller/self_heal.cpp +++ b/ydb/core/mind/bscontroller/self_heal.cpp @@ -971,6 +971,11 @@ namespace NKikimr::NBsController { } if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end() && it->second.VDiskId == vdiskId) { it->second.VDiskStatus = m.GetStatus(); + if (it->second.VDiskStatus == NKikimrBlobStorage::EVDiskStatus::READY) { + it->second.ReadySince = Min(it->second.ReadySince, mono + ReadyStablePeriod); + } else { + it->second.ReadySince = TMonotonic::Max(); + } } }