Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report status for static group #1116

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions ydb/core/mind/bscontroller/bsc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,23 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() {
failedByPDisk |= {Topology.get(), slot->GetShortVDiskId()};
}
}
auto deriveStatus = [&](const auto& failed) {
auto& checker = *Topology->QuorumChecker;
if (!failed.GetNumSetItems()) { // all disks of group are operational
return NKikimrBlobStorage::TGroupStatus::FULL;
} else if (!checker.CheckFailModelForGroup(failed)) { // fail model exceeded
return NKikimrBlobStorage::TGroupStatus::DISINTEGRATED;
} else if (checker.IsDegraded(failed)) { // group degraded
return NKikimrBlobStorage::TGroupStatus::DEGRADED;
} else if (failed.GetNumSetItems()) { // group partially available, but not degraded
return NKikimrBlobStorage::TGroupStatus::PARTIAL;
} else {
Y_ABORT("unexpected case");
}
};
Status.MakeWorst(deriveStatus(failed), deriveStatus(failed | failedByPDisk));
Status.MakeWorst(DeriveStatus(Topology.get(), failed), DeriveStatus(Topology.get(), failed | failedByPDisk));
}
}

// Maps the set of failed VDisks of a group to the resulting group status.
//
// topology -- topology of the group; its QuorumChecker is used to evaluate the fail model
// failed   -- VDisks of the group that are treated as failed
//
// Returns FULL when no VDisk is failed, DISINTEGRATED when the checker rejects the fail model,
// DEGRADED when the checker reports the group as degraded, and PARTIAL for any remaining
// (necessarily non-empty) set of failed VDisks.
NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology,
        const TBlobStorageGroupInfo::TGroupVDisks& failed) {
    auto& checker = *topology->QuorumChecker;
    if (!failed.GetNumSetItems()) { // all disks of group are operational
        return NKikimrBlobStorage::TGroupStatus::FULL;
    } else if (!checker.CheckFailModelForGroup(failed)) { // fail model exceeded
        return NKikimrBlobStorage::TGroupStatus::DISINTEGRATED;
    } else if (checker.IsDegraded(failed)) { // group degraded
        return NKikimrBlobStorage::TGroupStatus::DEGRADED;
    } else { // some disks failed (empty set was handled above): group partially available, but not degraded
        return NKikimrBlobStorage::TGroupStatus::PARTIAL;
    }
}

Expand Down
40 changes: 40 additions & 0 deletions ydb/core/mind/bscontroller/cmds_storage_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,7 @@ namespace NKikimr::NBsController {
if (const auto& s = Self.StorageConfig; s.HasBlobStorageConfig()) {
if (const auto& bsConfig = s.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
const auto& ss = bsConfig.GetServiceSet();
const TMonotonic mono = TActivationContext::Monotonic();
for (const auto& group : ss.GetGroups()) {
auto *x = pb->AddGroup();
x->SetGroupId(group.GetGroupID());
Expand All @@ -581,6 +582,45 @@ namespace NKikimr::NBsController {
}
}
}

TStringStream err;
auto info = TBlobStorageGroupInfo::Parse(group, nullptr, &err);
Y_VERIFY_DEBUG_S(info, "failed to parse static group, error# " << err.Str());
if (info) {
const auto *topology = &info->GetTopology();

TBlobStorageGroupInfo::TGroupVDisks failed(topology);
TBlobStorageGroupInfo::TGroupVDisks failedByPDisk(topology);

ui32 realmIdx = 0;
for (const auto& realm : group.GetRings()) {
ui32 domainIdx = 0;
for (const auto& domain : realm.GetFailDomains()) {
ui32 vdiskIdx = 0;
for (const auto& location : domain.GetVDiskLocations()) {
const TVSlotId vslotId(location.GetNodeID(), location.GetPDiskID(), location.GetVDiskSlotID());
const TVDiskIdShort vdiskId(realmIdx, domainIdx, vdiskIdx);

if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end()) {
if (mono <= it->second.ReadySince) { // VDisk can't be treated as READY one
failed |= {topology, vdiskId};
} else if (const TPDiskInfo *pdisk = PDisks.Find(vslotId.ComprisingPDiskId()); !pdisk || !pdisk->HasGoodExpectedStatus()) {
failedByPDisk |= {topology, vdiskId};
}
} else {
failed |= {topology, vdiskId};
}

++vdiskIdx;
}
++domainIdx;
}
++realmIdx;
}

x->SetOperatingStatus(DeriveStatus(topology, failed));
x->SetExpectedStatus(DeriveStatus(topology, failed | failedByPDisk));
}
}
}
}
Expand Down
10 changes: 7 additions & 3 deletions ydb/core/mind/bscontroller/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa

using TVSlotReadyTimestampQ = std::list<std::pair<TMonotonic, TVSlotInfo*>>;

// VDisk is treated as READY only after it has kept reporting the READY state for this period
static constexpr TDuration ReadyStablePeriod = TDuration::Seconds(15);

class TVSlotInfo : public TIndirectReferable<TVSlotInfo> {
public:
using Table = Schema::VSlot;
Expand Down Expand Up @@ -121,9 +124,6 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
TVSlotReadyTimestampQ& VSlotReadyTimestampQ;
TVSlotReadyTimestampQ::iterator VSlotReadyTimestampIter;

// VDisk is treated as READY only after it has kept reporting the READY state for this period
static constexpr TDuration ReadyStablePeriod = TDuration::Seconds(15);

public:
NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING;
bool IsReady = false;
Expand Down Expand Up @@ -2237,6 +2237,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa

std::optional<NKikimrBlobStorage::TVDiskMetrics> VDiskMetrics;
NKikimrBlobStorage::EVDiskStatus VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR;
TMonotonic ReadySince = TMonotonic::Max(); // when IsReady becomes true for this disk; Max() in non-READY state

TStaticVSlotInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk)
: VDiskId(VDiskIDFromVDiskID(vdisk.GetVDiskID()))
Expand Down Expand Up @@ -2306,6 +2307,9 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
const TGroupInfo& group, const TVSlotFinder& finder);
static void SerializeGroupInfo(NKikimrBlobStorage::TGroupInfo *group, const TGroupInfo& groupInfo,
const TString& storagePoolName, const TMaybe<TKikimrScopeId>& scopeId);

static NKikimrBlobStorage::TGroupStatus::E DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology,
const TBlobStorageGroupInfo::TGroupVDisks& failed);
};

} //NBsController
Expand Down
5 changes: 5 additions & 0 deletions ydb/core/mind/bscontroller/self_heal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,11 @@ namespace NKikimr::NBsController {
}
if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end() && it->second.VDiskId == vdiskId) {
it->second.VDiskStatus = m.GetStatus();
if (it->second.VDiskStatus == NKikimrBlobStorage::EVDiskStatus::READY) {
it->second.ReadySince = Min(it->second.ReadySince, mono + ReadyStablePeriod);
} else {
it->second.ReadySince = TMonotonic::Max();
}
}
}

Expand Down
Loading