Skip to content

Commit

Permalink
General SelfHeal fixes (#4315)
Browse files: browse the repository at this point in the history
  • Loading branch information
alexvru authored May 6, 2024
1 parent: 068e829; commit: 77a85af
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 145 deletions.
3 changes: 3 additions & 0 deletions ydb/core/mind/bscontroller/config_cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ namespace NKikimr::NBsController {
for (bool value : settings.GetEnableDonorMode()) {
Self->DonorMode = value;
db.Table<T>().Key(true).Update<T::DonorModeEnable>(Self->DonorMode);
auto ev = std::make_unique<TEvControllerUpdateSelfHealInfo>();
ev->DonorMode = Self->DonorMode;
Self->Send(Self->SelfHealId, ev.release());
}
for (ui64 value : settings.GetScrubPeriodicitySeconds()) {
Self->ScrubPeriodicity = TDuration::Seconds(value);
Expand Down
15 changes: 9 additions & 6 deletions ydb/core/mind/bscontroller/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,8 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
|| Status == NKikimrBlobStorage::EDriveStatus::INACTIVE;
}

std::tuple<bool, bool> GetSelfHealStatusTuple() const {
return {ShouldBeSettledBySelfHeal(), BadInTermsOfSelfHeal()};
auto GetSelfHealStatusTuple() const {
return std::make_tuple(ShouldBeSettledBySelfHeal(), BadInTermsOfSelfHeal(), Decommitted(), IsSelfHealReasonDecommit());
}

bool AcceptsNewSlots() const {
Expand Down Expand Up @@ -2222,12 +2222,15 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
const TMonotonic now = TActivationContext::Monotonic();
THashSet<TGroupInfo*> groups;

auto sh = std::make_unique<TEvControllerUpdateSelfHealInfo>();
std::vector<TEvControllerUpdateSelfHealInfo::TVDiskStatusUpdate> updates;
for (auto it = VSlotReadyTimestampQ.begin(); it != VSlotReadyTimestampQ.end() && it->first <= now;
it = VSlotReadyTimestampQ.erase(it)) {
Y_DEBUG_ABORT_UNLESS(!it->second->IsReady);

sh->VDiskIsReadyUpdate.emplace_back(it->second->GetVDiskId(), true);
updates.push_back({
.VDiskId = it->second->GetVDiskId(),
.IsReady = true,
});
it->second->IsReady = true;
it->second->ResetVSlotReadyTimestampIter();
if (const TGroupInfo *group = it->second->Group) {
Expand All @@ -2245,8 +2248,8 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
if (!timingQ.empty()) {
Execute(CreateTxUpdateLastSeenReady(std::move(timingQ)));
}
if (sh->VDiskIsReadyUpdate) {
Send(SelfHealId, sh.release());
if (!updates.empty()) {
Send(SelfHealId, new TEvControllerUpdateSelfHealInfo(std::move(updates)));
}
}

Expand Down
18 changes: 11 additions & 7 deletions ydb/core/mind/bscontroller/register_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,25 +549,29 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId, TActorId serve
SysViewChangedPDisks.insert(it->first);
}
const TVSlotId startingId(nodeId, Min<Schema::VSlot::PDiskID::Type>(), Min<Schema::VSlot::VSlotID::Type>());
auto sh = MakeHolder<TEvControllerUpdateSelfHealInfo>();
std::vector<TEvControllerUpdateSelfHealInfo::TVDiskStatusUpdate> updates;
for (auto it = VSlots.lower_bound(startingId); it != VSlots.end() && it->first.NodeId == nodeId; ++it) {
if (const TGroupInfo *group = it->second->Group) {
if (it->second->IsReady) {
NotReadyVSlotIds.insert(it->second->VSlotId);
sh->VDiskIsReadyUpdate.emplace_back(it->second->GetVDiskId(), false);
}
it->second->SetStatus(NKikimrBlobStorage::EVDiskStatus::ERROR, mono, now, false);
timingQ.emplace_back(*it->second);
sh->VDiskStatusUpdate.emplace_back(it->second->GetVDiskId(), it->second->Status, false);
updates.push_back({
.VDiskId = it->second->GetVDiskId(),
.IsReady = it->second->IsReady,
.VDiskStatus = it->second->Status,
});
ScrubState.UpdateVDiskState(&*it->second);
}
}
for (auto it = StaticVSlots.lower_bound(startingId); it != StaticVSlots.end() && it->first.NodeId == nodeId; ++it) {
it->second.VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR;
it->second.ReadySince = TMonotonic::Max();
auto& slot = it->second;
slot.ReadySince = TMonotonic::Max();
slot.VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR;
}
if (sh->VDiskStatusUpdate) {
Send(SelfHealId, sh.Release());
if (!updates.empty()) {
Send(SelfHealId, new TEvControllerUpdateSelfHealInfo(std::move(updates)));
}
ScrubState.OnNodeDisconnected(nodeId);
EraseKnownDrivesOnDisconnected(&node);
Expand Down
Loading

0 comments on commit 77a85af

Please sign in to comment.