Skip to content

Commit

Permalink
Improve cost metrics configuration (ydb-platform#5045)
Browse files Browse the repository at this point in the history
  • Loading branch information
serbel324 committed Jun 5, 2024
1 parent 6f8e7c9 commit 36b2aee
Show file tree
Hide file tree
Showing 31 changed files with 332 additions and 97 deletions.
1 change: 1 addition & 0 deletions ydb/core/blobstorage/dsproxy/ut_fat/dsproxy_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4252,6 +4252,7 @@ class TBlobStorageProxyTest: public TTestBase {
vDiskConfig->GCOnlySynced = false;
vDiskConfig->HullCompLevelRateThreshold = 0.1;
vDiskConfig->SkeletonFrontQueueBackpressureCheckMsgId = false;
vDiskConfig->UseCostTracker = false;

IActor* vDisk = CreateVDisk(vDiskConfig, bsInfo, counters);
TActorSetupCmd vDiskSetup(vDisk, TMailboxType::Revolving, 0);
Expand Down
25 changes: 25 additions & 0 deletions ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,33 @@ void TNodeWarden::Bootstrap() {
DsProxyPerPoolCounters = new TDsProxyPerPoolCounters(AppData()->Counters);

if (actorSystem && actorSystem->AppData<TAppData>() && actorSystem->AppData<TAppData>()->Icb) {
    // Leftover cherry-pick conflict markers were removed here. The incoming side is kept:
    // it registers the same two local controls as the HEAD side and additionally the
    // shared VDisk controls below.
    const TIntrusivePtr<NKikimr::TControlBoard>& icb = actorSystem->AppData<TAppData>()->Icb;

    icb->RegisterLocalControl(EnablePutBatching, "BlobStorage_EnablePutBatching");
    icb->RegisterLocalControl(EnableVPatch, "BlobStorage_EnableVPatch");

    // NOTE(review): the sync-log controls are initialized in the TNodeWarden constructor;
    // confirm the corresponding members are declared on this branch.
    icb->RegisterSharedControl(EnableLocalSyncLogDataCutting, "VDiskControls.EnableLocalSyncLogDataCutting");
    icb->RegisterSharedControl(EnableSyncLogChunkCompressionHDD, "VDiskControls.EnableSyncLogChunkCompressionHDD");
    icb->RegisterSharedControl(EnableSyncLogChunkCompressionSSD, "VDiskControls.EnableSyncLogChunkCompressionSSD");
    icb->RegisterSharedControl(MaxSyncLogChunksInFlightHDD, "VDiskControls.MaxSyncLogChunksInFlightHDD");
    icb->RegisterSharedControl(MaxSyncLogChunksInFlightSSD, "VDiskControls.MaxSyncLogChunksInFlightSSD");

    // Cost metrics parameters are registered per media type (ROT -> HDD suffix, SSD, NVME).
    icb->RegisterSharedControl(CostMetricsParametersByMedia[NPDisk::DEVICE_TYPE_ROT].BurstThresholdNs,
            "VDiskControls.BurstThresholdNsHDD");
    icb->RegisterSharedControl(CostMetricsParametersByMedia[NPDisk::DEVICE_TYPE_SSD].BurstThresholdNs,
            "VDiskControls.BurstThresholdNsSSD");
    icb->RegisterSharedControl(CostMetricsParametersByMedia[NPDisk::DEVICE_TYPE_NVME].BurstThresholdNs,
            "VDiskControls.BurstThresholdNsNVME");
    icb->RegisterSharedControl(CostMetricsParametersByMedia[NPDisk::DEVICE_TYPE_ROT].DiskTimeAvailableScale,
            "VDiskControls.DiskTimeAvailableScaleHDD");
    icb->RegisterSharedControl(CostMetricsParametersByMedia[NPDisk::DEVICE_TYPE_SSD].DiskTimeAvailableScale,
            "VDiskControls.DiskTimeAvailableScaleSSD");
    icb->RegisterSharedControl(CostMetricsParametersByMedia[NPDisk::DEVICE_TYPE_NVME].DiskTimeAvailableScale,
            "VDiskControls.DiskTimeAvailableScaleNVME");
}

// start replication broker
Expand Down
12 changes: 12 additions & 0 deletions ydb/core/blobstorage/nodewarden/node_warden_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ namespace NKikimr::NStorage {
TReplQuoter::TPtr ReplNodeRequestQuoter;
TReplQuoter::TPtr ReplNodeResponseQuoter;

TCostMetricsParametersByMedia CostMetricsParametersByMedia;

public:
struct TGroupRecord;

Expand All @@ -137,6 +139,16 @@ namespace NKikimr::NStorage {
: Cfg(cfg)
, EnablePutBatching(Cfg->FeatureFlags.GetEnablePutBatchingForBlobStorage(), false, true)
, EnableVPatch(Cfg->FeatureFlags.GetEnableVPatch(), false, true)
, EnableLocalSyncLogDataCutting(0, 0, 1)
, EnableSyncLogChunkCompressionHDD(1, 0, 1)
, EnableSyncLogChunkCompressionSSD(0, 0, 1)
, MaxSyncLogChunksInFlightHDD(10, 1, 1024)
, MaxSyncLogChunksInFlightSSD(10, 1, 1024)
, CostMetricsParametersByMedia({
TCostMetricsParameters{200},
TCostMetricsParameters{50},
TCostMetricsParameters{32},
})
{
Y_ABORT_UNLESS(Cfg->BlobStorageConfig.GetServiceSet().AvailabilityDomainsSize() <= 1);
AvailDomainId = 1;
Expand Down
23 changes: 11 additions & 12 deletions ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,21 +174,20 @@ namespace NKikimr::NStorage {
vdiskConfig->EnableVDiskCooldownTimeout = Cfg->EnableVDiskCooldownTimeout;
vdiskConfig->ReplPausedAtStart = Cfg->VDiskReplPausedAtStart;
vdiskConfig->EnableVPatch = EnableVPatch;
vdiskConfig->FeatureFlags = Cfg->FeatureFlags;

if (Cfg->BlobStorageConfig.HasCostMetricsSettings()) {
for (auto type : Cfg->BlobStorageConfig.GetCostMetricsSettings().GetVDiskTypes()) {
if (type.HasPDiskType() && deviceType == PDiskTypeToPDiskType(type.GetPDiskType())) {
if (type.HasBurstThresholdNs()) {
vdiskConfig->BurstThresholdNs = type.GetBurstThresholdNs();
}
if (type.HasDiskTimeAvailableScale()) {
vdiskConfig->DiskTimeAvailableScale = type.GetDiskTimeAvailableScale();
}
}
}
vdiskConfig->EnableLocalSyncLogDataCutting = EnableLocalSyncLogDataCutting;
if (deviceType == NPDisk::EDeviceType::DEVICE_TYPE_ROT) {
vdiskConfig->EnableSyncLogChunkCompression = EnableSyncLogChunkCompressionHDD;
vdiskConfig->MaxSyncLogChunksInFlight = MaxSyncLogChunksInFlightHDD;
} else {
vdiskConfig->EnableSyncLogChunkCompression = EnableSyncLogChunkCompressionSSD;
vdiskConfig->MaxSyncLogChunksInFlight = MaxSyncLogChunksInFlightSSD;
}

vdiskConfig->CostMetricsParametersByMedia = CostMetricsParametersByMedia;

vdiskConfig->FeatureFlags = Cfg->FeatureFlags;

// issue initial report to whiteboard before creating actor to avoid races
Send(WhiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateUpdate(vdiskId, groupInfo->GetStoragePoolName(),
vslotId.PDiskId, vslotId.VDiskSlotId, pdiskGuid, kind, donorMode, whiteboardInstanceGuid, std::move(donors)));
Expand Down
8 changes: 4 additions & 4 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ struct TEvYardInitResult : public TEventLocal<TEvYardInitResult, TEvBlobStorage:
TEvYardInitResult(const NKikimrProto::EReplyStatus status, const TString &errorReason)
: Status(status)
, StatusFlags(0)
, PDiskParams(new TPDiskParams(0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
, PDiskParams(new TPDiskParams(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, DEVICE_TYPE_ROT))
, ErrorReason(errorReason)
{
Y_ABORT_UNLESS(status != NKikimrProto::OK, "Single-parameter constructor is for error responses only");
Expand All @@ -183,7 +183,7 @@ struct TEvYardInitResult : public TEventLocal<TEvYardInitResult, TEvBlobStorage:
ui64 writeSpeedBps, ui64 readBlockSize, ui64 writeBlockSize,
ui64 bulkWriteBlockSize, ui32 chunkSize, ui32 appendBlockSize,
TOwner owner, TOwnerRound ownerRound, TStatusFlags statusFlags, TVector<TChunkIdx> ownedChunks,
const TString &errorReason)
EDeviceType trueMediaType, const TString &errorReason)
: Status(status)
, StatusFlags(statusFlags)
, PDiskParams(new TPDiskParams(
Expand All @@ -196,8 +196,8 @@ struct TEvYardInitResult : public TEventLocal<TEvYardInitResult, TEvBlobStorage:
writeSpeedBps,
readBlockSize,
writeBlockSize,
bulkWriteBlockSize
))
bulkWriteBlockSize,
trueMediaType))
, OwnedChunks(std::move(ownedChunks))
, ErrorReason(errorReason)
{}
Expand Down
8 changes: 5 additions & 3 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1769,7 +1769,8 @@ void TPDisk::ReplyErrorYardInitResult(TYardInit &evYardInit, const TString &str)
DriveModel.Speed(TDriveModel::OP_TYPE_WRITE), readBlockSize, writeBlockSize,
DriveModel.BulkWriteBlockSize(),
GetUserAccessibleChunkSize(), GetChunkAppendBlockSize(), OwnerSystem, 0,
GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType), TVector<TChunkIdx>(), error.Str()));
GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType), TVector<TChunkIdx>(),
Cfg->RetrieveDeviceType(), error.Str()));
Mon.YardInit.CountResponse();
}

Expand Down Expand Up @@ -1816,7 +1817,8 @@ bool TPDisk::YardInitForKnownVDisk(TYardInit &evYardInit, TOwner owner) {
DriveModel.SeekTimeNs() / 1000ull, DriveModel.Speed(TDriveModel::OP_TYPE_READ),
DriveModel.Speed(TDriveModel::OP_TYPE_WRITE), readBlockSize, writeBlockSize,
DriveModel.BulkWriteBlockSize(), GetUserAccessibleChunkSize(), GetChunkAppendBlockSize(), owner,
ownerRound, GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType), ownedChunks, nullptr));
ownerRound, GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType), ownedChunks,
Cfg->RetrieveDeviceType(), nullptr));
GetStartingPoints(owner, result->StartingPoints);
ownerData.VDiskId = vDiskId;
ownerData.CutLogId = evYardInit.CutLogId;
Expand Down Expand Up @@ -1967,7 +1969,7 @@ void TPDisk::YardInitFinish(TYardInit &evYardInit) {
DriveModel.Speed(TDriveModel::OP_TYPE_WRITE), readBlockSize, writeBlockSize,
DriveModel.BulkWriteBlockSize(), GetUserAccessibleChunkSize(), GetChunkAppendBlockSize(), owner, ownerRound,
GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType) | ui32(NKikimrBlobStorage::StatusNewOwner), TVector<TChunkIdx>(),
nullptr));
Cfg->RetrieveDeviceType(), nullptr));
GetStartingPoints(result->PDiskParams->Owner, result->StartingPoints);
WriteSysLogRestorePoint(new TCompletionEventSender(
this, evYardInit.Sender, result.Release(), Mon.YardInit.Results), evYardInit.ReqId, {});
Expand Down
3 changes: 2 additions & 1 deletion ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace NKikimr {
////////////////////////////////////////////////////////////////////////////
TPDiskParams::TPDiskParams(NPDisk::TOwner owner, ui64 ownerRound, ui32 chunkSize, ui32 appendBlockSize,
ui64 seekTimeUs, ui64 readSpeedBps, ui64 writeSpeedBps, ui64 readBlockSize,
ui64 writeBlockSize, ui64 bulkWriteBlockSize)
ui64 writeBlockSize, ui64 bulkWriteBlockSize, NPDisk::EDeviceType trueMediaType)
: Owner(owner)
, OwnerRound(ownerRound)
, ChunkSize(chunkSize)
Expand All @@ -25,6 +25,7 @@ namespace NKikimr {
, BulkWriteBlockSize(bulkWriteBlockSize)
, PrefetchSizeBytes(CalculatePrefetchSizeBytes(seekTimeUs, readSpeedBps))
, GlueRequestDistanceBytes(CalculateGlueRequestDistanceBytes(seekTimeUs, readSpeedBps))
, TrueMediaType(trueMediaType)
{
Y_DEBUG_ABORT_UNLESS(AppendBlockSize <= ChunkSize);
}
Expand Down
4 changes: 3 additions & 1 deletion ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ namespace NKikimr {
const ui64 PrefetchSizeBytes; // Pdisk is expected to stream data of this size at 83% of max speed.
const ui64 GlueRequestDistanceBytes; // It is faster to read unneeded data of this size than to seek over it.

const NPDisk::EDeviceType TrueMediaType;

static ui32 CalculateRecommendedReadSize(ui64 seekTimeUs, ui64 readSpeedBps, ui64 appendBlockSize);
static ui64 CalculatePrefetchSizeBytes(ui64 seekTimeUs, ui64 readSpeedBps);
static ui64 CalculateGlueRequestDistanceBytes(ui64 seekTimeUs, ui64 readSpeedBps);

TPDiskParams(NPDisk::TOwner owner, ui64 ownerRound, ui32 chunkSize, ui32 appendBlockSize,
ui64 seekTimeUs, ui64 readSpeedBps, ui64 writeSpeedBps, ui64 readBlockSize,
ui64 writeBlockSize, ui64 bulkWriteBlockSize);
ui64 writeBlockSize, ui64 bulkWriteBlockSize, NPDisk::EDeviceType trueMediaType);
void OutputHtml(IOutputStream &str) const;
TString ToString() const;
};
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
const ui64 bulkWriteBlockSize = 65536;
res = std::make_unique<NPDisk::TEvYardInitResult>(NKikimrProto::OK, seekTimeUs, readSpeedBps, writeSpeedBps,
readBlockSize, writeBlockSize, bulkWriteBlockSize, Impl.ChunkSize, Impl.AppendBlockSize, ownerId,
owner->OwnerRound, GetStatusFlags(), std::move(ownedChunks), TString());
owner->OwnerRound, GetStatusFlags(), std::move(ownedChunks), NPDisk::DEVICE_TYPE_NVME, TString());
res->StartingPoints = owner->StartingPoints;
} else {
res = std::make_unique<NPDisk::TEvYardInitResult>(NKikimrProto::INVALID_ROUND, "invalid owner round");
Expand Down
86 changes: 79 additions & 7 deletions ydb/core/blobstorage/ut_blobstorage/lib/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ struct TEnvironmentSetup {
std::set<TActorId> CommencedReplication;
std::unordered_map<ui32, TString> Cache;

using TIcbControlKey = std::pair<ui32, TString>; // { nodeId, name }

std::unordered_map<TIcbControlKey, TControlWrapper> IcbControls;

struct TSettings {
const ui32 NodeCount = 9;
const bool VDiskReplPausedAtStart = false;
Expand All @@ -39,7 +43,12 @@ struct TEnvironmentSetup {
const bool SuppressCompatibilityCheck = false;
const TFeatureFlags FeatureFlags;
const NPDisk::EDeviceType DiskType = NPDisk::EDeviceType::DEVICE_TYPE_NVME;
<<<<<<< HEAD
const ui32 BurstThresholdNs = 0;
=======
const ui64 BurstThresholdNs = 0;
const ui32 MinHugeBlobInBytes = 0;
>>>>>>> 208706d808... Improve cost metrics configuration (#5045)
const float DiskTimeAvailableScale = 1;
};

Expand Down Expand Up @@ -326,14 +335,24 @@ struct TEnvironmentSetup {
}
config->FeatureFlags = Settings.FeatureFlags;

{
auto* type = config->BlobStorageConfig.MutableCostMetricsSettings()->AddVDiskTypes();
type->SetPDiskType(NKikimrBlobStorage::EPDiskType::ROT);
if (Settings.BurstThresholdNs) {
type->SetBurstThresholdNs(Settings.BurstThresholdNs);
}
type->SetDiskTimeAvailableScale(Settings.DiskTimeAvailableScale);
TAppData* appData = Runtime->GetNode(nodeId)->AppData.get();

// Helper for the block below: creates a TControlWrapper with the given default/min/max,
// registers it as a shared control named `controlName` on this node's control board
// (`appData->Icb`), assigns `currentValue` to it, and stores the wrapper in IcbControls
// under the key {nodeId, controlName} so that it can be looked up later.
// Relies on `appData` and `nodeId` from the enclosing scope.
#define ADD_ICB_CONTROL(controlName, defaultVal, minVal, maxVal, currentValue) { \
TControlWrapper control(defaultVal, minVal, maxVal); \
appData->Icb->RegisterSharedControl(control, controlName); \
control = currentValue; \
IcbControls.insert({{nodeId, controlName}, std::move(control)}); \
}

if (Settings.BurstThresholdNs) {
ADD_ICB_CONTROL("VDiskControls.BurstThresholdNsHDD", 200'000'000, 1, 1'000'000'000'000, Settings.BurstThresholdNs);
ADD_ICB_CONTROL("VDiskControls.BurstThresholdNsSSD", 50'000'000, 1, 1'000'000'000'000, Settings.BurstThresholdNs);
ADD_ICB_CONTROL("VDiskControls.BurstThresholdNsNVME", 32'000'000, 1, 1'000'000'000'000, Settings.BurstThresholdNs);
}
ADD_ICB_CONTROL("VDiskControls.DiskTimeAvailableScaleHDD", 1'000, 1, 1'000'000, std::round(Settings.DiskTimeAvailableScale * 1'000));
ADD_ICB_CONTROL("VDiskControls.DiskTimeAvailableScaleSSD", 1'000, 1, 1'000'000, std::round(Settings.DiskTimeAvailableScale * 1'000));
ADD_ICB_CONTROL("VDiskControls.DiskTimeAvailableScaleNVME", 1'000, 1, 1'000'000, std::round(Settings.DiskTimeAvailableScale * 1'000));
#undef ADD_ICB_CONTROL

warden.reset(CreateBSNodeWarden(config));
}
Expand Down Expand Up @@ -641,6 +660,19 @@ struct TEnvironmentSetup {
});
}

<<<<<<< HEAD
=======
void PutBlob(const ui32 groupId, const TLogoBlobID& blobId, const TString& part) {
TActorId edge = Runtime->AllocateEdgeActor(Settings.ControllerNodeId);
Runtime->WrapInActorContext(edge, [&] {
SendToBSProxy(edge, groupId, new TEvBlobStorage::TEvPut(blobId, part, TInstant::Max(),
NKikimrBlobStorage::TabletLog, TEvBlobStorage::TEvPut::TacticMaxThroughput));
});
auto res = WaitForEdgeActorEvent<TEvBlobStorage::TEvPutResult>(edge);
Y_ABORT_UNLESS(res->Get()->Status == NKikimrProto::OK);
}

>>>>>>> 208706d808... Improve cost metrics configuration (#5045)
void CommenceReplication() {
for (ui32 groupId : GetGroups()) {
auto info = GetGroupInfo(groupId);
Expand Down Expand Up @@ -831,4 +863,44 @@ struct TEnvironmentSetup {
return SyncQueryFactory<TResult>(actorId, [&] { return std::make_unique<TQuery>(args...); });
}

// Sums one VDisk counter over nodes 1..nodesCount and group positions 0..groupSize-1.
//
// For every (node, position) pair the counter is looked up in the node's "vdisks" service
// counters under storagePool / group / orderNumber (position, zero-padded to 2 characters) /
// pdisk (pdiskLayout[position], zero-padded to 9 characters) / media "rot" / subsystem.
// pdiskLayout is indexed by position without a bounds check, so it has to contain at
// least groupSize entries.
ui64 AggregateVDiskCounters(TString storagePool, ui32 nodesCount, ui32 groupSize, ui32 groupId,
        const std::vector<ui32>& pdiskLayout, TString subsystem, TString counter, bool derivative = false) {
    ui64 total = 0;

    for (ui32 nodeId = 1; nodeId <= nodesCount; ++nodeId) {
        auto* appData = Runtime->GetNode(nodeId)->AppData.get();

        for (ui32 pos = 0; pos < groupSize; ++pos) {
            // Label values are fixed-width, zero-padded decimal strings.
            TStringStream orderStream;
            orderStream << LeftPad(pos, 2, '0');
            const TString orderNumber = orderStream.Str();

            TStringStream pdiskStream;
            pdiskStream << LeftPad(pdiskLayout[pos], 9, '0');
            const TString pdisk = pdiskStream.Str();

            auto groupCounters = GetServiceCounters(appData->Counters, "vdisks")
                ->GetSubgroup("storagePool", storagePool)
                ->GetSubgroup("group", std::to_string(groupId));
            auto vdiskCounters = groupCounters
                ->GetSubgroup("orderNumber", orderNumber)
                ->GetSubgroup("pdisk", pdisk)
                ->GetSubgroup("media", "rot")
                ->GetSubgroup("subsystem", subsystem);

            total += vdiskCounters->GetCounter(counter, derivative)->Val();
        }
    }

    return total;
}

// Assigns `value` to the ICB control `controlName` stored in IcbControls.
// nodeId == 0 addresses every node in 1..Settings.NodeCount; any other value addresses
// that single node. Aborts if a requested {node, controlName} entry is not in IcbControls.
void SetIcbControl(ui32 nodeId, TString controlName, ui64 value) {
    const auto assign = [&](ui32 node) {
        auto it = IcbControls.find({node, controlName});
        Y_ABORT_UNLESS(it != IcbControls.end());
        it->second = value;
    };

    if (nodeId != 0) {
        assign(nodeId);
        return;
    }

    for (ui32 node = 1; node <= Settings.NodeCount; ++node) {
        assign(node);
    }
}

};
29 changes: 15 additions & 14 deletions ydb/core/blobstorage/ut_blobstorage/monitoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,21 +457,22 @@ Y_UNIT_TEST_SUITE(BurstDetection) {
}

void TestDiskTimeAvailableScaling() {
auto measure = [](float scale) {
TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true);
std::unique_ptr<TEnvironmentSetup> env;
ui32 groupSize;
TBlobStorageGroupType groupType;
ui32 groupId;
std::vector<ui32> pdiskLayout;
SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, 0, scale);

return AggregateVDiskCounters(env, env->StoragePoolName, groupSize, groupId, pdiskLayout,
"advancedCost", "DiskTimeAvailable");
};
TBlobStorageGroupInfo::TTopology topology(TBlobStorageGroupType::ErasureNone, 1, 1, 1, true);
std::unique_ptr<TEnvironmentSetup> env;
ui32 groupSize;
TBlobStorageGroupType groupType;
ui32 groupId;
std::vector<ui32> pdiskLayout;
SetupEnv(topology, env, groupSize, groupType, groupId, pdiskLayout, 0, 1);

i64 test1 = env->AggregateVDiskCounters(env->StoragePoolName, groupSize, groupSize, groupId, pdiskLayout,
"advancedCost", "DiskTimeAvailable");

env->SetIcbControl(0, "VDiskControls.DiskTimeAvailableScaleNVME", 2'000);
env->Sim(TDuration::Minutes(5));

i64 test1 = measure(1);
i64 test2 = measure(2);
i64 test2 = env->AggregateVDiskCounters(env->StoragePoolName, groupSize, groupSize, groupId, pdiskLayout,
"advancedCost", "DiskTimeAvailable");

i64 delta = test1 * 2 - test2;

Expand Down
Loading

0 comments on commit 36b2aee

Please sign in to comment.