From 87d8083e835ec61151f4ef6cc393448b5fa0358f Mon Sep 17 00:00:00 2001 From: kungurtsev Date: Fri, 16 Aug 2024 11:23:14 +0200 Subject: [PATCH] Report Bloom filter size (#7729) --- ydb/core/protos/sys_view.proto | 2 ++ ydb/core/protos/table_stats.proto | 2 ++ ydb/core/tablet_flat/flat_stat_table.h | 3 ++ .../flat_stat_table_btree_index.cpp | 1 + .../tablet_flat/flat_stat_table_mixed_index.h | 1 + ydb/core/tx/datashard/datashard__stats.cpp | 28 +++++++++---------- ydb/core/tx/datashard/datashard_impl.h | 8 +++--- ydb/core/tx/datashard/datashard_user_table.h | 9 ------ ydb/core/tx/schemeshard/schemeshard__init.cpp | 1 + .../schemeshard/schemeshard__table_stats.cpp | 2 ++ ydb/core/tx/schemeshard/schemeshard_impl.cpp | 1 + .../tx/schemeshard/schemeshard_info_types.cpp | 2 ++ .../tx/schemeshard/schemeshard_info_types.h | 1 + .../schemeshard_path_describer.cpp | 1 + ydb/core/tx/schemeshard/schemeshard_schema.h | 5 +++- .../flat_schemeshard.schema | 8 +++++- 16 files changed, 46 insertions(+), 29 deletions(-) diff --git a/ydb/core/protos/sys_view.proto b/ydb/core/protos/sys_view.proto index 234646e12450..58ea6ea6af7a 100644 --- a/ydb/core/protos/sys_view.proto +++ b/ydb/core/protos/sys_view.proto @@ -44,6 +44,8 @@ message TPartitionStats { optional uint64 TxRejectedBySpace = 19; optional TTtlStats TtlStats = 20; + + optional uint64 ByKeyFilterSize = 21; } message TPartitionStatsResult { diff --git a/ydb/core/protos/table_stats.proto b/ydb/core/protos/table_stats.proto index 9f123decb7a8..c80e97233550 100644 --- a/ydb/core/protos/table_stats.proto +++ b/ydb/core/protos/table_stats.proto @@ -66,4 +66,6 @@ message TTableStats { repeated TChannelStats Channels = 30; optional TStoragePoolsStats StoragePools = 31; + + optional uint64 ByKeyFilterSize = 32; } diff --git a/ydb/core/tablet_flat/flat_stat_table.h b/ydb/core/tablet_flat/flat_stat_table.h index b5f8e252c823..ef594cc28675 100644 --- a/ydb/core/tablet_flat/flat_stat_table.h +++ b/ydb/core/tablet_flat/flat_stat_table.h @@ -102,6 +102,7 @@ struct TStats { ui64 RowCount = 0; TChanneledDataSize DataSize = { }; TChanneledDataSize IndexSize = { }; + ui64 ByKeyFilterSize = 0; THistogram RowCountHistogram; THistogram DataSizeHistogram; @@ -109,6 +110,7 @@ struct TStats { RowCount = 0; DataSize = { }; IndexSize = { }; + ByKeyFilterSize = 0; RowCountHistogram.clear(); DataSizeHistogram.clear(); } @@ -117,6 +119,7 @@ struct TStats { std::swap(RowCount, other.RowCount); std::swap(DataSize, other.DataSize); std::swap(IndexSize, other.IndexSize); + std::swap(ByKeyFilterSize, other.ByKeyFilterSize); RowCountHistogram.swap(other.RowCountHistogram); DataSizeHistogram.swap(other.DataSizeHistogram); } diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp b/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp index e25f160cfef3..a87df9bb1ba1 100644 --- a/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp +++ b/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp @@ -200,6 +200,7 @@ bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBu bool ready = true; for (const auto& part : subset.Flatten) { stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); + stats.ByKeyFilterSize += part->ByKey ? part->ByKey->Raw.size() : 0; ready &= AddDataSize(part, stats, env, yieldHandler); } diff --git a/ydb/core/tablet_flat/flat_stat_table_mixed_index.h b/ydb/core/tablet_flat/flat_stat_table_mixed_index.h index c30ec2f382d4..19ed00c74115 100644 --- a/ydb/core/tablet_flat/flat_stat_table_mixed_index.h +++ b/ydb/core/tablet_flat/flat_stat_table_mixed_index.h @@ -22,6 +22,7 @@ inline bool BuildStatsMixedIndex(const TSubset& subset, TStats& stats, ui64 rowC bool started = true; for (const auto& part : subset.Flatten) { stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); + stats.ByKeyFilterSize += part->ByKey ? part->ByKey->Raw.size() : 0; TAutoPtr iter = new TStatsScreenedPartIterator(part, env, subset.Scheme->Keys, part->Small, part->Large, rowCountResolution / resolutionDivider, dataSizeResolution / resolutionDivider); auto ready = iter->Start(); diff --git a/ydb/core/tx/datashard/datashard__stats.cpp b/ydb/core/tx/datashard/datashard__stats.cpp index 67d5c62db373..0386c224dabf 100644 --- a/ydb/core/tx/datashard/datashard__stats.cpp +++ b/ydb/core/tx/datashard/datashard__stats.cpp @@ -133,7 +133,6 @@ class TTableStatsCoroBuilder : public TActorCoroImpl, private IPages { auto ev = MakeHolder(); ev->TableId = TableId; - ev->IndexSize = IndexSize; ev->StatsUpdateTime = StatsUpdateTime; ev->PartCount = Subset->Flatten.size() + Subset->ColdParts.size(); ev->MemRowCount = MemRowCount; @@ -272,17 +271,14 @@ class TDataShard::TTxGetTableStats : public NTabletFlatExecutor::TTransactionBas const TUserTable& tableInfo = *Self->TableInfos[tableId]; - auto indexSize = txc.DB.GetTableIndexSize(tableInfo.LocalTid); + // Fill stats with current mem table size: auto memSize = txc.DB.GetTableMemSize(tableInfo.LocalTid); auto memRowCount = txc.DB.GetTableMemRowCount(tableInfo.LocalTid); - if (tableInfo.ShadowTid) { - indexSize += txc.DB.GetTableIndexSize(tableInfo.ShadowTid); memSize += txc.DB.GetTableMemSize(tableInfo.ShadowTid); memRowCount += txc.DB.GetTableMemRowCount(tableInfo.ShadowTid); } - Result->Record.MutableTableStats()->SetIndexSize(indexSize); Result->Record.MutableTableStats()->SetInMemSize(memSize); Result->Record.MutableTableStats()->SetLastAccessTime(tableInfo.Stats.AccessTime.MilliSeconds()); Result->Record.MutableTableStats()->SetLastUpdateTime(tableInfo.Stats.UpdateTime.MilliSeconds()); @@ -291,18 +287,21 @@ class TDataShard::TTxGetTableStats : public NTabletFlatExecutor::TTransactionBas tableInfo.Stats.RowCountResolution = Ev->Get()->Record.GetRowCountResolution(); tableInfo.Stats.HistogramBucketsCount = Ev->Get()->Record.GetHistogramBucketsCount(); - // Check if first stats update has been completed + // Check if first stats update has been completed: bool ready = (tableInfo.Stats.StatsUpdateTime != TInstant()); Result->Record.SetFullStatsReady(ready); - if (!ready) + if (!ready) { return true; + } const TStats& stats = tableInfo.Stats.DataStats; + Result->Record.MutableTableStats()->SetIndexSize(stats.IndexSize.Size); + Result->Record.MutableTableStats()->SetByKeyFilterSize(stats.ByKeyFilterSize); Result->Record.MutableTableStats()->SetDataSize(stats.DataSize.Size + memSize); Result->Record.MutableTableStats()->SetRowCount(stats.RowCount + memRowCount); FillHistogram(stats.DataSizeHistogram, *Result->Record.MutableTableStats()->MutableDataSizeHistogram()); FillHistogram(stats.RowCountHistogram, *Result->Record.MutableTableStats()->MutableRowCountHistogram()); - // Fill key access sample if it was collected not too long ago + // Fill key access sample if it was collected not too long ago: if (Self->StopKeyAccessSamplingAt + TDuration::Seconds(30) >= AppData(ctx)->TimeProvider->Now()) { FillKeyAccessSample(tableInfo.Stats.AccessStats, *Result->Record.MutableTableStats()->MutableKeyAccessSample()); } @@ -317,7 +316,7 @@ class TDataShard::TTxGetTableStats : public NTabletFlatExecutor::TTransactionBas Result->Record.AddUserTablePartOwners(pi); } - for (const auto& pi : Self->SysTablesPartOnwers) { + for (const auto& pi : Self->SysTablesPartOwners) { Result->Record.AddSysTablesPartOwners(pi); } @@ -375,9 +374,10 @@ void TDataShard::Handle(TEvPrivate::TEvAsyncTableStats::TPtr& ev, const TActorCo LOG_ERROR(ctx, NKikimrServices::TX_DATASHARD, "Unexpected async stats update at datashard %" PRIu64, TabletID()); } - tableInfo.Stats.Update(std::move(ev->Get()->Stats), ev->Get()->IndexSize, - std::move(ev->Get()->PartOwners), ev->Get()->PartCount, - ev->Get()->StatsUpdateTime); + tableInfo.Stats.DataStats = std::move(ev->Get()->Stats); + tableInfo.Stats.PartOwners = std::move(ev->Get()->PartOwners); + tableInfo.Stats.PartCount = ev->Get()->PartCount; + tableInfo.Stats.StatsUpdateTime = ev->Get()->StatsUpdateTime; tableInfo.Stats.MemRowCount = ev->Get()->MemRowCount; tableInfo.Stats.MemDataSize = ev->Get()->MemDataSize; @@ -565,12 +565,12 @@ class TDataShard::TTxInitiateStatsUpdate : public NTabletFlatExecutor::TTransact Self->Actors.insert(actorId); } - Self->SysTablesPartOnwers.clear(); + Self->SysTablesPartOwners.clear(); for (ui32 sysTableId : Self->SysTablesToTransferAtSplit) { THashSet sysPartOwners; auto subset = txc.DB.Subset(sysTableId, TEpoch::Max(), { }, { }); GetPartOwners(*subset, sysPartOwners); - Self->SysTablesPartOnwers.insert(sysPartOwners.begin(), sysPartOwners.end()); + Self->SysTablesPartOwners.insert(sysPartOwners.begin(), sysPartOwners.end()); } return true; } diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h index 053e7b69d5ef..f4614b08a8ac 100644 --- a/ydb/core/tx/datashard/datashard_impl.h +++ b/ydb/core/tx/datashard/datashard_impl.h @@ -403,7 +403,6 @@ class TDataShard struct TEvAsyncTableStats : public TEventLocal { ui64 TableId = -1; - ui64 IndexSize = 0; TInstant StatsUpdateTime; NTable::TStats Stats; THashSet PartOwners; @@ -2634,7 +2633,7 @@ class TDataShard Schema::PlanQueue::TableId, Schema::DeadlineQueue::TableId }; - THashSet SysTablesPartOnwers; + THashSet SysTablesPartOwners; // Sys table contents ui32 State; @@ -2649,7 +2648,7 @@ class TDataShard NMiniKQL::IKeyAccessSampler::TPtr DisabledKeySampler; NMiniKQL::IKeyAccessSampler::TPtr EnabledKeySampler; - NMiniKQL::IKeyAccessSampler::TPtr CurrentKeySampler; // Points to enbaled or disabled + NMiniKQL::IKeyAccessSampler::TPtr CurrentKeySampler; // Points to enabled or disabled TInstant StartedKeyAccessSamplingAt; TInstant StopKeyAccessSamplingAt; @@ -3241,6 +3240,7 @@ class TDataShard ev->Record.MutableTableStats()->SetDataSize(ti.Stats.DataStats.DataSize.Size + ti.Stats.MemDataSize); ev->Record.MutableTableStats()->SetIndexSize(ti.Stats.DataStats.IndexSize.Size); + ev->Record.MutableTableStats()->SetByKeyFilterSize(ti.Stats.DataStats.ByKeyFilterSize); ev->Record.MutableTableStats()->SetInMemSize(ti.Stats.MemDataSize); TMap> channels; // Channel -> (DataSize, IndexSize) @@ -3294,7 +3294,7 @@ class TDataShard for (const auto& pi : ti.Stats.PartOwners) { ev->Record.AddUserTablePartOwners(pi); } - for (const auto& pi : SysTablesPartOnwers) { + for (const auto& pi : SysTablesPartOwners) { ev->Record.AddSysTablesPartOwners(pi); } diff --git a/ydb/core/tx/datashard/datashard_user_table.h b/ydb/core/tx/datashard/datashard_user_table.h index 6f154a143144..e91c052f0a2d 100644 --- a/ydb/core/tx/datashard/datashard_user_table.h +++ b/ydb/core/tx/datashard/datashard_user_table.h @@ -354,7 +354,6 @@ struct TUserTable : public TThrRefBase { struct TStats { NTable::TStats DataStats; - ui64 IndexSize = 0; ui64 MemRowCount = 0; ui64 MemDataSize = 0; TInstant AccessTime; @@ -371,14 +370,6 @@ struct TUserTable : public TThrRefBase { ui64 BackgroundCompactionCount = 0; ui64 CompactBorrowedCount = 0; NTable::TKeyAccessSample AccessStats; - - void Update(NTable::TStats&& dataStats, ui64 indexSize, THashSet&& partOwners, ui64 partCount, TInstant statsUpdateTime) { - DataStats = dataStats; - IndexSize = indexSize; - PartOwners = partOwners; - PartCount = partCount; - StatsUpdateTime = statsUpdateTime; - } }; struct TSpecialUpdate { diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 3082db929199..e3db4f5760d0 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -2285,6 +2285,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { stats.RowCount = rowSet.GetValue(); stats.DataSize = rowSet.GetValue(); stats.IndexSize = rowSet.GetValue(); + stats.ByKeyFilterSize = rowSet.GetValue(); if (rowSet.HaveValue()) { NKikimrTableStats::TStoragePoolsStats protobufRepresentation; Y_ABORT_UNLESS(ParseFromStringNoSizeLimit( diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp index 72e7b4a80379..26ac03e3fd41 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp @@ -64,6 +64,7 @@ auto TSchemeShard::BuildStatsForCollector(TPathId pathId, TShardIdx shardIdx, TT sysStats.SetDataSize(stats.DataSize); sysStats.SetRowCount(stats.RowCount); sysStats.SetIndexSize(stats.IndexSize); + sysStats.SetByKeyFilterSize(stats.ByKeyFilterSize); sysStats.SetCPUCores(std::min(stats.GetCurrentRawCpuUsage() / 1000000., 1.0)); sysStats.SetTabletId(ui64(datashardId)); sysStats.SetAccessTime(stats.LastAccessTime.MilliSeconds()); @@ -164,6 +165,7 @@ TPartitionStats TTxStoreTableStats::PrepareStats(const TActorContext& ctx, newStats.RowCount = tableStats.GetRowCount(); newStats.DataSize = tableStats.GetDataSize(); newStats.IndexSize = tableStats.GetIndexSize(); + newStats.ByKeyFilterSize = tableStats.GetByKeyFilterSize(); newStats.LastAccessTime = TInstant::MilliSeconds(tableStats.GetLastAccessTime()); newStats.LastUpdateTime = TInstant::MilliSeconds(tableStats.GetLastUpdateTime()); for (const auto& channelStats : tableStats.GetChannels()) { diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index dcba166d20ef..6b507d185acc 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -2506,6 +2506,7 @@ void TSchemeShard::PersistTablePartitionStats(NIceDb::TNiceDb& db, const TPathId NIceDb::TUpdate(stats.RowCount), NIceDb::TUpdate(stats.DataSize), NIceDb::TUpdate(stats.IndexSize), + NIceDb::TUpdate(stats.ByKeyFilterSize), NIceDb::TUpdate(stats.LastAccessTime.GetValue()), NIceDb::TUpdate(stats.LastUpdateTime.GetValue()), diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 852f506dab9c..3660ba1574f5 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -1592,6 +1592,7 @@ void TTableInfo::SetPartitioning(TVector&& newPartitioning) { newAggregatedStats.RowCount += newStats.RowCount; newAggregatedStats.DataSize += newStats.DataSize; newAggregatedStats.IndexSize += newStats.IndexSize; + newAggregatedStats.ByKeyFilterSize += newStats.ByKeyFilterSize; for (const auto& [poolKind, newStoragePoolStats] : newStats.StoragePoolsStats) { auto& [dataSize, indexSize] = newAggregatedStats.StoragePoolsStats[poolKind]; dataSize += newStoragePoolStats.DataSize; @@ -1678,6 +1679,7 @@ void TAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartition Aggregated.RowCount += (newStats.RowCount - oldStats.RowCount); Aggregated.DataSize += (newStats.DataSize - oldStats.DataSize); Aggregated.IndexSize += (newStats.IndexSize - oldStats.IndexSize); + Aggregated.ByKeyFilterSize += (newStats.ByKeyFilterSize - oldStats.ByKeyFilterSize); for (const auto& [poolKind, newStoragePoolStats] : newStats.StoragePoolsStats) { auto& [dataSize, indexSize] = Aggregated.StoragePoolsStats[poolKind]; const auto* oldStoragePoolStats = oldStats.StoragePoolsStats.FindPtr(poolKind); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 4eb70beac9c8..d98cdf96ba93 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -231,6 +231,7 @@ struct TPartitionStats { ui64 RowCount = 0; ui64 DataSize = 0; ui64 IndexSize = 0; + ui64 ByKeyFilterSize = 0; struct TStoragePoolStats { ui64 DataSize = 0; diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index 97d715494b7f..872956a98b6e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -25,6 +25,7 @@ static void FillTableStats(NKikimrTableStats::TTableStats* stats, const TPartiti stats->SetRowCount(tableStats.RowCount); stats->SetDataSize(tableStats.DataSize); stats->SetIndexSize(tableStats.IndexSize); + stats->SetByKeyFilterSize(tableStats.ByKeyFilterSize); stats->SetLastAccessTime(tableStats.LastAccessTime.MilliSeconds()); stats->SetLastUpdateTime(tableStats.LastUpdateTime.MilliSeconds()); stats->SetImmediateTxCompleted(tableStats.ImmediateTxCompleted); diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index ac7cce9805c8..a4f421a7e652 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -378,6 +378,8 @@ struct Schema : NIceDb::Schema { // Represented by NKikimrTableStats::TStoragePoolsStats. struct StoragePoolsStats : Column<33, NScheme::NTypeIds::String> { using Type = TString; }; + struct ByKeyFilterSize : Column<34, NScheme::NTypeIds::Uint64> {}; + using TKey = TableKey; using TColumns = TableColumns< TableOwnerId, @@ -412,7 +414,8 @@ struct Schema : NIceDb::Schema { SearchHeight, FullCompactionTs, MemDataSize, - StoragePoolsStats + StoragePoolsStats, + ByKeyFilterSize >; }; diff --git a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema index 1a4f2de0d060..d816e0a1990b 100644 --- a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema +++ b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema @@ -6828,6 +6828,11 @@ "ColumnId": 33, "ColumnName": "StoragePoolsStats", "ColumnType": "String" + }, + { + "ColumnId": 34, + "ColumnName": "ByKeyFilterSize", + "ColumnType": "Uint64" } ], "ColumnsDropped": [], @@ -6866,7 +6871,8 @@ 30, 31, 32, - 33 + 33, + 34 ], "RoomID": 0, "Codec": 0,