From 7b17dda333d1faab49e372e62e6bc1eb25eae64e Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 12:06:33 +0300 Subject: [PATCH 01/10] !!GitHub_Desktop extend information for sys view table --- ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 80 +++++++++++-------- ydb/core/sys_view/common/schema.h | 35 +++++--- ydb/core/tx/columnshard/columnshard__scan.cpp | 32 +++++--- .../columnshard/columnshard__stats_scan.cpp | 78 +++++++++--------- .../tx/columnshard/columnshard__stats_scan.h | 12 +-- .../columnshard/engines/column_engine_logs.h | 11 +++ .../engines/portions/column_record.h | 7 ++ .../engines/reader/read_metadata.h | 20 ++++- .../ut_rw/ut_columnshard_read_write.cpp | 30 ++++--- 9 files changed, 194 insertions(+), 111 deletions(-) diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 83373dbbca8e..b99d462630cd 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -327,7 +327,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { if (c.first == "RawBytes") { rawBytes += GetUint64(c.second); } - if (c.first == "Bytes") { + if (c.first == "BlobRangeSize") { bytes += GetUint64(c.second); } if (verbose) { @@ -3431,8 +3431,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId, Kind, TabletId )"); @@ -3476,8 +3477,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, PortionId, Kind, TabletId FROM `/Root/olapStore/olapTable_1/.sys/primary_index_stats` + GROUP BY PathId, TabletId, PortionId, Kind ORDER BY PathId, Kind, TabletId )"); @@ -3950,7 +3952,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } { auto selectQuery = TString(R"( - SELECT Bytes, Rows + SELECT BlobRangeSize as Bytes, Rows FROM `/Root/olapStore/.sys/store_primary_index_stats` ORDER BY Bytes )"); @@ -3962,8 +3964,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } { auto selectQuery = TString(R"( - SELECT Rows, Kind, RawBytes, Rows as Rows2, Rows as Rows3, PathId + SELECT Sum(Rows) as Rows, Kind, Sum(RawBytes) as RawBytes, Sum(Rows) as Rows2, Sum(Rows) as Rows3, PathId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY Kind, PortionId, PathId ORDER BY PathId, Kind, Rows3 )"); @@ -4001,9 +4004,18 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto selectQuery = TString(R"( SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId, PortionId + FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Kind < UInt32("4") - ORDER BY PathId, Kind, TabletId; + PathId == UInt64("3") AND Kind != 'INACTIVE' + GROUP BY TabletId, PathId, PortionId, Kind )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); @@ -4019,8 +4031,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PortionId, PathId, Kind, TabletId ORDER BY PathId DESC, Kind DESC, TabletId DESC ; @@ -4038,11 +4051,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE PathId > UInt64("0") AND PathId < UInt32("4") OR PathId > UInt64("4") AND PathId <= UInt64("5") + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId DESC, Kind DESC, TabletId DESC ; @@ -4075,9 +4089,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId, Sum(BlobRangeSize) as Bytes FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE Bytes > UInt64("0") + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId, Kind, TabletId; )"); @@ -4088,9 +4103,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId + SELECT PathId, Kind, TabletId, PortionId, Sum(BlobRangeSize) as Bytes FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE Bytes > UInt64("0") + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId, Kind, TabletId; )"); @@ -4103,7 +4119,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto selectQuery = TString(R"( SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Kind == UInt32("6") + WHERE Kind == 'EVICTED' ORDER BY PathId, Kind, TabletId; )"); @@ -4114,9 +4130,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Kind >= UInt32("3") + WHERE Kind IN ('SPLIT_COMPACTED', 'INACTIVE', 'EVICTED') + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId, Kind, TabletId; )"); @@ -4152,7 +4169,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Rows) as rows, FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind != UInt32("4") -- not INACTIVE + Kind != 'INACTIVE' )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); @@ -4166,7 +4183,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Rows) as rows, FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind != UInt32("4") -- not INACTIVE + Kind != 'INACTIVE' GROUP BY PathId ORDER BY @@ -4185,13 +4202,11 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT PathId, SUM(Rows) as rows, - SUM(Bytes) as bytes, - SUM(RawBytes) as bytes_raw, - SUM(Portions) as portions, - SUM(Blobs) as blobs + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind < UInt32("4") + Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -4209,13 +4224,11 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT PathId, SUM(Rows) as rows, - SUM(Bytes) as bytes, - SUM(RawBytes) as bytes_raw, - SUM(Portions) as portions, - SUM(Blobs) as blobs + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Kind < UInt32("4") + PathId == UInt64("3") AND Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -4231,13 +4244,11 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT PathId, SUM(Rows) as rows, - SUM(Bytes) as bytes, - SUM(RawBytes) as bytes_raw, - SUM(Portions) as portions, - SUM(Blobs) as blobs + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId >= UInt64("4") AND Kind < UInt32("4") + PathId >= UInt64("4") AND Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -4251,13 +4262,14 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT count(*) + SELECT PathId, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, TabletId, PortionId )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); // 3 Tables with 3 Shards each and 4 KindId-s of stats - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column0")), 3*3*numKinds); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3 * 3 * numKinds); } { @@ -4277,7 +4289,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, count(*), sum(Rows), sum(Bytes), sum(RawBytes) + SELECT PathId, count(*), sum(Rows), sum(BlobRangeSize), sum(RawBytes) FROM `/Root/olapStore/.sys/store_primary_index_stats` GROUP BY PathId ORDER BY PathId diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h index 11954088d32e..e598f1e0d89c 100644 --- a/ydb/core/sys_view/common/schema.h +++ b/ydb/core/sys_view/common/schema.h @@ -387,27 +387,42 @@ struct Schema : NIceDb::Schema { struct PrimaryIndexStats : Table<10> { struct PathId : Column<1, NScheme::NTypeIds::Uint64> {}; - struct Kind : Column<2, NScheme::NTypeIds::Uint32> {}; + struct Kind : Column<2, NScheme::NTypeIds::Utf8> {}; struct TabletId : Column<3, NScheme::NTypeIds::Uint64> {}; struct Rows : Column<4, NScheme::NTypeIds::Uint64> {}; - struct Bytes : Column<5, NScheme::NTypeIds::Uint64> {}; - struct RawBytes : Column<6, NScheme::NTypeIds::Uint64> {}; - struct Portions : Column<7, NScheme::NTypeIds::Uint64> {}; - struct Blobs : Column<8, NScheme::NTypeIds::Uint64> {}; + struct RawBytes : Column<5, NScheme::NTypeIds::Uint64> {}; + struct PortionId: Column<6, NScheme::NTypeIds::Uint64> {}; + struct ChunkIdx : Column<7, NScheme::NTypeIds::Uint64> {}; + struct ColumnName: Column<8, NScheme::NTypeIds::Utf8> {}; + struct InternalColumnId : Column<9, NScheme::NTypeIds::Uint32> {}; + struct BlobId : Column<10, NScheme::NTypeIds::Utf8> {}; + struct BlobRangeOffset : Column<11, NScheme::NTypeIds::Uint64> {}; + struct BlobRangeSize : Column<12, NScheme::NTypeIds::Uint64> {}; + struct Activity : Column<13, NScheme::NTypeIds::Bool> {}; + struct TierName : Column<14, NScheme::NTypeIds::Utf8> {}; using TKey = TableKey< PathId, - Kind, - TabletId>; + TabletId, + PortionId, + ChunkIdx + >; using TColumns = TableColumns< PathId, Kind, TabletId, Rows, - Bytes, RawBytes, - Portions, - Blobs>; + PortionId, + ChunkIdx, + ColumnName, + InternalColumnId, + BlobId, + BlobRangeOffset, + BlobRangeSize, + Activity, + TierName + >; }; struct StorageStats : Table<11> { diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index c261b70dfa56..a8f853d67296 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include namespace NKikimr::NColumnShard { @@ -644,29 +645,40 @@ PrepareStatsReadMetadata(ui64 tabletId, const NOlap::TReadDescription& read, con auto out = std::make_shared(tabletId, isReverse ? NOlap::TReadStatsMetadata::ESorting::DESC : NOlap::TReadStatsMetadata::ESorting::ASC, - read.GetProgram()); + read.GetProgram(), index ? index->GetVersionedIndex().GetSchema(read.GetSnapshot()) : nullptr); out->SetPKRangesFilter(read.PKRangesFilter); out->ReadColumnIds.assign(readColumnIds.begin(), readColumnIds.end()); out->ResultColumnIds = read.ColumnIds; - if (!index) { + const NOlap::TColumnEngineForLogs* logsIndex = dynamic_cast(index.get()); + if (!index || !logsIndex) { return out; } - + THashMap> portionsInUse; for (auto&& filter : read.PKRangesFilter) { const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); - const auto& stats = index->GetStats(); if (read.TableName.EndsWith(NOlap::TIndexInfo::TABLE_INDEX_STATS_TABLE)) { - if (fromPathId <= read.PathId && toPathId >= read.PathId && stats.contains(read.PathId)) { - out->IndexStats[read.PathId] = std::make_shared(*stats.at(read.PathId)); + if (fromPathId <= read.PathId && toPathId >= read.PathId) { + auto pathInfo = logsIndex->GetGranuleOptional(read.PathId); + if (!pathInfo) { + continue; + } + for (auto&& p : pathInfo->GetPortions()) { + if (p.second->GetRemoveSnapshot().IsZero() && portionsInUse[read.PathId].emplace(p.first).second) { + out->IndexPortions.emplace_back(p.second); + } + } } } else if (read.TableName.EndsWith(NOlap::TIndexInfo::STORE_INDEX_STATS_TABLE)) { - auto it = stats.lower_bound(fromPathId); - auto itEnd = stats.upper_bound(toPathId); - for (; it != itEnd; ++it) { - out->IndexStats[it->first] = std::make_shared(*it->second); + auto pathInfos = logsIndex->GetTables(fromPathId, toPathId); + for (auto&& pathInfo: pathInfos) { + for (auto&& p: pathInfo->GetPortions()) { + if (p.second->GetRemoveSnapshot().IsZero() && portionsInUse[p.second->GetPathId()].emplace(p.first).second) { + out->IndexPortions.emplace_back(p.second); + } + } } } } diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp index 05fe12990514..06febb9c5640 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp @@ -26,27 +26,31 @@ std::optional TStatsIterator::GetBatch() { } std::shared_ptr TStatsIterator::FillStatsBatch() { - ui64 numRows = 0; - numRows += NOlap::TColumnEngineStats::GetRecordsCount() * IndexStats.size(); - + std::vector> portions; + ui32 recordsCount = 0; + while (IndexPortions.size()) { + auto& i = IndexPortions.front(); + recordsCount += i->Records.size(); + portions.emplace_back(i); + IndexPortions.pop_front(); + if (recordsCount > 10000) { + break; + } + } std::vector allColumnIds; for (const auto& c : PrimaryIndexStatsSchema.Columns) { allColumnIds.push_back(c.second.Id); } std::sort(allColumnIds.begin(), allColumnIds.end()); auto schema = NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, allColumnIds); - auto builders = NArrow::MakeBuilders(schema, numRows); + auto builders = NArrow::MakeBuilders(schema, recordsCount); - while (!IndexStats.empty()) { - auto it = Reverse ? std::prev(IndexStats.end()) : IndexStats.begin(); - const auto& stats = it->second; - Y_ABORT_UNLESS(stats); - AppendStats(builders, it->first, *stats); - IndexStats.erase(it); + for (auto&& p: portions) { + AppendStats(builders, *p); } auto columns = NArrow::Finish(std::move(builders)); - return arrow::RecordBatch::Make(schema, numRows, columns); + return arrow::RecordBatch::Make(schema, recordsCount, columns); } void TStatsIterator::ApplyRangePredicates(std::shared_ptr& batch) { @@ -54,36 +58,32 @@ void TStatsIterator::ApplyRangePredicates(std::shared_ptr& b filter.Apply(batch); } -void TStatsIterator::AppendStats(const std::vector>& builders, ui64 pathId, const NOlap::TColumnEngineStats& stats) { - auto kinds = stats.GetKinds(); - auto pathIds = stats.GetConstValues(pathId); - auto tabletIds = stats.GetConstValues(ReadMetadata->TabletId); - auto rows = stats.GetRowsValues(); - auto bytes = stats.GetBytesValues(); - auto rawBytes = stats.GetRawBytesValues(); - auto portions = stats.GetPortionsValues(); - auto blobs = stats.GetBlobsValues(); - +void TStatsIterator::AppendStats(const std::vector>& builders, const NOlap::TPortionInfo& portion) { + std::vector records; + for (auto&& r: portion.Records) { + records.emplace_back(&r); + } if (Reverse) { - std::reverse(std::begin(pathIds), std::end(pathIds)); - std::reverse(std::begin(kinds), std::end(kinds)); - std::reverse(std::begin(tabletIds), std::end(tabletIds)); - std::reverse(std::begin(rows), std::end(rows)); - std::reverse(std::begin(bytes), std::end(bytes)); - std::reverse(std::begin(rawBytes), std::end(rawBytes)); - std::reverse(std::begin(portions), std::end(portions)); - std::reverse(std::begin(blobs), std::end(blobs)); + std::reverse(records.begin(), records.end()); + } + for (auto&& r: records) { + NArrow::Append(*builders[0], portion.GetPathId()); + const std::string prod = ::ToString(portion.GetMeta().Produced); + NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[2], ReadMetadata->TabletId); + NArrow::Append(*builders[3], r->GetMeta().GetNumRowsVerified()); + NArrow::Append(*builders[4], r->GetMeta().GetRawBytesVerified()); + NArrow::Append(*builders[5], portion.GetPortionId()); + NArrow::Append(*builders[6], r->GetChunkIdx()); + NArrow::Append(*builders[7], ReadMetadata->GetColumnNameDef(r->GetColumnId()).value_or("undefined")); + NArrow::Append(*builders[8], r->GetColumnId()); + std::string blobIdString = r->BlobRange.ToString(); + NArrow::Append(*builders[9], blobIdString); + NArrow::Append(*builders[10], r->BlobRange.Offset); + NArrow::Append(*builders[11], r->BlobRange.Size); + NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); + NArrow::Append(*builders[13], portion.GetTierName())); } - - NArrow::Append(*builders[0], pathIds); - NArrow::Append(*builders[1], kinds); - NArrow::Append(*builders[2], tabletIds); - NArrow::Append(*builders[3], rows); - NArrow::Append(*builders[4], bytes); - NArrow::Append(*builders[5], rawBytes); - NArrow::Append(*builders[6], portions); - NArrow::Append(*builders[7], blobs); } } - diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.h b/ydb/core/tx/columnshard/columnshard__stats_scan.h index 0211b4759557..65fe0999ea70 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.h +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.h @@ -54,15 +54,18 @@ class TStatsIterator : public TScanIteratorBase { , Reverse(ReadMetadata->IsDescSorted()) , KeySchema(NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, PrimaryIndexStatsSchema.KeyColumns)) , ResultSchema(NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, ReadMetadata->ResultColumnIds)) - , IndexStats(ReadMetadata->IndexStats.begin(), ReadMetadata->IndexStats.end()) + , IndexPortions(ReadMetadata->IndexPortions) { if (ResultSchema->num_fields() == 0) { ResultSchema = KeySchema; } + if (Reverse) { + std::reverse(IndexPortions.begin(), IndexPortions.end()); + } } bool Finished() const override { - return IndexStats.empty(); + return IndexPortions.empty(); } std::optional GetBatch() override; @@ -73,14 +76,13 @@ class TStatsIterator : public TScanIteratorBase { std::shared_ptr KeySchema; std::shared_ptr ResultSchema; - TMap> IndexStats; + std::deque> IndexPortions; std::shared_ptr FillStatsBatch(); void ApplyRangePredicates(std::shared_ptr& batch); - void AppendStats(const std::vector>& builders, - ui64 pathId, const NOlap::TColumnEngineStats& stats); + void AppendStats(const std::vector>& builders, const NOlap::TPortionInfo& portion); }; } diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index 00439c19808b..4f552e4d0626 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -189,6 +189,17 @@ class TColumnEngineForLogs : public IColumnEngine { return it->second; } + std::vector> GetTables(const ui64 pathIdFrom, const ui64 pathIdTo) const { + std::vector> result; + for (auto&& i : Tables) { + if (i.first < pathIdFrom || i.first > pathIdTo) { + continue; + } + result.emplace_back(i.second); + } + return result; + } + ui64 GetTabletId() const { return TabletId; } diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index 4cbc2d067afc..b04174b59afd 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -78,6 +78,13 @@ struct TColumnRecord { } }; + ui32 GetColumnId() const { + return ColumnId; + } + ui16 GetChunkIdx() const { + return Chunk; + } + TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { TColumnSerializationStat result(ColumnId, columnName); result.Merge(GetSerializationStat()); diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h index 1d26b3399ebb..98579f24e990 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.h @@ -255,18 +255,32 @@ struct TReadMetadata : public TReadMetadataBase, public std::enable_shared_from_ struct TReadStatsMetadata : public TReadMetadataBase, public std::enable_shared_from_this { private: using TBase = TReadMetadataBase; + std::shared_ptr ResultIndexSchema; public: using TConstPtr = std::shared_ptr; const ui64 TabletId; std::vector ReadColumnIds; std::vector ResultColumnIds; - THashMap> IndexStats; + std::deque> IndexPortions; + + std::optional GetColumnNameDef(const ui32 columnId) const { + if (!ResultIndexSchema) { + return {}; + } + auto f = ResultIndexSchema->GetFieldByColumnId(columnId); + if (!f) { + return {}; + } + return f->name(); + } - explicit TReadStatsMetadata(ui64 tabletId, const ESorting sorting, const TProgramContainer& ssaProgram) + explicit TReadStatsMetadata(ui64 tabletId, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema) : TBase(sorting, ssaProgram) + , ResultIndexSchema(schema) , TabletId(tabletId) - {} + { + } std::vector> GetKeyYqlSchema() const override; diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 317cab0a2632..8e7257e01d09 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -2181,37 +2181,47 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { auto batchStats = scan->ArrowBatch; UNIT_ASSERT(batchStats); // Cerr << batchStats->ToString() << Endl; - UNIT_ASSERT_VALUES_EQUAL(batchStats->num_rows(), 5); +// UNIT_ASSERT_VALUES_EQUAL(batchStats->num_rows(), 10068); ui64 sumCompactedBytes = 0; ui64 sumCompactedRows = 0; ui64 sumInsertedBytes = 0; ui64 sumInsertedRows = 0; + std::optional keyColumnId; for (ui32 i = 0; i < batchStats->num_rows(); ++i) { auto paths = batchStats->GetColumnByName("PathId"); auto kinds = batchStats->GetColumnByName("Kind"); auto rows = batchStats->GetColumnByName("Rows"); - auto bytes = batchStats->GetColumnByName("Bytes"); + auto bytes = batchStats->GetColumnByName("BlobRangeSize"); auto rawBytes = batchStats->GetColumnByName("RawBytes"); + auto internalColumnIds = batchStats->GetColumnByName("InternalColumnId"); ui64 pathId = static_cast(*paths).Value(i); - ui32 kind = static_cast(*kinds).Value(i); + auto kind = static_cast(*kinds).Value(i); + const TString kindStr(kind.data(), kind.size()); ui64 numRows = static_cast(*rows).Value(i); ui64 numBytes = static_cast(*bytes).Value(i); ui64 numRawBytes = static_cast(*rawBytes).Value(i); - + ui32 internalColumnId = static_cast(*internalColumnIds).Value(i); + if (!keyColumnId) { + keyColumnId = internalColumnId; + } Cerr << "[" << __LINE__ << "] " << table.Pk[0].second.GetTypeId() << " " - << pathId << " " << kind << " " << numRows << " " << numBytes << " " << numRawBytes << "\n"; + << pathId << " " << kindStr << " " << numRows << " " << numBytes << " " << numRawBytes << "\n"; if (pathId == tableId) { - if (kind == (ui32)NOlap::NPortion::EProduced::COMPACTED || kind == (ui32)NOlap::NPortion::EProduced::SPLIT_COMPACTED) { + if (kindStr == ::ToString(NOlap::NPortion::EProduced::COMPACTED) || kindStr == ::ToString(NOlap::NPortion::EProduced::SPLIT_COMPACTED)) { sumCompactedBytes += numBytes; - sumCompactedRows += numRows; + if (*keyColumnId == internalColumnId) { + sumCompactedRows += numRows; + } //UNIT_ASSERT(numRawBytes > numBytes); } - if (kind == (ui32)NOlap::NPortion::EProduced::INSERTED) { + if (kindStr == ::ToString(NOlap::NPortion::EProduced::INSERTED)) { sumInsertedBytes += numBytes; - sumInsertedRows += numRows; + if (*keyColumnId == internalColumnId) { + sumInsertedRows += numRows; + } //UNIT_ASSERT(numRawBytes > numBytes); } } else { @@ -2222,7 +2232,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } Cerr << "compacted=" << sumCompactedRows << ";inserted=" << sumInsertedRows << ";expected=" << fullNumRows << ";" << Endl; RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); - UNIT_ASSERT(sumCompactedRows == fullNumRows); + AFL_VERIFY(sumCompactedRows == fullNumRows)("sum", sumCompactedRows)("full", fullNumRows); UNIT_ASSERT(sumCompactedRows < sumCompactedBytes); UNIT_ASSERT(sumInsertedRows == 0); UNIT_ASSERT(sumInsertedBytes == 0); From 4e0289f1b298a081ac76525679208c52b63ca9a3 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 12:06:33 +0300 Subject: [PATCH 02/10] extend information for sys view table --- ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 80 +++++++++++-------- ydb/core/sys_view/common/schema.h | 35 +++++--- ydb/core/tx/columnshard/columnshard__scan.cpp | 32 +++++--- .../columnshard/columnshard__stats_scan.cpp | 78 +++++++++--------- .../tx/columnshard/columnshard__stats_scan.h | 12 +-- .../columnshard/engines/column_engine_logs.h | 11 +++ .../engines/portions/column_record.h | 7 ++ .../engines/reader/read_metadata.h | 20 ++++- .../ut_rw/ut_columnshard_read_write.cpp | 30 ++++--- 9 files changed, 194 insertions(+), 111 deletions(-) diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 83373dbbca8e..b99d462630cd 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -327,7 +327,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { if (c.first == "RawBytes") { rawBytes += GetUint64(c.second); } - if (c.first == "Bytes") { + if (c.first == "BlobRangeSize") { bytes += GetUint64(c.second); } if (verbose) { @@ -3431,8 +3431,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId, Kind, TabletId )"); @@ -3476,8 +3477,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, PortionId, Kind, TabletId FROM `/Root/olapStore/olapTable_1/.sys/primary_index_stats` + GROUP BY PathId, TabletId, PortionId, Kind ORDER BY PathId, Kind, TabletId )"); @@ -3950,7 +3952,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } { auto selectQuery = TString(R"( - SELECT Bytes, Rows + SELECT BlobRangeSize as Bytes, Rows FROM `/Root/olapStore/.sys/store_primary_index_stats` ORDER BY Bytes )"); @@ -3962,8 +3964,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } { auto selectQuery = TString(R"( - SELECT Rows, Kind, RawBytes, Rows as Rows2, Rows as Rows3, PathId + SELECT Sum(Rows) as Rows, Kind, Sum(RawBytes) as RawBytes, Sum(Rows) as Rows2, Sum(Rows) as Rows3, PathId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY Kind, PortionId, PathId ORDER BY PathId, Kind, Rows3 )"); @@ -4001,9 +4004,18 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto selectQuery = TString(R"( SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId, PortionId + FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Kind < UInt32("4") - ORDER BY PathId, Kind, TabletId; + PathId == UInt64("3") AND Kind != 'INACTIVE' + GROUP BY TabletId, PathId, PortionId, Kind )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); @@ -4019,8 +4031,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PortionId, PathId, Kind, TabletId ORDER BY PathId DESC, Kind DESC, TabletId DESC ; @@ -4038,11 +4051,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE PathId > UInt64("0") AND PathId < UInt32("4") OR PathId > UInt64("4") AND PathId <= UInt64("5") + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId DESC, Kind DESC, TabletId DESC ; @@ -4075,9 +4089,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId, Sum(BlobRangeSize) as Bytes FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE Bytes > UInt64("0") + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId, Kind, TabletId; )"); @@ -4088,9 +4103,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId + SELECT PathId, Kind, TabletId, PortionId, Sum(BlobRangeSize) as Bytes FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE Bytes > UInt64("0") + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId, Kind, TabletId; )"); @@ -4103,7 +4119,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto selectQuery = TString(R"( SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Kind == UInt32("6") + WHERE Kind == 'EVICTED' ORDER BY PathId, Kind, TabletId; )"); @@ -4114,9 +4130,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Kind >= UInt32("3") + WHERE Kind IN ('SPLIT_COMPACTED', 'INACTIVE', 'EVICTED') + GROUP BY PathId, Kind, TabletId, PortionId ORDER BY PathId, Kind, TabletId; )"); @@ -4152,7 +4169,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Rows) as rows, FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind != UInt32("4") -- not INACTIVE + Kind != 'INACTIVE' )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); @@ -4166,7 +4183,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Rows) as rows, FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind != UInt32("4") -- not INACTIVE + Kind != 'INACTIVE' GROUP BY PathId ORDER BY @@ -4185,13 +4202,11 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT PathId, SUM(Rows) as rows, - SUM(Bytes) as bytes, - SUM(RawBytes) as bytes_raw, - SUM(Portions) as portions, - SUM(Blobs) as blobs + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind < UInt32("4") + Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -4209,13 +4224,11 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT PathId, SUM(Rows) as rows, - SUM(Bytes) as bytes, - SUM(RawBytes) as bytes_raw, - SUM(Portions) as portions, - SUM(Blobs) as blobs + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Kind < UInt32("4") + PathId == UInt64("3") AND Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -4231,13 +4244,11 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT PathId, SUM(Rows) as rows, - SUM(Bytes) as bytes, - SUM(RawBytes) as bytes_raw, - SUM(Portions) as portions, - SUM(Blobs) as blobs + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId >= UInt64("4") AND Kind < UInt32("4") + PathId >= UInt64("4") AND Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -4251,13 +4262,14 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT count(*) + SELECT PathId, TabletId, PortionId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, TabletId, PortionId )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); // 3 Tables with 3 Shards each and 4 KindId-s of stats - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column0")), 3*3*numKinds); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3 * 3 * numKinds); } { @@ -4277,7 +4289,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, count(*), sum(Rows), sum(Bytes), sum(RawBytes) + SELECT PathId, count(*), sum(Rows), sum(BlobRangeSize), sum(RawBytes) FROM `/Root/olapStore/.sys/store_primary_index_stats` GROUP BY PathId ORDER BY PathId diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h index 11954088d32e..e598f1e0d89c 100644 --- a/ydb/core/sys_view/common/schema.h +++ b/ydb/core/sys_view/common/schema.h @@ -387,27 +387,42 @@ struct Schema : NIceDb::Schema { struct PrimaryIndexStats : Table<10> { struct PathId : Column<1, NScheme::NTypeIds::Uint64> {}; - struct Kind : Column<2, NScheme::NTypeIds::Uint32> {}; + struct Kind : Column<2, NScheme::NTypeIds::Utf8> {}; struct TabletId : Column<3, NScheme::NTypeIds::Uint64> {}; struct Rows : Column<4, NScheme::NTypeIds::Uint64> {}; - struct Bytes : Column<5, NScheme::NTypeIds::Uint64> {}; - struct RawBytes : Column<6, NScheme::NTypeIds::Uint64> {}; - struct Portions : Column<7, NScheme::NTypeIds::Uint64> {}; - struct Blobs : Column<8, NScheme::NTypeIds::Uint64> {}; + struct RawBytes : Column<5, NScheme::NTypeIds::Uint64> {}; + struct PortionId: Column<6, NScheme::NTypeIds::Uint64> {}; + struct ChunkIdx : Column<7, NScheme::NTypeIds::Uint64> {}; + struct ColumnName: Column<8, NScheme::NTypeIds::Utf8> {}; + struct InternalColumnId : Column<9, NScheme::NTypeIds::Uint32> {}; + struct BlobId : Column<10, NScheme::NTypeIds::Utf8> {}; + struct BlobRangeOffset : Column<11, NScheme::NTypeIds::Uint64> {}; + struct BlobRangeSize : Column<12, NScheme::NTypeIds::Uint64> {}; + struct Activity : Column<13, NScheme::NTypeIds::Bool> {}; + struct TierName : Column<14, NScheme::NTypeIds::Utf8> {}; using TKey = TableKey< PathId, - Kind, - TabletId>; + TabletId, + PortionId, + ChunkIdx + >; using TColumns = TableColumns< PathId, Kind, TabletId, Rows, - Bytes, RawBytes, - Portions, - Blobs>; + PortionId, + ChunkIdx, + ColumnName, + InternalColumnId, + BlobId, + BlobRangeOffset, + BlobRangeSize, + Activity, + TierName + >; }; struct StorageStats : Table<11> { diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index c261b70dfa56..a8f853d67296 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include namespace NKikimr::NColumnShard { @@ -644,29 +645,40 @@ PrepareStatsReadMetadata(ui64 tabletId, const NOlap::TReadDescription& read, con auto out = std::make_shared(tabletId, isReverse ? NOlap::TReadStatsMetadata::ESorting::DESC : NOlap::TReadStatsMetadata::ESorting::ASC, - read.GetProgram()); + read.GetProgram(), index ? index->GetVersionedIndex().GetSchema(read.GetSnapshot()) : nullptr); out->SetPKRangesFilter(read.PKRangesFilter); out->ReadColumnIds.assign(readColumnIds.begin(), readColumnIds.end()); out->ResultColumnIds = read.ColumnIds; - if (!index) { + const NOlap::TColumnEngineForLogs* logsIndex = dynamic_cast(index.get()); + if (!index || !logsIndex) { return out; } - + THashMap> portionsInUse; for (auto&& filter : read.PKRangesFilter) { const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); - const auto& stats = index->GetStats(); if (read.TableName.EndsWith(NOlap::TIndexInfo::TABLE_INDEX_STATS_TABLE)) { - if (fromPathId <= read.PathId && toPathId >= read.PathId && stats.contains(read.PathId)) { - out->IndexStats[read.PathId] = std::make_shared(*stats.at(read.PathId)); + if (fromPathId <= read.PathId && toPathId >= read.PathId) { + auto pathInfo = logsIndex->GetGranuleOptional(read.PathId); + if (!pathInfo) { + continue; + } + for (auto&& p : pathInfo->GetPortions()) { + if (p.second->GetRemoveSnapshot().IsZero() && portionsInUse[read.PathId].emplace(p.first).second) { + out->IndexPortions.emplace_back(p.second); + } + } } } else if (read.TableName.EndsWith(NOlap::TIndexInfo::STORE_INDEX_STATS_TABLE)) { - auto it = stats.lower_bound(fromPathId); - auto itEnd = stats.upper_bound(toPathId); - for (; it != itEnd; ++it) { - out->IndexStats[it->first] = std::make_shared(*it->second); + auto pathInfos = logsIndex->GetTables(fromPathId, toPathId); + for (auto&& pathInfo: pathInfos) { + for (auto&& p: pathInfo->GetPortions()) { + if (p.second->GetRemoveSnapshot().IsZero() && portionsInUse[p.second->GetPathId()].emplace(p.first).second) { + out->IndexPortions.emplace_back(p.second); + } + } } } } diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp index 05fe12990514..06febb9c5640 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp @@ -26,27 +26,31 @@ std::optional TStatsIterator::GetBatch() { } std::shared_ptr TStatsIterator::FillStatsBatch() { - ui64 numRows = 0; - numRows += NOlap::TColumnEngineStats::GetRecordsCount() * IndexStats.size(); - + std::vector> portions; + ui32 recordsCount = 0; + while (IndexPortions.size()) { + auto& i = IndexPortions.front(); + recordsCount += i->Records.size(); + portions.emplace_back(i); + IndexPortions.pop_front(); + if (recordsCount > 10000) { + break; + } + } std::vector allColumnIds; for (const auto& c : PrimaryIndexStatsSchema.Columns) { allColumnIds.push_back(c.second.Id); } std::sort(allColumnIds.begin(), allColumnIds.end()); auto schema = NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, allColumnIds); - auto builders = NArrow::MakeBuilders(schema, numRows); + auto builders = NArrow::MakeBuilders(schema, recordsCount); - while (!IndexStats.empty()) { - auto it = Reverse ? std::prev(IndexStats.end()) : IndexStats.begin(); - const auto& stats = it->second; - Y_ABORT_UNLESS(stats); - AppendStats(builders, it->first, *stats); - IndexStats.erase(it); + for (auto&& p: portions) { + AppendStats(builders, *p); } auto columns = NArrow::Finish(std::move(builders)); - return arrow::RecordBatch::Make(schema, numRows, columns); + return arrow::RecordBatch::Make(schema, recordsCount, columns); } void TStatsIterator::ApplyRangePredicates(std::shared_ptr& batch) { @@ -54,36 +58,32 @@ void TStatsIterator::ApplyRangePredicates(std::shared_ptr& b filter.Apply(batch); } -void TStatsIterator::AppendStats(const std::vector>& builders, ui64 pathId, const NOlap::TColumnEngineStats& stats) { - auto kinds = stats.GetKinds(); - auto pathIds = stats.GetConstValues(pathId); - auto tabletIds = stats.GetConstValues(ReadMetadata->TabletId); - auto rows = stats.GetRowsValues(); - auto bytes = stats.GetBytesValues(); - auto rawBytes = stats.GetRawBytesValues(); - auto portions = stats.GetPortionsValues(); - auto blobs = stats.GetBlobsValues(); - +void TStatsIterator::AppendStats(const std::vector>& builders, const NOlap::TPortionInfo& portion) { + std::vector records; + for (auto&& r: portion.Records) { + records.emplace_back(&r); + } if (Reverse) { - std::reverse(std::begin(pathIds), std::end(pathIds)); - std::reverse(std::begin(kinds), std::end(kinds)); - std::reverse(std::begin(tabletIds), std::end(tabletIds)); - std::reverse(std::begin(rows), std::end(rows)); - std::reverse(std::begin(bytes), std::end(bytes)); - std::reverse(std::begin(rawBytes), std::end(rawBytes)); - std::reverse(std::begin(portions), std::end(portions)); - std::reverse(std::begin(blobs), std::end(blobs)); + std::reverse(records.begin(), records.end()); + } + for (auto&& r: records) { + NArrow::Append(*builders[0], portion.GetPathId()); + const std::string prod = ::ToString(portion.GetMeta().Produced); + NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[2], ReadMetadata->TabletId); + NArrow::Append(*builders[3], r->GetMeta().GetNumRowsVerified()); + NArrow::Append(*builders[4], r->GetMeta().GetRawBytesVerified()); + NArrow::Append(*builders[5], portion.GetPortionId()); + NArrow::Append(*builders[6], r->GetChunkIdx()); + NArrow::Append(*builders[7], ReadMetadata->GetColumnNameDef(r->GetColumnId()).value_or("undefined")); + NArrow::Append(*builders[8], r->GetColumnId()); + std::string blobIdString = r->BlobRange.ToString(); + NArrow::Append(*builders[9], blobIdString); + NArrow::Append(*builders[10], r->BlobRange.Offset); + NArrow::Append(*builders[11], r->BlobRange.Size); + NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); + NArrow::Append(*builders[13], portion.GetTierName())); } - - NArrow::Append(*builders[0], pathIds); - NArrow::Append(*builders[1], kinds); - NArrow::Append(*builders[2], tabletIds); - NArrow::Append(*builders[3], rows); - NArrow::Append(*builders[4], bytes); - NArrow::Append(*builders[5], rawBytes); - NArrow::Append(*builders[6], portions); - NArrow::Append(*builders[7], blobs); } } - diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.h b/ydb/core/tx/columnshard/columnshard__stats_scan.h index 0211b4759557..65fe0999ea70 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.h +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.h @@ -54,15 +54,18 @@ class TStatsIterator : public TScanIteratorBase { , Reverse(ReadMetadata->IsDescSorted()) , KeySchema(NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, PrimaryIndexStatsSchema.KeyColumns)) , ResultSchema(NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, ReadMetadata->ResultColumnIds)) - , IndexStats(ReadMetadata->IndexStats.begin(), ReadMetadata->IndexStats.end()) + , IndexPortions(ReadMetadata->IndexPortions) { if (ResultSchema->num_fields() == 0) { ResultSchema = KeySchema; } + if (Reverse) { + std::reverse(IndexPortions.begin(), IndexPortions.end()); + } } bool Finished() const override { - return IndexStats.empty(); + return IndexPortions.empty(); } std::optional GetBatch() override; @@ -73,14 +76,13 @@ class TStatsIterator : public TScanIteratorBase { std::shared_ptr KeySchema; std::shared_ptr ResultSchema; - TMap> IndexStats; + std::deque> IndexPortions; std::shared_ptr FillStatsBatch(); void ApplyRangePredicates(std::shared_ptr& batch); - void AppendStats(const std::vector>& builders, - ui64 pathId, const NOlap::TColumnEngineStats& stats); + void AppendStats(const std::vector>& builders, const NOlap::TPortionInfo& portion); }; } diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index 00439c19808b..4f552e4d0626 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -189,6 +189,17 @@ class TColumnEngineForLogs : public IColumnEngine { return it->second; } + std::vector> GetTables(const ui64 pathIdFrom, const ui64 pathIdTo) const { + std::vector> result; + for (auto&& i : Tables) { + if (i.first < pathIdFrom || i.first > pathIdTo) { + continue; + } + result.emplace_back(i.second); + } + return result; + } + ui64 GetTabletId() const { return TabletId; } diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index 4cbc2d067afc..b04174b59afd 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -78,6 +78,13 @@ struct TColumnRecord { } }; + ui32 GetColumnId() const { + return ColumnId; + } + ui16 GetChunkIdx() const { + return Chunk; + } + TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { TColumnSerializationStat result(ColumnId, columnName); result.Merge(GetSerializationStat()); diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h index 1d26b3399ebb..98579f24e990 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.h @@ -255,18 +255,32 @@ struct TReadMetadata : public TReadMetadataBase, public std::enable_shared_from_ struct TReadStatsMetadata : public TReadMetadataBase, public std::enable_shared_from_this { private: using TBase = TReadMetadataBase; + std::shared_ptr ResultIndexSchema; public: using TConstPtr = std::shared_ptr; const ui64 TabletId; std::vector ReadColumnIds; std::vector ResultColumnIds; - THashMap> IndexStats; + std::deque> IndexPortions; + + std::optional GetColumnNameDef(const ui32 columnId) const { + if (!ResultIndexSchema) { + return {}; + } + auto f = ResultIndexSchema->GetFieldByColumnId(columnId); + if (!f) { + return {}; + } + return f->name(); + } - explicit TReadStatsMetadata(ui64 tabletId, const ESorting sorting, const TProgramContainer& ssaProgram) + explicit TReadStatsMetadata(ui64 tabletId, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema) : TBase(sorting, ssaProgram) + , ResultIndexSchema(schema) , TabletId(tabletId) - {} + { + } std::vector> GetKeyYqlSchema() const override; diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 317cab0a2632..8e7257e01d09 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -2181,37 +2181,47 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { auto batchStats = scan->ArrowBatch; UNIT_ASSERT(batchStats); // Cerr << batchStats->ToString() << Endl; - UNIT_ASSERT_VALUES_EQUAL(batchStats->num_rows(), 5); +// UNIT_ASSERT_VALUES_EQUAL(batchStats->num_rows(), 10068); ui64 sumCompactedBytes = 0; ui64 sumCompactedRows = 0; ui64 sumInsertedBytes = 0; ui64 sumInsertedRows = 0; + std::optional keyColumnId; for (ui32 i = 0; i < batchStats->num_rows(); ++i) { auto paths = batchStats->GetColumnByName("PathId"); auto kinds = batchStats->GetColumnByName("Kind"); auto rows = batchStats->GetColumnByName("Rows"); - auto bytes = batchStats->GetColumnByName("Bytes"); + auto bytes = batchStats->GetColumnByName("BlobRangeSize"); auto rawBytes = batchStats->GetColumnByName("RawBytes"); + auto internalColumnIds = batchStats->GetColumnByName("InternalColumnId"); ui64 pathId = static_cast(*paths).Value(i); - ui32 kind = static_cast(*kinds).Value(i); + auto kind = static_cast(*kinds).Value(i); + const TString kindStr(kind.data(), kind.size()); ui64 numRows = static_cast(*rows).Value(i); ui64 numBytes = static_cast(*bytes).Value(i); ui64 numRawBytes = static_cast(*rawBytes).Value(i); - + ui32 internalColumnId = static_cast(*internalColumnIds).Value(i); + if (!keyColumnId) { + keyColumnId = internalColumnId; + } Cerr << "[" << __LINE__ << "] " << table.Pk[0].second.GetTypeId() << " " - << pathId << " " << kind << " " << numRows << " " << numBytes << " " << numRawBytes << "\n"; + << pathId << " " << kindStr << " " << numRows << " " << numBytes << " " << numRawBytes << "\n"; if (pathId == tableId) { - if (kind == (ui32)NOlap::NPortion::EProduced::COMPACTED || kind == (ui32)NOlap::NPortion::EProduced::SPLIT_COMPACTED) { + if (kindStr == ::ToString(NOlap::NPortion::EProduced::COMPACTED) || kindStr == ::ToString(NOlap::NPortion::EProduced::SPLIT_COMPACTED)) { sumCompactedBytes += numBytes; - sumCompactedRows += numRows; + if (*keyColumnId == internalColumnId) { + sumCompactedRows += numRows; + } //UNIT_ASSERT(numRawBytes > numBytes); } - if (kind == (ui32)NOlap::NPortion::EProduced::INSERTED) { + if (kindStr == ::ToString(NOlap::NPortion::EProduced::INSERTED)) { sumInsertedBytes += numBytes; - sumInsertedRows += numRows; + if (*keyColumnId == internalColumnId) { + sumInsertedRows += numRows; + } //UNIT_ASSERT(numRawBytes > numBytes); } } else { @@ -2222,7 +2232,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } Cerr << "compacted=" << sumCompactedRows << ";inserted=" << sumInsertedRows << ";expected=" << fullNumRows << ";" << Endl; RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); - UNIT_ASSERT(sumCompactedRows == fullNumRows); + AFL_VERIFY(sumCompactedRows == fullNumRows)("sum", sumCompactedRows)("full", fullNumRows); UNIT_ASSERT(sumCompactedRows < sumCompactedBytes); UNIT_ASSERT(sumInsertedRows == 0); UNIT_ASSERT(sumInsertedBytes == 0); From c59814fc26ab6dad7b7ed33d56dd677aabcaf38f Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 12:13:17 +0300 Subject: [PATCH 03/10] fix --- ydb/core/tx/columnshard/columnshard__stats_scan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp index 06febb9c5640..78eab31890ab 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp @@ -82,7 +82,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); - NArrow::Append(*builders[13], portion.GetTierName())); + NArrow::Append(*builders[13], portion.GetTierName()); } } From a97be623a6d208a3b121dbb5ed6ace7a3ef158cb Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 12:27:59 +0300 Subject: [PATCH 04/10] fix --- ydb/core/tx/columnshard/columnshard__stats_scan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp index 78eab31890ab..aaaf14cdf5ab 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp @@ -82,7 +82,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); - NArrow::Append(*builders[13], portion.GetTierName()); + NArrow::Append(*builders[13], portion.GetMeta().GetTierName()); } } From a922e400c2dec7a457827460de2b3d85e4301580 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 12:33:56 +0300 Subject: [PATCH 05/10] fix --- ydb/core/tx/columnshard/columnshard__stats_scan.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp index aaaf14cdf5ab..9ab1348be930 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp @@ -81,8 +81,8 @@ void TStatsIterator::AppendStats(const std::vector(*builders[9], blobIdString); NArrow::Append(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); - NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); - NArrow::Append(*builders[13], portion.GetMeta().GetTierName()); + NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); + NArrow::Append(*builders[13], portion.GetMeta().GetTierName()); } } From f86209095894775d4bec0b95a297dae6ebb91427 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 16:39:33 +0300 Subject: [PATCH 06/10] fix --- ydb/core/tx/columnshard/columnshard__stats_scan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp index 9ab1348be930..d98919f24bef 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp @@ -81,7 +81,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[9], blobIdString); NArrow::Append(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); - NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); + NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); NArrow::Append(*builders[13], portion.GetMeta().GetTierName()); } } From 1d8dd4078917ce9d02acf501f764994ecaeab196 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 16:44:45 +0300 Subject: [PATCH 07/10] fix --- ydb/core/tx/columnshard/columnshard__stats_scan.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp index d98919f24bef..7c1da906a010 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp @@ -82,7 +82,8 @@ void TStatsIterator::AppendStats(const std::vector(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); - NArrow::Append(*builders[13], portion.GetMeta().GetTierName()); + std::string strTierName(portion.GetMeta().GetTierName().data(), portion.GetMeta().GetTierName().size()); + NArrow::Append(*builders[13], strTierName); } } From eb62f7f3fe5e70c0d7ec5d2acfe5d440f8e8ff61 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 19:47:54 +0300 Subject: [PATCH 08/10] fix --- ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 144 +++++++++--------- ydb/core/tx/columnshard/columnshard__scan.cpp | 4 +- 2 files changed, 77 insertions(+), 71 deletions(-) diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index b99d462630cd..d9ce0b2ebf1a 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -88,6 +88,11 @@ Y_UNIT_TEST_SUITE(KqpOlap) { out << value.GetTimestamp(); break; } + case NYdb::EPrimitiveType::Bool: + { + out << value.GetBool(); + break; + } default: { UNIT_ASSERT_C(false, "PrintValue not iplemented for this type"); @@ -193,6 +198,15 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } } + TString GetUtf8(const NYdb::TValue& v) { + NYdb::TValueParser value(v); + if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) { + return *value.GetOptionalUtf8(); + } else { + return value.GetUtf8(); + } + } + TInstant GetTimestamp(const NYdb::TValue& v) { NYdb::TValueParser value(v); if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) { @@ -3420,7 +3434,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { .SetWithSampleTables(false); TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - static ui32 numKinds = 5; + static ui32 numKinds = 2; TLocalHelper(kikimr).CreateTestOlapTable(); for (ui64 i = 0; i < 100; ++i) { @@ -3431,30 +3445,31 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, PortionId + SELECT PathId, Kind, TabletId, Sum(Rows) as Rows FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId, Kind, TabletId, PortionId - ORDER BY PathId, Kind, TabletId + GROUP BY PathId, Kind, TabletId + ORDER BY TabletId, Kind, PathId )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); UNIT_ASSERT_VALUES_EQUAL(rows.size(), numKinds*3); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[0].at("Kind")), 1ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); UNIT_ASSERT_GE(GetUint64(rows[0].at("TabletId")), 72075186224037888ull); + UNIT_ASSERT_GE(GetUint64(rows[2].at("TabletId")), 72075186224037889ull); + UNIT_ASSERT_GE(GetUint64(rows[4].at("TabletId")), 72075186224037890ull); UNIT_ASSERT_GE(GetUint64(rows[1].at("TabletId")), GetUint64(rows[0].at("TabletId"))); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[2].at("Kind")), 1ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[2].at("Kind")), "INSERTED"); UNIT_ASSERT_GE(GetUint64(rows[2].at("TabletId")), GetUint64(rows[1].at("TabletId"))); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[6].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[6].at("Kind")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[6].at("TabletId")), GetUint64(rows[0].at("TabletId"))); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[7].at("Kind")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[7].at("TabletId")), GetUint64(rows[1].at("TabletId"))); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[3].at("Kind")), "SPLIT_COMPACTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("TabletId")), GetUint64(rows[2].at("TabletId"))); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[4].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[4].at("TabletId")), GetUint64(rows[5].at("TabletId"))); UNIT_ASSERT_GE( GetUint64(rows[0].at("Rows")) + GetUint64(rows[1].at("Rows")) + GetUint64(rows[2].at("Rows")) + - GetUint64(rows[3].at("Rows")) + GetUint64(rows[4].at("Rows")) + GetUint64(rows[5].at("Rows")) + - GetUint64(rows[6].at("Rows")) + GetUint64(rows[7].at("Rows")) + GetUint64(rows[8].at("Rows")), + GetUint64(rows[3].at("Rows")) + GetUint64(rows[4].at("Rows")) + GetUint64(rows[5].at("Rows")), 0.3*0.9*100*1000); // >= 90% of 100K inserted rows } @@ -3477,32 +3492,31 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); { auto selectQuery = TString(R"( - SELECT PathId, PortionId, Kind, TabletId + SELECT PathId, Kind, TabletId FROM `/Root/olapStore/olapTable_1/.sys/primary_index_stats` - GROUP BY PathId, TabletId, PortionId, Kind - ORDER BY PathId, Kind, TabletId + GROUP BY PathId, TabletId, Kind + ORDER BY PathId, TabletId, Kind )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); UNIT_ASSERT_GT(rows.size(), 1*numKinds); UNIT_ASSERT_LE(rows.size(), 3*numKinds); - UNIT_ASSERT_VALUES_EQUAL(rows.size() % numKinds, 0); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.front().at("PathId")), 3ull); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.back().at("PathId")), 3ull); } { auto selectQuery = TString(R"( - SELECT * + SELECT PathId, Kind, TabletId FROM `/Root/olapStore/olapTable_2/.sys/primary_index_stats` - ORDER BY PathId, Kind, TabletId + GROUP BY PathId, TabletId, Kind + ORDER BY PathId, TabletId, Kind )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); UNIT_ASSERT_GT(rows.size(), 1*numKinds); UNIT_ASSERT_LE(rows.size(), 3*numKinds); - UNIT_ASSERT_VALUES_EQUAL(rows.size() % numKinds, 0); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.front().at("PathId")), 4ull); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.back().at("PathId")), 4ull); } @@ -3923,7 +3937,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto settings = TKikimrSettings().SetWithSampleTables(false); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); TKikimrRunner kikimr(settings); - static ui32 numKinds = 5; TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable(); for (ui64 i = 0; i < 10; ++i) { @@ -3946,37 +3959,35 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_VALUES_EQUAL(rows.size(), 4); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[0].at("Kind")), 1ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[3].at("Kind")), 2ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[3].at("Kind")), "INSERTED"); } { auto selectQuery = TString(R"( - SELECT BlobRangeSize as Bytes, Rows + SELECT SUM(BlobRangeSize) as Bytes, SUM(Rows) as Rows, PathId, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, TabletId ORDER BY Bytes )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3*numKinds); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3); UNIT_ASSERT_LE(GetUint64(rows[0].at("Bytes")), GetUint64(rows[1].at("Bytes"))); } { auto selectQuery = TString(R"( - SELECT Sum(Rows) as Rows, Kind, Sum(RawBytes) as RawBytes, Sum(Rows) as Rows2, Sum(Rows) as Rows3, PathId, PortionId + SELECT Sum(Rows) as Rows, Kind, Sum(RawBytes) as RawBytes, Sum(Rows) as Rows2, Sum(Rows) as Rows3, PathId FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY Kind, PortionId, PathId + GROUP BY Kind, PathId ORDER BY PathId, Kind, Rows3 )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3*numKinds); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 2); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("Rows2")), GetUint64(rows[0].at("Rows3"))); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("Rows")), GetUint64(rows[1].at("Rows3"))); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("Rows")), GetUint64(rows[2].at("Rows2"))); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[5].at("Rows")), GetUint64(rows[5].at("Rows3"))); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[11].at("Rows")), GetUint64(rows[11].at("Rows2"))); } } @@ -3986,8 +3997,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); Tests::NCommon::TLoggerInit(kikimr).Initialize(); - static ui32 numKinds = 5; - TLocalHelper(kikimr).CreateTestOlapTable("olapTable_1"); TLocalHelper(kikimr).CreateTestOlapTable("olapTable_2"); TLocalHelper(kikimr).CreateTestOlapTable("olapTable_3"); @@ -4011,52 +4020,53 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, PortionId + SELECT PathId, Kind, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Kind != 'INACTIVE' - GROUP BY TabletId, PathId, PortionId, Kind + PathId == UInt64("3") AND Activity = true + GROUP BY TabletId, PathId, Kind + ORDER BY TabletId, Kind )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3*3); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3*2); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[0].at("Kind")), 1ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[2].at("Kind")), 1ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[8].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[8].at("Kind")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[2].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[5].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[4].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[5].at("Kind")), "SPLIT_COMPACTED"); } { auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, PortionId + SELECT PathId, Kind, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PortionId, PathId, Kind, TabletId - ORDER BY - PathId DESC, Kind DESC, TabletId DESC + GROUP BY PathId, Kind, TabletId + ORDER BY PathId DESC, Kind DESC, TabletId DESC ; )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); - ui32 numExpected = 3*3*numKinds; + ui32 numExpected = 3*3*2; UNIT_ASSERT_VALUES_EQUAL(rows.size(), numExpected); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[0].at("Kind")), numKinds); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "SPLIT_COMPACTED"); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[numExpected-1].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[numExpected-1].at("Kind")), 1ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[numExpected-1].at("Kind")), "INSERTED"); } { auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, PortionId + SELECT PathId, Kind, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE PathId > UInt64("0") AND PathId < UInt32("4") OR PathId > UInt64("4") AND PathId <= UInt64("5") - GROUP BY PathId, Kind, TabletId, PortionId + GROUP BY PathId, Kind, TabletId ORDER BY PathId DESC, Kind DESC, TabletId DESC ; @@ -4064,12 +4074,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto rows = ExecuteScanQuery(tableClient, selectQuery); - ui32 numExpected = 2*3*numKinds; + ui32 numExpected = 2*3*2; UNIT_ASSERT_VALUES_EQUAL(rows.size(), numExpected); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[0].at("Kind")), numKinds); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "SPLIT_COMPACTED"); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[numExpected-1].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint32(rows[numExpected-1].at("Kind")), 1ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[numExpected-1].at("Kind")), "INSERTED"); } } @@ -4089,10 +4099,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, PortionId, Sum(BlobRangeSize) as Bytes + SELECT PathId, Kind, TabletId, Sum(BlobRangeSize) as Bytes FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Bytes > UInt64("0") - GROUP BY PathId, Kind, TabletId, PortionId + GROUP BY PathId, Kind, TabletId ORDER BY PathId, Kind, TabletId; )"); @@ -4103,10 +4112,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, PortionId, Sum(BlobRangeSize) as Bytes + SELECT PathId, Kind, TabletId, Sum(BlobRangeSize) as Bytes FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Bytes > UInt64("0") - GROUP BY PathId, Kind, TabletId, PortionId + GROUP BY PathId, Kind, TabletId ORDER BY PathId, Kind, TabletId; )"); @@ -4130,16 +4138,16 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, PortionId + SELECT PathId, Kind, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE Kind IN ('SPLIT_COMPACTED', 'INACTIVE', 'EVICTED') - GROUP BY PathId, Kind, TabletId, PortionId + GROUP BY PathId, Kind, TabletId ORDER BY PathId, Kind, TabletId; )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_GE(rows.size(), 3*3); + UNIT_ASSERT_GE(rows.size(), 3); } } @@ -4147,7 +4155,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto settings = TKikimrSettings().SetWithSampleTables(false); TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - static ui32 numKinds = 5; TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable("olapTable_1"); TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable("olapTable_2"); @@ -4262,14 +4269,14 @@ Y_UNIT_TEST_SUITE(KqpOlap) { { auto selectQuery = TString(R"( - SELECT PathId, TabletId, PortionId + SELECT PathId, TabletId, Kind FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId, TabletId, PortionId + GROUP BY PathId, TabletId, Kind )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); - // 3 Tables with 3 Shards each and 4 KindId-s of stats - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3 * 3 * numKinds); + // 3 Tables with 3 Shards each and 2 KindId-s of stats + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3 * 3 * 2); } { @@ -4283,7 +4290,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto rows = ExecuteScanQuery(tableClient, selectQuery); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column0")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column1")), numKinds); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column1")), 2); UNIT_ASSERT_GE(GetUint64(rows[0].at("column2")), 3ull); } @@ -4299,7 +4306,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3ull); for (ui64 pathId = 3, row = 0; pathId <= 5; ++pathId, ++row) { UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[row].at("PathId")), pathId); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[row].at("column1")), 3*numKinds); } } } diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index a8f853d67296..eb1d57c19edc 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -666,7 +666,7 @@ PrepareStatsReadMetadata(ui64 tabletId, const NOlap::TReadDescription& read, con continue; } for (auto&& p : pathInfo->GetPortions()) { - if (p.second->GetRemoveSnapshot().IsZero() && portionsInUse[read.PathId].emplace(p.first).second) { + if (portionsInUse[read.PathId].emplace(p.first).second) { out->IndexPortions.emplace_back(p.second); } } @@ -675,7 +675,7 @@ PrepareStatsReadMetadata(ui64 tabletId, const NOlap::TReadDescription& read, con auto pathInfos = logsIndex->GetTables(fromPathId, toPathId); for (auto&& pathInfo: pathInfos) { for (auto&& p: pathInfo->GetPortions()) { - if (p.second->GetRemoveSnapshot().IsZero() && portionsInUse[p.second->GetPathId()].emplace(p.first).second) { + if (portionsInUse[p.second->GetPathId()].emplace(p.first).second) { out->IndexPortions.emplace_back(p.second); } } From ec53e44b4d93303abd2c8aed9f8d1f9d2275f3d9 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 28 Dec 2023 21:08:58 +0300 Subject: [PATCH 09/10] fix tests --- ydb/core/tx/columnshard/columnshard__scan.cpp | 2 +- .../columnshard/columnshard__stats_scan.cpp | 2 +- .../engines/reader/read_metadata.h | 6 +- .../ut_rw/ut_columnshard_read_write.cpp | 99 ++++++++++--------- 4 files changed, 62 insertions(+), 47 deletions(-) diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index eb1d57c19edc..2c98457913c6 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -645,7 +645,7 @@ PrepareStatsReadMetadata(ui64 tabletId, const NOlap::TReadDescription& read, con auto out = std::make_shared(tabletId, isReverse ? NOlap::TReadStatsMetadata::ESorting::DESC : NOlap::TReadStatsMetadata::ESorting::ASC, - read.GetProgram(), index ? index->GetVersionedIndex().GetSchema(read.GetSnapshot()) : nullptr); + read.GetProgram(), index ? index->GetVersionedIndex().GetSchema(read.GetSnapshot()) : nullptr, read.GetSnapshot()); out->SetPKRangesFilter(read.PKRangesFilter); out->ReadColumnIds.assign(readColumnIds.begin(), readColumnIds.end()); diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp index 7c1da906a010..af5486faed96 100644 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp @@ -81,7 +81,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[9], blobIdString); NArrow::Append(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); - NArrow::Append(*builders[12], portion.HasRemoveSnapshot()); + NArrow::Append(*builders[12], !portion.HasRemoveSnapshot() || ReadMetadata->GetRequestSnapshot() < portion.GetRemoveSnapshot()); std::string strTierName(portion.GetMeta().GetTierName().data(), portion.GetMeta().GetTierName().size()); NArrow::Append(*builders[13], strTierName); } diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h index 98579f24e990..4d322c3716c0 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.h @@ -255,6 +255,7 @@ struct TReadMetadata : public TReadMetadataBase, public std::enable_shared_from_ struct TReadStatsMetadata : public TReadMetadataBase, public std::enable_shared_from_this { private: using TBase = TReadMetadataBase; + TSnapshot RequestSnapshot; std::shared_ptr ResultIndexSchema; public: using TConstPtr = std::shared_ptr; @@ -264,6 +265,8 @@ struct TReadStatsMetadata : public TReadMetadataBase, public std::enable_shared_ std::vector ResultColumnIds; std::deque> IndexPortions; + const TSnapshot& GetRequestSnapshot() const { return RequestSnapshot; } + std::optional GetColumnNameDef(const ui32 columnId) const { if (!ResultIndexSchema) { return {}; @@ -275,8 +278,9 @@ struct TReadStatsMetadata : public TReadMetadataBase, public std::enable_shared_ return f->name(); } - explicit TReadStatsMetadata(ui64 tabletId, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema) + explicit TReadStatsMetadata(ui64 tabletId, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema, const TSnapshot& requestSnapshot) : TBase(sorting, ssaProgram) + , RequestSnapshot(requestSnapshot) , ResultIndexSchema(schema) , TabletId(tabletId) { diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 8e7257e01d09..00f3dd152f2d 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -2175,59 +2175,70 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { auto& msg = scanInited->Record; auto scanActorId = ActorIdFromProto(msg.GetScanActorId()); - ui32 resultLimit = 1024 * 1024; - runtime.Send(new IEventHandle(scanActorId, sender, new NKqp::TEvKqpCompute::TEvScanDataAck(resultLimit, 0, 1))); - auto scan = runtime.GrabEdgeEvent(handle); - auto batchStats = scan->ArrowBatch; - UNIT_ASSERT(batchStats); - // Cerr << batchStats->ToString() << Endl; -// UNIT_ASSERT_VALUES_EQUAL(batchStats->num_rows(), 10068); - ui64 sumCompactedBytes = 0; ui64 sumCompactedRows = 0; ui64 sumInsertedBytes = 0; ui64 sumInsertedRows = 0; std::optional keyColumnId; - for (ui32 i = 0; i < batchStats->num_rows(); ++i) { - auto paths = batchStats->GetColumnByName("PathId"); - auto kinds = batchStats->GetColumnByName("Kind"); - auto rows = batchStats->GetColumnByName("Rows"); - auto bytes = batchStats->GetColumnByName("BlobRangeSize"); - auto rawBytes = batchStats->GetColumnByName("RawBytes"); - auto internalColumnIds = batchStats->GetColumnByName("InternalColumnId"); - - ui64 pathId = static_cast(*paths).Value(i); - auto kind = static_cast(*kinds).Value(i); - const TString kindStr(kind.data(), kind.size()); - ui64 numRows = static_cast(*rows).Value(i); - ui64 numBytes = static_cast(*bytes).Value(i); - ui64 numRawBytes = static_cast(*rawBytes).Value(i); - ui32 internalColumnId = static_cast(*internalColumnIds).Value(i); - if (!keyColumnId) { - keyColumnId = internalColumnId; + while (true) { + ui32 resultLimit = 1024 * 1024; + runtime.Send(new IEventHandle(scanActorId, sender, new NKqp::TEvKqpCompute::TEvScanDataAck(resultLimit, 0, 1))); + auto scan = runtime.GrabEdgeEvent(handle); + auto batchStats = scan->ArrowBatch; + if (scan->Finished) { + AFL_VERIFY(!scan->ArrowBatch || !scan->ArrowBatch->num_rows()); + break; } - Cerr << "[" << __LINE__ << "] " << table.Pk[0].second.GetTypeId() << " " - << pathId << " " << kindStr << " " << numRows << " " << numBytes << " " << numRawBytes << "\n"; - - if (pathId == tableId) { - if (kindStr == ::ToString(NOlap::NPortion::EProduced::COMPACTED) || kindStr == ::ToString(NOlap::NPortion::EProduced::SPLIT_COMPACTED)) { - sumCompactedBytes += numBytes; - if (*keyColumnId == internalColumnId) { - sumCompactedRows += numRows; - } - //UNIT_ASSERT(numRawBytes > numBytes); + UNIT_ASSERT(batchStats); +// Cerr << batchStats->ToString() << Endl; + + for (ui32 i = 0; i < batchStats->num_rows(); ++i) { + auto paths = batchStats->GetColumnByName("PathId"); + auto kinds = batchStats->GetColumnByName("Kind"); + auto rows = batchStats->GetColumnByName("Rows"); + auto bytes = batchStats->GetColumnByName("BlobRangeSize"); + auto rawBytes = batchStats->GetColumnByName("RawBytes"); + auto internalColumnIds = batchStats->GetColumnByName("InternalColumnId"); + auto activities = batchStats->GetColumnByName("Activity"); + AFL_VERIFY(activities); + + ui64 pathId = static_cast(*paths).Value(i); + auto kind = static_cast(*kinds).Value(i); + const TString kindStr(kind.data(), kind.size()); + ui64 numRows = static_cast(*rows).Value(i); + ui64 numBytes = static_cast(*bytes).Value(i); + ui64 numRawBytes = static_cast(*rawBytes).Value(i); + ui32 internalColumnId = static_cast(*internalColumnIds).Value(i); + bool activity = static_cast(*activities).Value(i); + if (!activity) { + continue; } - if (kindStr == ::ToString(NOlap::NPortion::EProduced::INSERTED)) { - sumInsertedBytes += numBytes; - if (*keyColumnId == internalColumnId) { - sumInsertedRows += numRows; + if (!keyColumnId) { + keyColumnId = internalColumnId; + } + Cerr << "[" << __LINE__ << "] " << activity << " " << table.Pk[0].second.GetTypeId() << " " + << pathId << " " << kindStr << " " << numRows << " " << numBytes << " " << numRawBytes << "\n"; + + if (pathId == tableId) { + if (kindStr == ::ToString(NOlap::NPortion::EProduced::COMPACTED) || kindStr == ::ToString(NOlap::NPortion::EProduced::SPLIT_COMPACTED)) { + sumCompactedBytes += numBytes; + if (*keyColumnId == internalColumnId) { + sumCompactedRows += numRows; + } + //UNIT_ASSERT(numRawBytes > numBytes); + } + if (kindStr == ::ToString(NOlap::NPortion::EProduced::INSERTED)) { + sumInsertedBytes += numBytes; + if (*keyColumnId == internalColumnId) { + sumInsertedRows += numRows; + } + //UNIT_ASSERT(numRawBytes > numBytes); } - //UNIT_ASSERT(numRawBytes > numBytes); + } else { + UNIT_ASSERT_VALUES_EQUAL(numRows, 0); + UNIT_ASSERT_VALUES_EQUAL(numBytes, 0); + UNIT_ASSERT_VALUES_EQUAL(numRawBytes, 0); } - } else { - UNIT_ASSERT_VALUES_EQUAL(numRows, 0); - UNIT_ASSERT_VALUES_EQUAL(numBytes, 0); - UNIT_ASSERT_VALUES_EQUAL(numRawBytes, 0); } } Cerr << "compacted=" << sumCompactedRows << ";inserted=" << sumInsertedRows << ";expected=" << fullNumRows << ";" << Endl; From 0cfa2d03592621b09fb0e71192de2311f8a11710 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 29 Dec 2023 10:42:34 +0300 Subject: [PATCH 10/10] fix test --- ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index d9ce0b2ebf1a..330f1549f929 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -3995,6 +3995,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto settings = TKikimrSettings().SetWithSampleTables(false); TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); Tests::NCommon::TLoggerInit(kikimr).Initialize(); TLocalHelper(kikimr).CreateTestOlapTable("olapTable_1"); @@ -4030,14 +4031,13 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3*2); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 3ull); UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[2].at("Kind")), "INSERTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[5].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[4].at("Kind")), "INSERTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[5].at("Kind")), "SPLIT_COMPACTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[1].at("Kind")), "INSERTED"); } { @@ -4051,10 +4051,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto rows = ExecuteScanQuery(tableClient, selectQuery); - ui32 numExpected = 3*3*2; + ui32 numExpected = 3*3; UNIT_ASSERT_VALUES_EQUAL(rows.size(), numExpected); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "SPLIT_COMPACTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[numExpected-1].at("PathId")), 3ull); UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[numExpected-1].at("Kind")), "INSERTED"); } @@ -4074,10 +4074,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto rows = ExecuteScanQuery(tableClient, selectQuery); - ui32 numExpected = 2*3*2; + ui32 numExpected = 2*3; UNIT_ASSERT_VALUES_EQUAL(rows.size(), numExpected); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "SPLIT_COMPACTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[numExpected-1].at("PathId")), 3ull); UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[numExpected-1].at("Kind")), "INSERTED"); }