From eec175032647d7622a3d5554ac6567b0197ee0d5 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Wed, 10 Jul 2024 09:21:26 +0300 Subject: [PATCH 01/12] correct index construction --- .../engines/scheme/indexes/abstract/meta.cpp | 27 +++++++++++++++---- .../engines/scheme/indexes/abstract/meta.h | 14 ++-------- .../engines/storage/indexes/bloom/meta.cpp | 25 +++++++++-------- .../engines/storage/indexes/bloom/meta.h | 21 +++++++++++++++ .../engines/storage/indexes/portions/meta.cpp | 6 +++-- .../engines/storage/indexes/portions/meta.h | 2 +- 6 files changed, 64 insertions(+), 31 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp index ae4fd22fba3d..45979b1c9bc3 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp @@ -2,13 +2,30 @@ namespace NKikimr::NOlap::NIndexes { -bool IIndexMeta::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) { - IndexId = proto.GetId(); - AFL_VERIFY(IndexId); - IndexName = proto.GetName(); - AFL_VERIFY(IndexName); +bool IIndexMeta::DeserializeFromProto(const NKikimrSchemeOp::TOlapSecondaryData& proto) { + if (!proto.GetId()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse secondary data builder")("reason", "incorrect id - 0"); + return false; + } + if (!proto.GetName()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse secondary data builder")("reason", "incorrect name - empty string"); + return false; + } + EntityId = proto.GetId(); + Name = proto.GetName(); StorageId = proto.GetStorageId() ? proto.GetStorageId() : IStoragesManager::DefaultStorageId; return DoDeserializeFromProto(proto); } +void IIndexMeta::SerializeToProto(NKikimrSchemeOp::TOlapSecondaryData& proto) const { + AFL_VERIFY(EntityId); + proto.SetId(EntityId); + AFL_VERIFY(Name); + proto.SetName(Name); + if (StorageId) { + proto.SetStorageId(StorageId); + } + return DoSerializeToProto(proto); +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h index 821d36af8cf5..7af82c512e56 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h @@ -30,7 +30,7 @@ class IIndexMeta { YDB_READONLY(ui32, IndexId, 0); YDB_READONLY(TString, StorageId, IStoragesManager::DefaultStorageId); protected: - virtual std::shared_ptr DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const = 0; + virtual std::shared_ptr DoBuildIndex(THashMap>>& data, const TIndexInfo& indexInfo) const = 0; virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const = 0; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) = 0; virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const = 0; @@ -69,17 +69,7 @@ class IIndexMeta { } bool DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto); - - void SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const { - AFL_VERIFY(IndexId); - proto.SetId(IndexId); - AFL_VERIFY(IndexName); - proto.SetName(IndexName); - if (StorageId) { - proto.SetStorageId(StorageId); - } - return DoSerializeToProto(proto); - } + void SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const; virtual TString GetClassName() const = 0; }; diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp index 3c52518d0b8b..01db1c9f1dc4 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp @@ -12,22 +12,23 @@ namespace NKikimr::NOlap::NIndexes { std::shared_ptr TBloomIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { std::set hashes; - for (ui32 i = 0; i < HashesCount; ++i) { - NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer(3 * i); + { + NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer; for (reader.Start(); reader.IsCorrect(); reader.ReadNext()) { hashCalcer.Start(); for (auto&& i : reader) { NArrow::NHash::TXX64::AppendField(i.GetCurrentChunk(), i.GetCurrentRecordIndex(), hashCalcer); } - const ui64 h = hashCalcer.Finish(); - hashes.emplace(h); + hashes.emplace(hashCalcer.Finish()); } } - const ui32 bitsCount = hashes.size() / std::log(2); + + const ui32 bitsCount = HashesCount * hashes.size() / std::log(2); std::vector flags(bitsCount, false); - for (auto&& i : hashes) { - flags[i % flags.size()] = true; - } + const auto pred = [](const ui64 hash) { + flags[hash % flags.size()] = true; + }; + BuildHashesSet(hashes, pred); arrow::BooleanBuilder builder; auto res = builder.Reserve(flags.size()); @@ -57,14 +58,16 @@ void TBloomIndexMeta::DoFillIndexCheckers(const std::shared_ptr hashes; + const auto pred = [&hashes](const ui64 hash) { + hashes.emplace(hash); + }; for (ui32 i = 0; i < HashesCount; ++i) { - NArrow::NHash::NXX64::TStreamStringHashCalcer calcer(3 * i); + NArrow::NHash::NXX64::TStreamStringHashCalcer calcer; calcer.Start(); for (auto&& i : foundColumns) { NArrow::NHash::TXX64::AppendField(i.second, calcer); } - const ui64 hash = calcer.Finish(); - hashes.emplace(hash); + BuildHashesSet(calcer.Finish(), pred); } branch->MutableIndexes().emplace_back(std::make_shared(GetIndexId(), std::move(hashes))); } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h index d7cb9c6ab4da..beaf3b85ac09 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h @@ -20,6 +20,27 @@ class TBloomIndexMeta: public TIndexByColumns { AFL_VERIFY(FalsePositiveProbability < 1 && FalsePositiveProbability >= 0.01); HashesCount = -1 * std::log(FalsePositiveProbability) / std::log(2); } + + static const ui64 HashesConstructorP = ((ui64)2 << 31) - 1; + static const ui64 HashesConstructorA = (ui64)2 << 16; + + template + void BuildHashesSet(const ui64 originalHash, const TActor& actor) const { + AFL_VERIFY(HashesCount < p); + for (ui32 b = 1; b < HashesCount; ++b) { + const ui64 hash = (HashesConstructorA * hOriginal + b) % HashesConstructorP; + actor(hash); + } + } + + template + void BuildHashesSet(const TContainer& originalHashes, const TActor& actor) const { + AFL_VERIFY(HashesCount < HashesConstructorP); + for (auto&& hOriginal : originalHashes) { + BuildHashesSet(hOriginal, actor); + } + } + protected: virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const override { const auto* bMeta = dynamic_cast(&newMeta); diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp index 0aa6e4168f7e..d818e5a46c33 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp @@ -11,7 +11,8 @@ void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRan portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); } -std::shared_ptr TIndexByColumns::DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const { +std::shared_ptr TIndexByColumns::DoBuildIndex( + THashMap>>& data, const TIndexInfo& indexInfo) const { AFL_VERIFY(Serializer); AFL_VERIFY(data.size()); std::vector columnReaders; @@ -27,7 +28,8 @@ std::shared_ptr TIndexByColumns::DoBuildIndex TChunkedBatchReader reader(std::move(columnReaders)); std::shared_ptr indexBatch = DoBuildIndexImpl(reader); const TString indexData = Serializer->SerializeFull(indexBatch); - return std::make_shared(TChunkAddress(indexId, 0), recordsCount, NArrow::GetBatchDataSize(indexBatch), indexData); + return std::make_shared( + TChunkAddress(GetIndexId(), 0), recordsCount, NArrow::GetBatchDataSize(indexBatch), indexData); } bool TIndexByColumns::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h index fba7daa9e77c..b0c2846da6d3 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h @@ -53,7 +53,7 @@ class TIndexByColumns: public IIndexMeta { std::set ColumnIds; virtual std::shared_ptr DoBuildIndexImpl(TChunkedBatchReader& reader) const = 0; - virtual std::shared_ptr DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const override final; + virtual std::shared_ptr DoBuildIndex(THashMap>>& data, const TIndexInfo& indexInfo) const override final; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) override; TConclusionStatus CheckSameColumnsForModification(const IIndexMeta& newMeta) const; From a9eb8aee3f7038a4159d386b4722ca72c7aa2c1b Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Wed, 10 Jul 2024 09:55:15 +0300 Subject: [PATCH 02/12] fix build --- .../tx/columnshard/engines/scheme/index_info.h | 4 ++-- .../engines/scheme/indexes/abstract/meta.cpp | 16 ++++++++-------- .../engines/scheme/indexes/abstract/meta.h | 4 ++-- .../engines/storage/indexes/bloom/meta.cpp | 6 +++--- .../engines/storage/indexes/bloom/meta.h | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index a9f77745110d..a14918ef3f43 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -209,7 +209,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { void AppendIndexes(THashMap>>& originalData) const { for (auto&& i : Indexes) { - std::shared_ptr chunk = i.second->BuildIndex(i.first, originalData, *this); + std::shared_ptr chunk = i.second->BuildIndex(originalData, *this); AFL_VERIFY(originalData.emplace(i.first, std::vector>({chunk})).second); } } @@ -217,7 +217,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { void AppendIndex(THashMap>>& originalData, const ui32 indexId) const { auto it = Indexes.find(indexId); AFL_VERIFY(it != Indexes.end()); - std::shared_ptr chunk = it->second->BuildIndex(indexId, originalData, *this); + std::shared_ptr chunk = it->second->BuildIndex(originalData, *this); AFL_VERIFY(originalData.emplace(indexId, std::vector>({chunk})).second); } diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp index 45979b1c9bc3..cd7f6d7236ca 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp @@ -2,7 +2,7 @@ namespace NKikimr::NOlap::NIndexes { -bool IIndexMeta::DeserializeFromProto(const NKikimrSchemeOp::TOlapSecondaryData& proto) { +bool IIndexMeta::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) { if (!proto.GetId()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse secondary data builder")("reason", "incorrect id - 0"); return false; @@ -11,17 +11,17 @@ bool IIndexMeta::DeserializeFromProto(const NKikimrSchemeOp::TOlapSecondaryData& AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse secondary data builder")("reason", "incorrect name - empty string"); return false; } - EntityId = proto.GetId(); - Name = proto.GetName(); + IndexId = proto.GetId(); + IndexName = proto.GetName(); StorageId = proto.GetStorageId() ? proto.GetStorageId() : IStoragesManager::DefaultStorageId; return DoDeserializeFromProto(proto); } -void IIndexMeta::SerializeToProto(NKikimrSchemeOp::TOlapSecondaryData& proto) const { - AFL_VERIFY(EntityId); - proto.SetId(EntityId); - AFL_VERIFY(Name); - proto.SetName(Name); +void IIndexMeta::SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const { + AFL_VERIFY(IndexId); + proto.SetId(IndexId); + AFL_VERIFY(IndexName); + proto.SetName(IndexName); if (StorageId) { proto.SetStorageId(StorageId); } diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h index 7af82c512e56..6938c6a27fce 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h @@ -60,8 +60,8 @@ class IIndexMeta { virtual ~IIndexMeta() = default; - std::shared_ptr BuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const { - return DoBuildIndex(indexId, data, indexInfo); + std::shared_ptr BuildIndex(THashMap>>& data, const TIndexInfo& indexInfo) const { + return DoBuildIndex(data, indexInfo); } void FillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp index 01db1c9f1dc4..7b64faf5ebd3 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp @@ -13,7 +13,7 @@ namespace NKikimr::NOlap::NIndexes { std::shared_ptr TBloomIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { std::set hashes; { - NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer; + NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer(0); for (reader.Start(); reader.IsCorrect(); reader.ReadNext()) { hashCalcer.Start(); for (auto&& i : reader) { @@ -25,7 +25,7 @@ std::shared_ptr TBloomIndexMeta::DoBuildIndexImpl(TChunkedBa const ui32 bitsCount = HashesCount * hashes.size() / std::log(2); std::vector flags(bitsCount, false); - const auto pred = [](const ui64 hash) { + const auto pred = [&flags](const ui64 hash) { flags[hash % flags.size()] = true; }; BuildHashesSet(hashes, pred); @@ -61,8 +61,8 @@ void TBloomIndexMeta::DoFillIndexCheckers(const std::shared_ptr void BuildHashesSet(const ui64 originalHash, const TActor& actor) const { - AFL_VERIFY(HashesCount < p); + AFL_VERIFY(HashesCount < HashesConstructorP); for (ui32 b = 1; b < HashesCount; ++b) { - const ui64 hash = (HashesConstructorA * hOriginal + b) % HashesConstructorP; + const ui64 hash = (HashesConstructorA * originalHash + b) % HashesConstructorP; actor(hash); } } From 13adf16574f180a37ddd8a3a11cca1a9718575d0 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 12 Jul 2024 16:40:14 +0300 Subject: [PATCH 03/12] indexes inplace instead of statistics --- .../formats/arrow/scalar/serialization.cpp | 47 +++++++ ydb/core/formats/arrow/scalar/serialization.h | 15 +++ ydb/core/formats/arrow/scalar/ya.make | 14 +++ ydb/core/formats/arrow/ya.make | 1 + ydb/core/kqp/ut/olap/helpers/typed_local.cpp | 8 +- ydb/core/kqp/ut/olap/helpers/typed_local.h | 9 +- ydb/core/kqp/ut/olap/indexes_ut.cpp | 2 +- ydb/core/kqp/ut/olap/sys_view_ut.cpp | 72 ++++++----- ydb/core/protos/flat_scheme_op.proto | 11 +- .../abstract/storages_manager.cpp | 1 + .../blobs_action/abstract/storages_manager.h | 1 + .../columnshard/blobs_action/common/const.h | 1 + .../blobs_action/local/storage.cpp | 62 ++++++++++ .../columnshard/blobs_action/local/storage.h | 53 ++++++++ .../tx/columnshard/blobs_action/local/ya.make | 11 ++ .../blobs_action/storages_manager/manager.cpp | 23 ++-- .../blobs_action/storages_manager/ya.make | 1 + ydb/core/tx/columnshard/columnshard_schema.h | 35 ++++-- .../tx/columnshard/counters/engine_logs.cpp | 16 ++- .../data_sharing/protos/data.proto | 5 +- .../engines/changes/general_compaction.cpp | 101 ++++++++++----- .../engines/changes/with_appended.cpp | 15 ++- .../tx/columnshard/engines/db_wrapper.cpp | 23 ++-- .../engines/portions/column_record.h | 3 + .../engines/portions/constructor.cpp | 8 +- .../engines/portions/constructor.h | 4 +- .../engines/portions/constructor_meta.cpp | 17 --- .../engines/portions/constructor_meta.h | 11 -- .../engines/portions/index_chunk.cpp | 48 ++++++- .../engines/portions/index_chunk.h | 47 ++++++- .../tx/columnshard/engines/portions/meta.cpp | 3 - .../tx/columnshard/engines/portions/meta.h | 2 - .../engines/portions/portion_info.cpp | 69 ++++++----- .../engines/portions/portion_info.h | 29 ++--- .../engines/portions/read_with_blobs.cpp | 44 +++---- .../engines/portions/write_with_blobs.cpp | 23 ++-- .../engines/portions/write_with_blobs.h | 4 +- .../engines/reader/abstract/read_metadata.h | 5 +- .../reader/plain_reader/iterator/source.cpp | 14 ++- .../engines/reader/sys_view/chunks/chunks.cpp | 14 ++- .../reader/sys_view/portions/portions.cpp | 11 +- .../columnshard/engines/scheme/index_info.cpp | 47 ++++--- .../columnshard/engines/scheme/index_info.h | 59 +++++---- .../engines/scheme/indexes/abstract/meta.cpp | 11 ++ .../engines/scheme/indexes/abstract/meta.h | 13 +- .../storage/actualizer/tiering/tiering.cpp | 10 +- .../engines/storage/chunks/column.h | 3 + .../columnshard/engines/storage/chunks/data.h | 4 + .../engines/storage/chunks/null_column.h | 7 +- .../engines/storage/indexes/bloom/checker.cpp | 9 +- .../engines/storage/indexes/bloom/checker.h | 36 ++++++ .../engines/storage/indexes/bloom/meta.cpp | 17 +-- .../engines/storage/indexes/bloom/meta.h | 4 +- .../storage/indexes/max/constructor.cpp | 56 +++++++++ .../engines/storage/indexes/max/constructor.h | 30 +++++ .../engines/storage/indexes/max/meta.cpp | 52 ++++++++ .../engines/storage/indexes/max/meta.h | 78 ++++++++++++ .../engines/storage/indexes/max/ya.make | 14 +++ .../engines/storage/indexes/portions/meta.cpp | 19 +-- .../engines/storage/indexes/portions/meta.h | 14 ++- .../engines/storage/indexes/ya.make | 1 + .../tx/columnshard/hooks/abstract/abstract.h | 6 +- .../columnshard/normalizer/portion/clean.cpp | 1 + .../normalizer/portion/normalizer.cpp | 117 ++++++++++++------ .../normalizer/portion/normalizer.h | 7 +- .../tx/columnshard/splitter/abstract/chunks.h | 16 ++- .../tx/columnshard/splitter/batch_slice.cpp | 1 - ydb/core/tx/columnshard/splitter/chunks.h | 24 +++- ydb/core/tx/columnshard/splitter/settings.h | 1 + ydb/core/tx/columnshard/splitter/simple.cpp | 4 + ydb/core/tx/columnshard/splitter/simple.h | 2 + .../test_helper/columnshard_ut_common.cpp | 30 +++++ .../test_helper/columnshard_ut_common.h | 27 +--- .../tx/columnshard/test_helper/controllers.h | 2 +- .../tx/schemeshard/olap/schema/schema.cpp | 19 --- ydb/core/tx/schemeshard/olap/schema/schema.h | 7 -- .../tx/schemeshard/olap/schema/update.cpp | 4 - ydb/core/tx/schemeshard/olap/schema/update.h | 2 - ydb/core/tx/schemeshard/olap/schema/ya.make | 2 - .../tx/schemeshard/olap/statistics/schema.cpp | 92 -------------- .../tx/schemeshard/olap/statistics/schema.h | 80 ------------ .../tx/schemeshard/olap/statistics/update.cpp | 35 ------ .../tx/schemeshard/olap/statistics/update.h | 43 ------- .../tx/schemeshard/olap/statistics/ya.make | 15 --- ydb/core/tx/schemeshard/olap/ya.make | 1 - 85 files changed, 1184 insertions(+), 721 deletions(-) create mode 100644 ydb/core/formats/arrow/scalar/serialization.cpp create mode 100644 ydb/core/formats/arrow/scalar/serialization.h create mode 100644 ydb/core/formats/arrow/scalar/ya.make create mode 100644 ydb/core/tx/columnshard/blobs_action/local/storage.cpp create mode 100644 ydb/core/tx/columnshard/blobs_action/local/storage.h create mode 100644 ydb/core/tx/columnshard/blobs_action/local/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h create mode 100644 ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h create mode 100644 ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make delete mode 100644 ydb/core/tx/schemeshard/olap/statistics/schema.cpp delete mode 100644 ydb/core/tx/schemeshard/olap/statistics/schema.h delete mode 100644 ydb/core/tx/schemeshard/olap/statistics/update.cpp delete mode 100644 ydb/core/tx/schemeshard/olap/statistics/update.h delete mode 100644 ydb/core/tx/schemeshard/olap/statistics/ya.make diff --git a/ydb/core/formats/arrow/scalar/serialization.cpp b/ydb/core/formats/arrow/scalar/serialization.cpp new file mode 100644 index 000000000000..2b8fb74a92ab --- /dev/null +++ b/ydb/core/formats/arrow/scalar/serialization.cpp @@ -0,0 +1,47 @@ +#include "serialization.h" +#include +#include + +namespace NKikimr::NArrow::NScalar { + +TConclusion TSerializer::SerializePayloadToString(const std::shared_ptr& scalar) { + TString resultString; + const bool resultFlag = NArrow::SwitchType(scalar->type->id(), [&](const auto& type) { + using TWrap = std::decay_t; + if constexpr (arrow::has_c_type()) { + using CType = typename TWrap::T::c_type; + using ScalarType = typename arrow::TypeTraits::ScalarType; + const ScalarType* scalarTyped = static_cast(scalar.get()); + resultString = TString(sizeof(CType), '\0'); + memcpy(&resultString[0], scalarTyped->data(), sizeof(CType)); + return true; + } + return false; + }); + if (!resultFlag) { + return TConclusionStatus::Fail("incorrect scalar type for payload serialization: " + scalar->type->ToString()); + } + return resultString; +} + +TConclusion> TSerializer::DeserializeFromStringWithPayload(const TString& data, const std::shared_ptr& dataType) { + AFL_VERIFY(dataType); + std::shared_ptr result; + const bool resultFlag = NArrow::SwitchType(dataType->id(), [&](const auto& type) { + using TWrap = std::decay_t; + if constexpr (arrow::has_c_type()) { + using CType = typename TWrap::T::c_type; + AFL_VERIFY(data.size() == sizeof(CType)); + using ScalarType = typename arrow::TypeTraits::ScalarType; + result = std::make_shared(*(CType*)&data[0], dataType); + return true; + } + return false; + }); + if (!resultFlag) { + return TConclusionStatus::Fail("incorrect scalar type for payload deserialization: " + dataType->ToString()); + } + return result; +} + +} \ No newline at end of file diff --git a/ydb/core/formats/arrow/scalar/serialization.h b/ydb/core/formats/arrow/scalar/serialization.h new file mode 100644 index 000000000000..c0d227ae4582 --- /dev/null +++ b/ydb/core/formats/arrow/scalar/serialization.h @@ -0,0 +1,15 @@ +#pragma once +#include + +#include +#include + +#include + +namespace NKikimr::NArrow::NScalar { +class TSerializer { +public: + static TConclusion SerializePayloadToString(const std::shared_ptr& scalar); + static TConclusion> DeserializeFromStringWithPayload(const TString& data, const std::shared_ptr& dataType); +}; +} \ No newline at end of file diff --git a/ydb/core/formats/arrow/scalar/ya.make b/ydb/core/formats/arrow/scalar/ya.make new file mode 100644 index 000000000000..d6284ba9ca44 --- /dev/null +++ b/ydb/core/formats/arrow/scalar/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/conclusion + ydb/core/formats/arrow/switch + ydb/library/actors/core +) + +SRCS( + serialization.cpp +) + +END() diff --git a/ydb/core/formats/arrow/ya.make b/ydb/core/formats/arrow/ya.make index d4bf1f8529c2..4615047b39fa 100644 --- a/ydb/core/formats/arrow/ya.make +++ b/ydb/core/formats/arrow/ya.make @@ -12,6 +12,7 @@ PEERDIR( ydb/core/formats/arrow/dictionary ydb/core/formats/arrow/transformer ydb/core/formats/arrow/reader + ydb/core/formats/arrow/scalar ydb/core/formats/arrow/hash ydb/library/actors/core ydb/library/arrow_kernels diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp index e592ed398d2b..a37600897d2b 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp @@ -143,16 +143,16 @@ void TTypedLocalHelper::FillPKOnly(const double pkKff /*= 0*/, const ui32 numRow TBase::SendDataViaActorSystem(TablePath, batch); } -void TTypedLocalHelper::GetStats(std::vector& stats, const bool verbose /*= false*/) { +void TTypedLocalHelper::GetStats(std::vector& stats, const bool verbose /*= false*/) { TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_portion_stats` WHERE Activity = true"; auto tableClient = KikimrRunner.GetTableClient(); auto rows = ExecuteScanQuery(tableClient, selectQuery, verbose); for (auto&& r : rows) { for (auto&& c : r) { if (c.first == "Stats") { - NKikimrColumnShardStatisticsProto::TPortionStorage store; - AFL_VERIFY(google::protobuf::TextFormat::ParseFromString(GetUtf8(c.second), &store)); - stats.emplace_back(store); + NJson::TJsonValue jsonStore; + AFL_VERIFY(NJson::ReadJsonFastTree(GetUtf8(c.second), &jsonStore)); + stats.emplace_back(jsonStore); } } } diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.h b/ydb/core/kqp/ut/olap/helpers/typed_local.h index 1afef6b7a19b..29852b7761da 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.h +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.h @@ -1,11 +1,14 @@ #pragma once #include #include -#include #include #include #include +#include + +#include + namespace NKikimr::NKqp { class TTypedLocalHelper: public Tests::NCS::THelper { @@ -19,7 +22,7 @@ class TTypedLocalHelper: public Tests::NCS::THelper { protected: virtual TString GetTestTableSchema() const override; virtual std::vector GetShardingColumns() const override { - return {"pk_int"}; + return { "pk_int" }; } public: TTypedLocalHelper(const TString& typeName, TKikimrRunner& kikimrRunner, const TString& tableName = "olapTable", const TString& storeName = "olapStore") @@ -66,7 +69,7 @@ class TTypedLocalHelper: public Tests::NCS::THelper { void GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose = false, const std::vector columnNames = {}); - void GetStats(std::vector& stats, const bool verbose = false); + void GetStats(std::vector& stats, const bool verbose = false); void GetCount(ui64& count); diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp index 6a80bc2bf133..13eeebc02d93 100644 --- a/ydb/core/kqp/ut/olap/indexes_ut.cpp +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -310,7 +310,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { CompareYson(result, R"([[1u;]])"); } - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < 0.20 * csController->GetIndexesSkippingOnSelect().Val()) + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < 0.3 * csController->GetIndexesSkippingOnSelect().Val()) ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); } diff --git a/ydb/core/kqp/ut/olap/sys_view_ut.cpp b/ydb/core/kqp/ut/olap/sys_view_ut.cpp index 4242f7676298..dc234ac3bffe 100644 --- a/ydb/core/kqp/ut/olap/sys_view_ut.cpp +++ b/ydb/core/kqp/ut/olap/sys_view_ut.cpp @@ -302,63 +302,61 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { auto tableClient = kikimr.GetTableClient(); { helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`);"); - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=field_var, TYPE=variability, FEATURES=`{\"column_name\" : \"field\"}`);"); - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=pk_int_max, TYPE=max, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=pk_int_max, TYPE=MAX, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { + csController->WaitActualization(TDuration::Seconds(40)); + { ui64 rawBytes2; ui64 bytes2; - helper.GetVolumes(rawBytes2, bytes2, false, {"field"}); + helper.GetVolumes(rawBytes2, bytes2, false, { "field" }); AFL_VERIFY(rawBytes2 == rawBytes1)("f1", rawBytes1)("f2", rawBytes2); AFL_VERIFY(bytes2 < bytes1 * 0.5)("f1", bytes1)("f2", bytes2); - std::vector stats; + std::vector stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 2) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } - } - return true; + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 1); + AFL_VERIFY(i.GetArraySafe()[0]["chunk_idx"].GetInteger() == 0); + AFL_VERIFY(i.GetArraySafe()[0]["entity_id"].GetInteger() == 4); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() >= 799992); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() <= 799999); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("json", i); } - ); + } } { - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=pk_int_max);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=pk_int_max);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { - std::vector stats; + csController->WaitActualization(TDuration::Seconds(30)); + { + std::vector stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 1) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 0)("json", i); } - return true; - }); + } } { - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=pk_int_max, TYPE=max, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=pk_int_max, TYPE=MAX, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { - std::vector stats; + csController->WaitActualization(TDuration::Seconds(40)); + { + std::vector stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 2) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 1); + AFL_VERIFY(i.GetArraySafe()[0]["chunk_idx"].GetInteger() == 0); + AFL_VERIFY(i.GetArraySafe()[0]["entity_id"].GetInteger() == 5)("json", i); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() >= 799992); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() <= 799999); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("json", i); } - return true; - } - ); + } } } diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index aa66c44d3ea0..173d90e9cdf8 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -449,6 +449,10 @@ message TRequestedBloomFilter { repeated string ColumnNames = 3; } +message TRequestedMaxIndex { + optional string ColumnName = 1; +} + message TOlapIndexRequested { optional string Name = 1; optional TCompressionOptions Compression = 3; @@ -457,6 +461,7 @@ message TOlapIndexRequested { optional string ClassName = 39; oneof Implementation { TRequestedBloomFilter BloomFilter = 40; + TRequestedMaxIndex MaxIndex = 41; } } @@ -466,6 +471,10 @@ message TBloomFilter { repeated uint32 ColumnIds = 3; } +message TMaxIndex { + optional uint32 ColumnId = 1; +} + message TOlapIndexDescription { // This id is auto-generated by schemeshard optional uint32 Id = 1; @@ -478,6 +487,7 @@ message TOlapIndexDescription { optional string ClassName = 40; oneof Implementation { TBloomFilter BloomFilter = 41; + TMaxIndex MaxIndex = 42; } } @@ -541,7 +551,6 @@ message TColumnTableSchema { optional bool CompositeMarksDeprecated = 9 [ default = false ]; repeated TOlapIndexDescription Indexes = 10; - repeated NKikimrColumnShardStatisticsProto.TOperatorContainer Statistics = 11; optional TColumnTableSchemeOptions Options = 12; } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp index 8b80dd9be277..0902a464871a 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp @@ -51,6 +51,7 @@ void IStoragesManager::OnTieringModified(const std::shared_ptr& storageSharedBlobsManager) + : TBase(storageId, storageSharedBlobsManager) +{ +} + +namespace { +class TBlobInUseTracker: public IBlobInUseTracker { +private: + virtual bool DoFreeBlob(const NOlap::TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + return true; + } + virtual bool DoUseBlob(const NOlap::TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + return true; + } + virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& /*blobId*/) const override { + AFL_VERIFY(false); + return false; + } + +public: +}; +} + +std::shared_ptr TOperator::GetBlobsTracker() const { + static std::shared_ptr result = std::make_shared(); + return result; +} + +namespace { +class TBlobsDeclareRemovingAction: public IBlobsDeclareRemovingAction { +private: + using TBase = IBlobsDeclareRemovingAction; +protected: + virtual void DoDeclareRemove(const TTabletId /*tabletId*/, const TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + } + virtual void DoOnExecuteTxAfterRemoving(TBlobManagerDb& /*dbBlobs*/, const bool /*blobsWroteSuccessfully*/) override { + } + virtual void DoOnCompleteTxAfterRemoving(const bool /*blobsWroteSuccessfully*/) override { + + } + +public: + TBlobsDeclareRemovingAction(const TString& storageId, const TTabletId selfTabletId, const std::shared_ptr& counters) + : TBase(storageId, selfTabletId, counters) { + } +}; +} + +std::shared_ptr TOperator::DoStartDeclareRemovingAction( + const std::shared_ptr& counters) { + static std::shared_ptr result = std::make_shared(GetStorageId(), GetSelfTabletId(), counters); + return result; +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/local/storage.h b/ydb/core/tx/columnshard/blobs_action/local/storage.h new file mode 100644 index 000000000000..beb5c4286cab --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/local/storage.h @@ -0,0 +1,53 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NBlobOperations::NLocal { + +class TOperator: public IBlobsStorageOperator { +private: + using TBase = IBlobsStorageOperator; + NSplitter::TSplitSettings SplitSettings = Default(); + +protected: + virtual const NSplitter::TSplitSettings& DoGetBlobSplitSettings() const override { + return SplitSettings; + } + virtual std::shared_ptr DoStartDeclareRemovingAction( + const std::shared_ptr& /*counters*/) override; + virtual std::shared_ptr DoStartWritingAction() override { + AFL_VERIFY(false)("problem", "unimplemented method"); + return nullptr; + }; + virtual std::shared_ptr DoStartReadingAction() override { + AFL_VERIFY(false)("problem", "unimplemented method"); + return nullptr; + }; + virtual std::shared_ptr DoCreateGCAction(const std::shared_ptr& /*counters*/) const override { + return nullptr; + } + virtual void DoStartGCAction(const std::shared_ptr& /*action*/) const override { + AFL_VERIFY(false)("problem", "unimplemented method"); + }; + virtual bool DoLoad(IBlobManagerDb& /*dbBlobs*/) override { + return true; + }; + virtual void DoOnTieringModified(const std::shared_ptr& /*tiers*/) override { + return; + }; + +public: + TOperator(const TString& storageId, const std::shared_ptr& storageSharedBlobsManager); + + virtual TTabletsByBlob GetBlobsToDelete() const override { + return Default(); + } + + virtual std::shared_ptr GetBlobsTracker() const override; + + virtual bool HasToDelete(const TUnifiedBlobId& /*blobId*/, const TTabletId /*tabletId*/) const override { + return false; + } + +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/local/ya.make b/ydb/core/tx/columnshard/blobs_action/local/ya.make new file mode 100644 index 000000000000..ed4199e82243 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/local/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + storage.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/blobs_action/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp index 255845c9fb92..bd2a8acec539 100644 --- a/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp @@ -1,18 +1,22 @@ #include "manager.h" -#include + #include +#include +#include #ifndef KIKIMR_DISABLE_S3_OPS #include #endif -#include #include +#include namespace NKikimr::NOlap { std::shared_ptr TStoragesManager::DoBuildOperator(const TString& storageId) { if (storageId == TBase::DefaultStorageId) { - return std::make_shared(storageId, Shard.SelfId(), Shard.Info(), - Shard.Executor()->Generation(), SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + return std::make_shared( + storageId, Shard.SelfId(), Shard.Info(), Shard.Executor()->Generation(), SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + } else if (storageId == TBase::LocalMetadataStorageId) { + return std::make_shared(storageId, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); } else if (storageId == TBase::MemoryStorageId) { #ifndef KIKIMR_DISABLE_S3_OPS { @@ -20,7 +24,7 @@ std::shared_ptr TStoragesManager::DoBuild TGuard g(mutexLocal); Singleton()->SetSecretKey("fakeSecret"); } - return std::make_shared(storageId, Shard.SelfId(), + return std::make_shared(storageId, Shard.SelfId(), std::make_shared("fakeBucket", "fakeSecret"), SharedBlobsManager->GetStorageManagerGuarantee(storageId), Shard.Executor()->Generation()); #else @@ -30,7 +34,8 @@ std::shared_ptr TStoragesManager::DoBuild return nullptr; } else { #ifndef KIKIMR_DISABLE_S3_OPS - return std::make_shared(storageId, Shard, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + return std::make_shared( + storageId, Shard, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); #else return nullptr; #endif @@ -43,9 +48,7 @@ bool TStoragesManager::DoLoadIdempotency(NTable::TDatabase& database) { TStoragesManager::TStoragesManager(NColumnShard::TColumnShard& shard) : Shard(shard) - , SharedBlobsManager(std::make_shared((TTabletId)Shard.TabletID())) -{ - + , SharedBlobsManager(std::make_shared((TTabletId)Shard.TabletID())) { } -} \ No newline at end of file +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make b/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make index b79b6720608b..ad3f37f24574 100644 --- a/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make @@ -7,6 +7,7 @@ SRCS( PEERDIR( ydb/core/tx/columnshard/data_sharing/manager ydb/core/tx/columnshard/blobs_action/bs + ydb/core/tx/columnshard/blobs_action/local ) IF (OS_WINDOWS) diff --git a/ydb/core/tx/columnshard/columnshard_schema.h b/ydb/core/tx/columnshard/columnshard_schema.h index 50c67db625b1..fd1da16940ef 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.h +++ b/ydb/core/tx/columnshard/columnshard_schema.h @@ -372,9 +372,10 @@ struct Schema : NIceDb::Schema { struct Size: Column<7, NScheme::NTypeIds::Uint32> {}; struct RecordsCount: Column<8, NScheme::NTypeIds::Uint32> {}; struct RawBytes: Column<9, NScheme::NTypeIds::Uint64> {}; + struct BlobData: Column<10, NScheme::NTypeIds::String> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; }; struct SharedBlobIds: NIceDb::Schema::Table { @@ -876,13 +877,20 @@ class TColumnChunkLoadContext { class TIndexChunkLoadContext { private: - YDB_READONLY_DEF(TBlobRange, BlobRange); + YDB_READONLY_DEF(std::optional, BlobRange); + YDB_READONLY_DEF(std::optional, BlobData); TChunkAddress Address; const ui32 RecordsCount; const ui32 RawBytes; public: TIndexChunk BuildIndexChunk(const TBlobRangeLink16::TLinkId blobLinkId) const { - return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, BlobRange.BuildLink(blobLinkId)); + AFL_VERIFY(BlobRange); + return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, BlobRange->BuildLink(blobLinkId)); + } + + TIndexChunk BuildIndexChunk() const { + AFL_VERIFY(BlobData); + return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, *BlobData); } template @@ -892,13 +900,20 @@ class TIndexChunkLoadContext { , RawBytes(rowset.template GetValue()) { AFL_VERIFY(Address.GetColumnId())("event", "incorrect address")("address", Address.DebugString()); - TString strBlobId = rowset.template GetValue(); - Y_ABORT_UNLESS(strBlobId.size() == sizeof(TLogoBlobID), "Size %" PRISZT " doesn't match TLogoBlobID", strBlobId.size()); - TLogoBlobID logoBlobId((const ui64*)strBlobId.data()); - BlobRange.BlobId = NOlap::TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); - BlobRange.Offset = rowset.template GetValue(); - BlobRange.Size = rowset.template GetValue(); - AFL_VERIFY(BlobRange.BlobId.IsValid() && BlobRange.Size)("event", "incorrect blob")("blob", BlobRange.ToString()); + if (rowset.template HaveValue()) { + TBlobRange& bRange = BlobRange.emplace(); + TString strBlobId = rowset.template GetValue(); + Y_ABORT_UNLESS(strBlobId.size() == sizeof(TLogoBlobID), "Size %" PRISZT " doesn't match TLogoBlobID", strBlobId.size()); + TLogoBlobID logoBlobId((const ui64*)strBlobId.data()); + bRange.BlobId = NOlap::TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); + bRange.Offset = rowset.template GetValue(); + bRange.Size = rowset.template GetValue(); + AFL_VERIFY(bRange.BlobId.IsValid() && bRange.Size)("event", "incorrect blob")("blob", bRange.ToString()); + } else if (rowset.template HaveValue()) { + BlobData = rowset.template GetValue(); + } else { + AFL_VERIFY(false); + } } }; diff --git a/ydb/core/tx/columnshard/counters/engine_logs.cpp b/ydb/core/tx/columnshard/counters/engine_logs.cpp index 368eebc40ef1..7a38e052c5ed 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.cpp +++ b/ydb/core/tx/columnshard/counters/engine_logs.cpp @@ -91,9 +91,11 @@ void TEngineLogsCounters::TPortionsInfoGuard::OnNewPortion(const std::shared_ptr } } for (auto&& i : portion->GetIndexes()) { - const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); + if (i.HasBlobRange()) { + const auto blobId = portion->GetBlobId(i.GetBlobRangeVerified().GetBlobIdxVerified()); + if (blobIds.emplace(blobId).second) { + BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); + } } } PortionRecordCountGuards[producedId]->Add(portion->GetRecordsCount(), 1); @@ -111,9 +113,11 @@ void TEngineLogsCounters::TPortionsInfoGuard::OnDropPortion(const std::shared_pt } } for (auto&& i : portion->GetIndexes()) { - const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Sub(blobId.BlobSize(), blobId.BlobSize()); + if (i.HasBlobRange()) { + const auto blobId = portion->GetBlobId(i.GetBlobRangeVerified().GetBlobIdxVerified()); + if (blobIds.emplace(blobId).second) { + BlobGuards[producedId]->Sub(blobId.BlobSize(), blobId.BlobSize()); + } } } PortionRecordCountGuards[producedId]->Sub(portion->GetRecordsCount(), 1); diff --git a/ydb/core/tx/columnshard/data_sharing/protos/data.proto b/ydb/core/tx/columnshard/data_sharing/protos/data.proto index e602bc4dbba4..8b376e919946 100644 --- a/ydb/core/tx/columnshard/data_sharing/protos/data.proto +++ b/ydb/core/tx/columnshard/data_sharing/protos/data.proto @@ -14,7 +14,10 @@ message TColumnRecord { message TIndexChunk { optional uint32 IndexId = 1; optional uint32 ChunkIdx = 2; - optional NKikimrColumnShardProto.TBlobRangeLink16 BlobRange = 3; + oneof DataImplementation { + NKikimrColumnShardProto.TBlobRangeLink16 BlobRange = 3; + string BlobData = 5; + } message TMeta { optional uint32 RecordsCount = 1; diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index 8ba3d0891d85..7f98ea4ed79d 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -19,13 +19,15 @@ namespace NKikimr::NOlap::NCompaction { -void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TConstructionContext& context, std::vector&& portions) noexcept { +void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches( + TConstructionContext& context, std::vector&& portions) noexcept { std::vector> batchResults; auto resultSchema = context.SchemaVersions.GetLastSchema(); auto shardingActual = context.SchemaVersions.GetShardingInfoActual(GranuleMeta->GetPathId()); { auto resultDataSchema = resultSchema->GetIndexInfo().ArrowSchemaWithSpecials(); - NArrow::NMerger::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), resultDataSchema, false, IIndexInfo::GetSnapshotColumnNames()); + NArrow::NMerger::TMergePartialStream mergeStream( + resultSchema->GetIndexInfo().GetReplaceKey(), resultDataSchema, false, IIndexInfo::GetSnapshotColumnNames()); THashSet portionsInUsage; for (auto&& i : portions) { @@ -56,8 +58,9 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TCon } } -std::shared_ptr TGeneralCompactColumnEngineChanges::BuildPortionFilter(const std::optional& shardingActual, - const std::shared_ptr& batch, const TPortionInfo& pInfo, const THashSet& portionsInUsage, const ISnapshotSchema::TPtr& resultSchema) const { +std::shared_ptr TGeneralCompactColumnEngineChanges::BuildPortionFilter( + const std::optional& shardingActual, const std::shared_ptr& batch, + const TPortionInfo& pInfo, const THashSet& portionsInUsage, const ISnapshotSchema::TPtr& resultSchema) const { std::shared_ptr filter; if (shardingActual && pInfo.NeedShardingFilter(*shardingActual)) { filter = shardingActual->GetShardingInfo()->GetFilter(batch); @@ -79,14 +82,17 @@ std::shared_ptr TGeneralCompactColumnEngineChanges::Build for (auto&& i : excludedIntervalsInfo.GetExcludedIntervals()) { NArrow::NMerger::TSortableBatchPosition startForFound(i.GetStart().ToBatch(pkSchema), 0, pkSchema->field_names(), {}, false); NArrow::NMerger::TSortableBatchPosition finishForFound(i.GetFinish().ToBatch(pkSchema), 0, pkSchema->field_names(), {}, false); - auto foundStart = NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, startForFound, true); + auto foundStart = + NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, startForFound, true); AFL_VERIFY(foundStart); AFL_VERIFY(!foundStart->IsLess())("pos", pos.DebugJson())("start", startForFound.DebugJson())("found", foundStart->DebugString()); - auto foundFinish = NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, finishForFound, false); + auto foundFinish = + NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, finishForFound, false); AFL_VERIFY(foundFinish); AFL_VERIFY(foundFinish->GetPosition() >= foundStart->GetPosition()); if (foundFinish->GetPosition() > foundStart->GetPosition()) { - AFL_VERIFY(!foundFinish->IsGreater())("pos", pos.DebugJson())("finish", finishForFound.DebugJson())("found", foundFinish->DebugString()); + AFL_VERIFY(!foundFinish->IsGreater())("pos", pos.DebugJson())("finish", finishForFound.DebugJson())( + "found", foundFinish->DebugString()); } filterCorrection.Add(foundStart->GetPosition() - posCurrent, false); if (foundFinish->IsGreater()) { @@ -109,11 +115,14 @@ std::shared_ptr TGeneralCompactColumnEngineChanges::Build return filter; } -void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstructionContext& context, std::vector&& portions) noexcept { +void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( + TConstructionContext& context, std::vector&& portions) noexcept { static const TString portionIdFieldName = "$$__portion_id"; static const TString portionRecordIndexFieldName = "$$__portion_record_idx"; - static const std::shared_ptr portionIdField = std::make_shared(portionIdFieldName, std::make_shared()); - static const std::shared_ptr portionRecordIndexField = std::make_shared(portionRecordIndexFieldName, std::make_shared()); + static const std::shared_ptr portionIdField = + std::make_shared(portionIdFieldName, std::make_shared()); + static const std::shared_ptr portionRecordIndexField = + std::make_shared(portionRecordIndexFieldName, std::make_shared()); auto resultSchema = context.SchemaVersions.GetLastSchema(); auto shardingActual = context.SchemaVersions.GetShardingInfoActual(GranuleMeta->GetPathId()); @@ -132,7 +141,8 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc indexFields.emplace_back(portionRecordIndexField); IIndexInfo::AddSpecialFields(indexFields); auto dataSchema = std::make_shared(indexFields); - NArrow::NMerger::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames()); + NArrow::NMerger::TMergePartialStream mergeStream( + resultSchema->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames()); THashSet usedPortionIds; for (auto&& i : portions) { AFL_VERIFY(usedPortionIds.emplace(i.GetPortionInfo().GetPortionId()).second); @@ -143,16 +153,23 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc auto dataSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); auto batch = i.GetBatch(dataSchema, *resultSchema, pkFieldNamesSet); { - NArrow::NConstruction::IArrayBuilder::TPtr column = std::make_shared>>(portionIdFieldName, idx++); - batch = NArrow::TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), portionIdField, column->BuildArray(batch->num_rows()))); + NArrow::NConstruction::IArrayBuilder::TPtr column = + std::make_shared>>( + portionIdFieldName, idx++); + batch = NArrow::TStatusValidator::GetValid( + batch->AddColumn(batch->num_columns(), portionIdField, column->BuildArray(batch->num_rows()))); } { - NArrow::NConstruction::IArrayBuilder::TPtr column = std::make_shared>>(portionRecordIndexFieldName); - batch = NArrow::TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), portionRecordIndexField, column->BuildArray(batch->num_rows()))); + NArrow::NConstruction::IArrayBuilder::TPtr column = + std::make_shared>>( + portionRecordIndexFieldName); + batch = NArrow::TStatusValidator::GetValid( + batch->AddColumn(batch->num_columns(), portionRecordIndexField, column->BuildArray(batch->num_rows()))); } batch = IIndexInfo::NormalizeDeletionColumn(batch); Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, resultSchema->GetIndexInfo().GetReplaceKey())); - std::shared_ptr filter = BuildPortionFilter(shardingActual, batch, i.GetPortionInfo(), usedPortionIds, resultSchema); + std::shared_ptr filter = + BuildPortionFilter(shardingActual, batch, i.GetPortionInfo(), usedPortionIds, resultSchema); mergeStream.AddSource(batch, filter); } batchResults = mergeStream.DrainAllParts(CheckPoints, indexFields); @@ -167,7 +184,8 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc std::vector>> chunkGroups; chunkGroups.resize(batchResults.size()); for (auto&& columnId : resultSchema->GetIndexInfo().GetColumnIds()) { - NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("field_name", resultSchema->GetIndexInfo().GetColumnName(columnId))); + NActors::TLogContextGuard logGuard( + NActors::TLogContextBuilder::Build()("field_name", resultSchema->GetIndexInfo().GetColumnName(columnId))); auto columnInfo = stats->GetColumnInfo(columnId); auto resultField = resultSchema->GetIndexInfo().GetColumnFieldVerified(columnId); @@ -183,7 +201,9 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc } else { AFL_VERIFY(dataSchema->IsSpecialColumnId(columnId)); } - chunks.emplace_back(std::make_shared(columnId, p.GetPortionInfo().GetRecordsCount(), resultField, resultSchema->GetDefaultValueVerified(columnId), resultSchema->GetColumnSaver(columnId))); + chunks.emplace_back(std::make_shared(columnId, p.GetPortionInfo().GetRecordsCount(), + p.GetPortionInfo().GetColumnRawBytes({ columnId }), resultField, resultSchema->GetDefaultValueVerified(columnId), + resultSchema->GetColumnSaver(columnId))); records = { nullptr }; } AFL_VERIFY(!!loader); @@ -195,8 +215,10 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc std::map> columnChunks; ui32 batchIdx = 0; for (auto&& batchResult : batchResults) { - const ui32 portionRecordsCountLimit = batchResult->num_rows() / (batchResult->num_rows() / NSplitter::TSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; - TColumnMergeContext context(columnId, resultSchema, portionRecordsCountLimit, NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo); + const ui32 portionRecordsCountLimit = + batchResult->num_rows() / (batchResult->num_rows() / NSplitter::TSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; + TColumnMergeContext context( + columnId, resultSchema, portionRecordsCountLimit, NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo); TMergedColumn mColumn(context); auto columnPortionIdx = batchResult->GetColumnByName(portionIdFieldName); @@ -236,7 +258,8 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc } ui32 batchIdx = 0; - const auto groups = resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); + const auto groups = + resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); for (auto&& columnChunks : chunkGroups) { auto batchResult = batchResults[batchIdx]; ++batchIdx; @@ -245,10 +268,12 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc for (auto&& i : columnChunks) { if (i.second.size() != columnChunks.begin()->second.size()) { for (ui32 p = 0; p < std::min(columnChunks.begin()->second.size(), i.second.size()); ++p) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("p_first", columnChunks.begin()->second[p].DebugString())("p", i.second[p].DebugString()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("p_first", columnChunks.begin()->second[p].DebugString())( + "p", i.second[p].DebugString()); } } - AFL_VERIFY(i.second.size() == columnChunks.begin()->second.size())("first", columnChunks.begin()->second.size())("current", i.second.size())("first_name", columnChunks.begin()->first)("current_name", i.first); + AFL_VERIFY(i.second.size() == columnChunks.begin()->second.size())("first", columnChunks.begin()->second.size())( + "current", i.second.size())("first_name", columnChunks.begin()->first)("current_name", i.first); } std::vector batchSlices; @@ -259,7 +284,6 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc for (auto&& p : columnChunks) { portionColumns.emplace(p.first, p.second[i].GetChunks()); } - resultSchema->GetIndexInfo().AppendIndexes(portionColumns); batchSlices.emplace_back(portionColumns, schemaDetails, context.Counters.SplitterCounters); } TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); @@ -267,12 +291,17 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc ui32 recordIdx = 0; for (auto&& i : packs) { - TGeneralSerializedSlice slice(std::move(i)); + TGeneralSerializedSlice slicePrimary(std::move(i)); + auto dataWithSecondary = + resultSchema->GetIndexInfo().AppendIndexes(slicePrimary.GetPortionChunksToHash(), SaverContext.GetStoragesManager()).DetachResult(); + TGeneralSerializedSlice slice(dataWithSecondary.GetExternalData(), schemaDetails, context.Counters.SplitterCounters); + auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, true); - auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), GranuleMeta->GetPathId(), + auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), + dataWithSecondary.GetSecondaryInplaceData(), GranuleMeta->GetPathId(), resultSchema->GetVersion(), resultSchema->GetSnapshot(), SaverContext.GetStoragesManager()); - constructor.FillStatistics(resultSchema->GetIndexInfo()); + NArrow::TFirstLastSpecialKeys primaryKeys(slice.GetFirstLastPKBatch(resultSchema->GetIndexInfo().GetReplaceKey())); NArrow::TMinMaxSpecialKeys snapshotKeys(b, TIndexInfo::ArrowSchemaSnapshot()); constructor.GetPortionConstructor().AddMetadata(*resultSchema, deletionsCount, primaryKeys, snapshotKeys); @@ -307,7 +336,8 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc NChanges::TGeneralCompactionCounters::OnRepackPortions(portionsCount, portionsSize); { - std::vector portions = TReadPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs, context.SchemaVersions); + std::vector portions = + TReadPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs, context.SchemaVersions); if (!HasAppData() || AppDataVerified().ColumnShardConfig.GetUseChunkedMergeOnCompaction()) { BuildAppendedPortionsByChunks(context, std::move(portions)); } else { @@ -329,7 +359,8 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created_diff")("appended", sbAppended)("switched", sbSwitched); } - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created")("appended", AppendedPortions.size())("switched", SwitchedPortions.size()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created")("appended", AppendedPortions.size())( + "switched", SwitchedPortions.size()); return TConclusionStatus::Success(); } @@ -337,7 +368,8 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc void TGeneralCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { TBase::DoWriteIndexOnComplete(self, context); if (self) { - self->IncCounter(context.FinishedSuccessfully ? NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); + self->IncCounter( + context.FinishedSuccessfully ? NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, context.BlobsWritten); self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, context.BytesWritten); } @@ -346,14 +378,16 @@ void TGeneralCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TC void TGeneralCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { TBase::DoStart(self); auto& g = *GranuleMeta; - self.CSCounters.OnSplitCompactionInfo(g.GetAdditiveSummary().GetCompacted().GetTotalPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); + self.CSCounters.OnSplitCompactionInfo( + g.GetAdditiveSummary().GetCompacted().GetTotalPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); } NColumnShard::ECumulativeCounters TGeneralCompactColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { return isSuccess ? NColumnShard::COUNTER_COMPACTION_SUCCESS : NColumnShard::COUNTER_COMPACTION_FAIL; } -void TGeneralCompactColumnEngineChanges::AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include, const bool validationDuplications) { +void TGeneralCompactColumnEngineChanges::AddCheckPoint( + const NArrow::NMerger::TSortableBatchPosition& position, const bool include, const bool validationDuplications) { AFL_VERIFY(CheckPoints.emplace(position, include).second || !validationDuplications); } @@ -387,7 +421,8 @@ ui64 TGeneralCompactColumnEngineChanges::TMemoryPredictorChunkedPolicy::AddPorti SumMemoryDelta = std::max(SumMemoryDelta, MaxMemoryByColumnChunk[i.first]); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("memory_prediction_after", SumMemoryFix + SumMemoryDelta)("portion_info", portionInfo.DebugString()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("memory_prediction_after", SumMemoryFix + SumMemoryDelta)( + "portion_info", portionInfo.DebugString()); return SumMemoryFix + SumMemoryDelta; } diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index 090aeca0c15d..4b1d779a6c10 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -121,9 +121,7 @@ std::vector TChangesWithAppend::MakeAppen std::vector pages = TBatchSerializedSlice::BuildSimpleSlices(batch, NSplitter::TSplitSettings(), context.Counters.SplitterCounters, schema); std::vector generalPages; for (auto&& i : pages) { - auto portionColumns = i.GetPortionChunksToHash(); - resultSchema->GetIndexInfo().AppendIndexes(portionColumns); - generalPages.emplace_back(portionColumns, schema, context.Counters.SplitterCounters); + generalPages.emplace_back(i.GetPortionChunksToHash(), schema, context.Counters.SplitterCounters); } const NSplitter::TEntityGroups groups = resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); @@ -132,10 +130,15 @@ std::vector TChangesWithAppend::MakeAppen ui32 recordIdx = 0; for (auto&& i : packs) { - TGeneralSerializedSlice slice(std::move(i)); + TGeneralSerializedSlice slicePrimary(std::move(i)); + auto dataWithSecondary = + resultSchema->GetIndexInfo().AppendIndexes(slicePrimary.GetPortionChunksToHash(), SaverContext.GetStoragesManager()).DetachResult(); + TGeneralSerializedSlice slice(dataWithSecondary.GetExternalData(), schema, context.Counters.SplitterCounters); + auto b = batch->Slice(recordIdx, slice.GetRecordsCount()); - auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), pathId, resultSchema->GetVersion(), snapshot, SaverContext.GetStoragesManager()); - constructor.FillStatistics(resultSchema->GetIndexInfo()); + auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), + dataWithSecondary.GetSecondaryInplaceData(), pathId, resultSchema->GetVersion(), snapshot, SaverContext.GetStoragesManager()); + constructor.GetPortionConstructor().AddMetadata(*resultSchema, b); constructor.GetPortionConstructor().MutableMeta().SetTierName(IStoragesManager::DefaultStorageId); out.emplace_back(std::move(constructor)); diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.cpp b/ydb/core/tx/columnshard/engines/db_wrapper.cpp index b605475552cc..d38cdc53c1ae 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.cpp +++ b/ydb/core/tx/columnshard/engines/db_wrapper.cpp @@ -144,16 +144,23 @@ bool TDbWrapper::LoadPortions(const std::function().Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()).Update( - NIceDb::TUpdate(portion.GetBlobId(row.GetBlobRange().GetBlobIdxVerified()).SerializeBinary()), - NIceDb::TUpdate(row.GetBlobRange().Offset), - NIceDb::TUpdate(row.GetBlobRange().Size), - NIceDb::TUpdate(row.GetRecordsCount()), - NIceDb::TUpdate(row.GetRawBytes()) - ); + if (auto bRange = row.GetBlobRangeOptional()) { + AFL_VERIFY(bRange->IsValid()); + db.Table() + .Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()) + .Update(NIceDb::TUpdate(portion.GetBlobId(bRange->GetBlobIdxVerified()).SerializeBinary()), + NIceDb::TUpdate(bRange->Offset), NIceDb::TUpdate(row.GetDataSize()), + NIceDb::TUpdate(row.GetRecordsCount()), NIceDb::TUpdate(row.GetRawBytes())); + } else if (auto bData = row.GetBlobDataOptional()) { + db.Table() + .Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()) + .Update(NIceDb::TUpdate(*bData), + NIceDb::TUpdate(row.GetRecordsCount()), NIceDb::TUpdate(row.GetRawBytes())); + } else { + AFL_VERIFY(false); + } } void TDbWrapper::EraseIndex(const TPortionInfo& portion, const TIndexChunk& row) { diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index ff34086f38d7..0109b372f302 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -206,6 +206,9 @@ class TSimpleOrderedColumnChunk: public IPortionColumnChunk { virtual const TString& DoGetData() const override { return Data; } + virtual ui64 DoGetRawBytesImpl() const override { + return ColumnRecord.GetMeta().GetRawBytes(); + } virtual ui32 DoGetRecordsCountImpl() const override { return ColumnRecord.GetMeta().GetNumRows(); } diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.cpp b/ydb/core/tx/columnshard/engines/portions/constructor.cpp index 4b8cfdf97a01..56575c60d54b 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/constructor.cpp @@ -57,8 +57,12 @@ void TPortionInfoConstructor::LoadRecord(const TIndexInfo& indexInfo, const TCol } void TPortionInfoConstructor::LoadIndex(const TIndexChunkLoadContext& loadContext) { - const auto linkBlobId = RegisterBlobId(loadContext.GetBlobRange().GetBlobId()); - AddIndex(loadContext.BuildIndexChunk(linkBlobId)); + if (loadContext.GetBlobRange()) { + const TBlobRangeLink16::TLinkId linkBlobId = RegisterBlobId(loadContext.GetBlobRange()->GetBlobId()); + AddIndex(loadContext.BuildIndexChunk(linkBlobId)); + } else { + AddIndex(loadContext.BuildIndexChunk()); + } } const NKikimr::NOlap::TColumnRecord& TPortionInfoConstructor::AppendOneChunkColumn(TColumnRecord&& record) { diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.h b/ydb/core/tx/columnshard/engines/portions/constructor.h index c29a7afc2c7e..4146c80fbc19 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor.h @@ -276,7 +276,9 @@ class TPortionInfoConstructor { blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); } for (auto&& i : Indexes) { - blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + if (i.HasBlobRange()) { + blobIdxs.emplace(i.GetBlobRangeVerified().GetBlobIdxVerified()); + } } if (BlobIds.size()) { AFL_VERIFY(BlobIds.size() == blobIdxs.size()); diff --git a/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp b/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp index 378871ccf2aa..8c6c375305ca 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp +++ b/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp @@ -29,9 +29,6 @@ TPortionMetaConstructor::TPortionMetaConstructor(const TPortionMeta& meta) { RecordSnapshotMax = meta.RecordSnapshotMax; DeletionsCount = meta.GetDeletionsCount(); TierName = meta.GetTierNameOptional(); - if (!meta.StatisticsStorage.IsEmpty()) { - StatisticsStorage = meta.StatisticsStorage; - } if (meta.Produced != NPortion::EProduced::UNSPECIFIED) { Produced = meta.Produced; } @@ -49,9 +46,6 @@ TPortionMeta TPortionMetaConstructor::Build() { result.DeletionsCount = *DeletionsCount; AFL_VERIFY(Produced); result.Produced = *Produced; - if (StatisticsStorage) { - result.StatisticsStorage = *StatisticsStorage; - } return result; } @@ -60,17 +54,6 @@ bool TPortionMetaConstructor::LoadMetadata(const NKikimrTxColumnShard::TIndexPor AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", "parsing duplication"); return true; } - if (portionMeta.HasStatisticsStorage()) { - auto parsed = NStatistics::TPortionStorage::BuildFromProto(portionMeta.GetStatisticsStorage()); - if (!parsed) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", parsed.GetErrorMessage()); - return false; - } - StatisticsStorage = parsed.DetachResult(); - if (StatisticsStorage->IsEmpty()) { - StatisticsStorage.reset(); - } - } if (portionMeta.GetTierName()) { TierName = portionMeta.GetTierName(); } diff --git a/ydb/core/tx/columnshard/engines/portions/constructor_meta.h b/ydb/core/tx/columnshard/engines/portions/constructor_meta.h index ccc854886ec7..87b808a282fd 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor_meta.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor_meta.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace NKikimr::NOlap { class TPortionInfoConstructor; @@ -13,7 +12,6 @@ class TPortionMetaConstructor { private: std::optional FirstAndLastPK; std::optional TierName; - std::optional StatisticsStorage; std::optional RecordSnapshotMin; std::optional RecordSnapshotMax; std::optional Produced; @@ -31,15 +29,6 @@ class TPortionMetaConstructor { SetTierName(tierName); } - void SetStatisticsStorage(NStatistics::TPortionStorage&& storage) { - AFL_VERIFY(!StatisticsStorage); - StatisticsStorage = std::move(storage); - } - - void ResetStatisticsStorage(NStatistics::TPortionStorage&& storage) { - StatisticsStorage = std::move(storage); - } - void UpdateRecordsMeta(const NPortion::EProduced prod) { Produced = prod; } diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp b/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp index 4aeaa20dd20e..990de1b65d78 100644 --- a/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp @@ -1,4 +1,5 @@ #include "index_chunk.h" + #include namespace NKikimr::NOlap { @@ -13,16 +14,39 @@ NKikimr::TConclusionStatus TIndexChunk::DeserializeFromProto(const NKikimrColumn RecordsCount = proto.GetMeta().GetRecordsCount(); RawBytes = proto.GetMeta().GetRawBytes(); } - { + if (proto.HasBlobRange()) { auto parsed = TBlobRangeLink16::BuildFromProto(proto.GetBlobRange()); if (!parsed) { return parsed; } - BlobRange = parsed.DetachResult(); + Data = parsed.DetachResult(); + } else if (proto.HasBlobData()) { + Data = proto.GetBlobData(); + } else { + return TConclusionStatus::Fail("incorrect blob info - neither BlobData nor BlobRange"); } return TConclusionStatus::Success(); } +namespace { +class TBlobInfoSerializer { +private: + NKikimrColumnShardDataSharingProto::TIndexChunk& Proto; + +public: + TBlobInfoSerializer(NKikimrColumnShardDataSharingProto::TIndexChunk& proto) + : Proto(proto) { + } + + void operator()(const TBlobRangeLink16& link) { + *Proto.MutableBlobRange() = link.SerializeToProto(); + } + void operator()(const TString& data) { + *Proto.MutableBlobData() = data; + } +}; +} // namespace + NKikimrColumnShardDataSharingProto::TIndexChunk TIndexChunk::SerializeToProto() const { NKikimrColumnShardDataSharingProto::TIndexChunk result; result.SetIndexId(IndexId); @@ -32,8 +56,26 @@ NKikimrColumnShardDataSharingProto::TIndexChunk TIndexChunk::SerializeToProto() meta->SetRecordsCount(RecordsCount); meta->SetRawBytes(RawBytes); } - *result.MutableBlobRange() = BlobRange.SerializeToProto(); + std::visit(TBlobInfoSerializer(result), Data); return result; } +namespace { +class TDataSizeExtractor { +public: + TDataSizeExtractor() = default; + + ui64 operator()(const TBlobRangeLink16& link) { + return link.GetSize(); + } + ui64 operator()(const TString& data) { + return data.size(); + } +}; +} // namespace + +ui64 TIndexChunk::GetDataSize() const { + return std::visit(TDataSizeExtractor(), Data); } + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.h b/ydb/core/tx/columnshard/engines/portions/index_chunk.h index 6a71704318ca..1fe92adfb539 100644 --- a/ydb/core/tx/columnshard/engines/portions/index_chunk.h +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.h @@ -30,11 +30,41 @@ class TIndexChunk { YDB_READONLY(ui32, ChunkIdx, 0); YDB_READONLY(ui32, RecordsCount, 0); YDB_READONLY(ui32, RawBytes, 0); - YDB_READONLY_DEF(TBlobRangeLink16, BlobRange); + std::variant Data; TIndexChunk() = default; TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto); public: + ui64 GetDataSize() const; + + bool HasBlobRange() const { + return std::holds_alternative(Data); + } + + const TBlobRangeLink16* GetBlobRangeOptional() const { + return std::get_if(&Data); + } + + const TBlobRangeLink16& GetBlobRangeVerified() const { + const auto* result = std::get_if(&Data); + AFL_VERIFY(result); + return *result; + } + + bool HasBlobData() const { + return std::holds_alternative(Data); + } + + const TString* GetBlobDataOptional() const { + return std::get_if(&Data); + } + + const TString& GetBlobDataVerified() const { + const auto* result = std::get_if(&Data); + AFL_VERIFY(result); + return *result; + } + TChunkAddress GetAddress() const { return TChunkAddress(IndexId, ChunkIdx); } @@ -48,13 +78,22 @@ class TIndexChunk { , ChunkIdx(chunkIdx) , RecordsCount(recordsCount) , RawBytes(rawBytes) - , BlobRange(blobRange) { + , Data(blobRange) { + + } + TIndexChunk(const ui32 indexId, const ui32 chunkIdx, const ui32 recordsCount, const ui64 rawBytes, const TString& blobData) + : IndexId(indexId) + , ChunkIdx(chunkIdx) + , RecordsCount(recordsCount) + , RawBytes(rawBytes) + , Data(blobData) { } void RegisterBlobIdx(const TBlobRangeLink16::TLinkId blobLinkId) { -// AFL_VERIFY(!BlobRange.BlobId.GetTabletId())("original", BlobRange.BlobId.ToStringNew())("new", blobId.ToStringNew()); - BlobRange.BlobIdx = blobLinkId; + auto* result = std::get_if(&Data); + AFL_VERIFY(result); + result->BlobIdx = blobLinkId; } static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto) { diff --git a/ydb/core/tx/columnshard/engines/portions/meta.cpp b/ydb/core/tx/columnshard/engines/portions/meta.cpp index b3b2a92ef9e2..e585d6a88baf 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/portions/meta.cpp @@ -11,9 +11,6 @@ namespace NKikimr::NOlap { NKikimrTxColumnShard::TIndexPortionMeta TPortionMeta::SerializeToProto() const { NKikimrTxColumnShard::TIndexPortionMeta portionMeta; portionMeta.SetTierName(TierName); - if (!StatisticsStorage.IsEmpty()) { - *portionMeta.MutableStatisticsStorage() = StatisticsStorage.SerializeToProto(); - } portionMeta.SetDeletionsCount(DeletionsCount); switch (Produced) { case TPortionMeta::EProduced::UNSPECIFIED: diff --git a/ydb/core/tx/columnshard/engines/portions/meta.h b/ydb/core/tx/columnshard/engines/portions/meta.h index 5facbe8d5ae6..9c2e5cd332d3 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/portions/meta.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include #include #include #include @@ -16,7 +15,6 @@ struct TPortionMeta { private: NArrow::TFirstLastSpecialKeys ReplaceKeyEdges; // first and last PK rows YDB_READONLY_DEF(TString, TierName); - YDB_READONLY_DEF(NStatistics::TPortionStorage, StatisticsStorage); YDB_READONLY(ui32, DeletionsCount, 0); friend class TPortionMetaConstructor; TPortionMeta(NArrow::TFirstLastSpecialKeys& pk, const TSnapshot& min, const TSnapshot& max) diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 0bbefe1c5550..492c099ceae7 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -312,7 +312,10 @@ THashMap TPortionInfo::DecodeBlobAddress continue; } for (auto&& record : Indexes) { - if (RestoreBlobRange(record.GetBlobRange()) == b.first) { + if (!record.HasBlobRange()) { + continue; + } + if (RestoreBlobRange(record.GetBlobRangeVerified()) == b.first) { if (columnId != record.GetIndexId()) { columnStorageId = indexInfo.GetIndexStorageId(record.GetIndexId()); } @@ -355,7 +358,9 @@ void TPortionInfo::FillBlobRangesByStorage(THashMapemplace(i.GetBlobRange().GetBlobIdxVerified()).second) { - auto blobId = GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - AFL_VERIFY(currentHashResult); - AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); - lastBlobIdx = i.GetBlobRange().GetBlobIdxVerified(); + if (auto bRange = i.GetBlobRangeOptional()) { + if (lastBlobIdx != bRange->GetBlobIdxVerified() && currentHashLocal->emplace(bRange->GetBlobIdxVerified()).second) { + auto blobId = GetBlobId(bRange->GetBlobIdxVerified()); + AFL_VERIFY(currentHashResult); + AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); + lastBlobIdx = bRange->GetBlobIdxVerified(); + } } } } @@ -414,38 +421,30 @@ void TPortionInfo::FillBlobIdsByStorage(THashMapGetIndexInfo()); } -THashMap>>> TPortionInfo::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { - THashMap>>> result; +THashMap>> TPortionInfo::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { + THashMap>> result; for (auto&& c : GetRecords()) { const TString& storageId = GetColumnStorageId(c.GetColumnId(), indexInfo); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(std::make_shared(blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId()))); - blobRecords.back()->SetChunkIdx(c.GetChunkIdx()); + auto chunk = std::make_shared(blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId())); + chunk->SetChunkIdx(c.GetChunkIdx()); + AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); } for (auto&& c : GetIndexes()) { const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())))); - blobRecords.back()->SetChunkIdx(c.GetChunkIdx()); - } - return result; -} + const TString blobData = [&]() -> TString { + if (auto bRange = c.GetBlobRangeOptional()) { + return blobs.Extract(storageId, RestoreBlobRange(*bRange)); + } else if (auto data = c.GetBlobDataOptional()) { + return *data; + } else { + AFL_VERIFY(false); + Y_UNREACHABLE(); + } + }(); + auto chunk = std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobData); + chunk->SetChunkIdx(c.GetChunkIdx()); -THashMap>> TPortionInfo::GetEntityChunks(const TIndexInfo& indexInfo) const { - THashMap>> result; - for (auto&& c : GetRecords()) { - const TString& storageId = GetColumnStorageId(c.GetColumnId(), indexInfo); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(TEntityChunk(c.GetAddress(), c.GetMeta().GetNumRows(), c.GetMeta().GetRawBytes(), c.GetBlobRange())); - } - for (auto&& c : GetIndexes()) { - const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(TEntityChunk(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), c.GetBlobRange())); + AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); } return result; } @@ -496,7 +495,9 @@ void TPortionInfo::FullValidation() const { blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); } for (auto&& i : Indexes) { - blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + if (auto bRange = i.GetBlobRangeOptional()) { + blobIdxs.emplace(bRange->GetBlobIdxVerified()); + } } if (BlobIds.size()) { AFL_VERIFY(BlobIds.size() == blobIdxs.size()); diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 3ee701b606c8..d8098486f5b6 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -197,6 +197,16 @@ class TPortionInfo { SetRemoveSnapshot(TSnapshot(planStep, txId)); } + std::vector GetIndexInplaceData(const ui32 indexId) const { + std::vector result; + for (auto&& i : Indexes) { + if (i.GetEntityId() == indexId) { + result.emplace_back(i.GetBlobDataVerified()); + } + } + return result; + } + void InitRuntimeFeature(const ERuntimeFeature feature, const bool activity) { if (activity) { AddRuntimeFeature(feature); @@ -239,8 +249,7 @@ class TPortionInfo { void ReorderChunks(); - THashMap>>> RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; - THashMap>> GetEntityChunks(const TIndexInfo & info) const; + THashMap>> RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; const TBlobRange RestoreBlobRange(const TBlobRangeLink16& linkRange) const { return linkRange.RestoreRange(GetBlobId(linkRange.GetBlobIdxVerified())); @@ -386,20 +395,6 @@ class TPortionInfo { return nullptr; } - std::optional GetEntityRecord(const TChunkAddress& address) const { - for (auto&& c : GetRecords()) { - if (c.GetAddress() == address) { - return TEntityChunk(c.GetAddress(), c.GetMeta().GetNumRows(), c.GetMeta().GetRawBytes(), c.GetBlobRange()); - } - } - for (auto&& c : GetIndexes()) { - if (c.GetAddress() == address) { - return TEntityChunk(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), c.GetBlobRange()); - } - } - return {}; - } - bool HasEntityAddress(const TChunkAddress& address) const { for (auto&& c : GetRecords()) { if (c.GetAddress() == address) { @@ -591,7 +586,7 @@ class TPortionInfo { ui64 GetIndexBlobBytes() const noexcept { ui64 sum = 0; for (const auto& rec : Indexes) { - sum += rec.GetBlobRange().Size; + sum += rec.GetDataSize(); } return sum; } diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 4a13b658a724..52f80382171d 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -45,12 +45,10 @@ std::shared_ptr TReadPortionInfoWithBlobs::GetBatch(const IS NKikimr::NOlap::TReadPortionInfoWithBlobs TReadPortionInfoWithBlobs::RestorePortion(const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) { TReadPortionInfoWithBlobs result(portion); - THashMap>>> records = result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); - for (auto&& [storageId, recordsByBlob] : records) { - for (auto&& i : recordsByBlob) { - for (auto&& d : i.second) { - result.RestoreChunk(d); - } + THashMap>> records = result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); + for (auto&& [storageId, chunksByAddress] : records) { + for (auto&& [_, chunk] : chunksByAddress) { + result.RestoreChunk(chunk); } } return result; @@ -137,33 +135,31 @@ std::optional TReadPortionInfoWithBlobs::SyncP AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); } - for (auto&& i : to->GetIndexInfo().GetIndexes()) { - if (from->GetIndexInfo().HasIndexId(i.first)) { - continue; - } - to->GetIndexInfo().AppendIndex(entityChunksNew, i.first); - } - - auto schemaTo = std::make_shared(to, std::make_shared()); - TGeneralSerializedSlice slice(entityChunksNew, schemaTo, counters); - const NSplitter::TEntityGroups groups = to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); TPortionInfoConstructor constructor(source.PortionInfo, false, true); constructor.SetMinSnapshotDeprecated(to->GetSnapshot()); constructor.SetSchemaVersion(to->GetVersion()); constructor.MutableMeta().ResetTierName(targetTier); - NStatistics::TPortionStorage storage; - for (auto&& i : to->GetIndexInfo().GetStatisticsByName()) { - auto it = from->GetIndexInfo().GetStatisticsByName().find(i.first); - if (it != from->GetIndexInfo().GetStatisticsByName().end()) { - i.second->CopyData(it->second.GetCursorVerified(), source.PortionInfo.GetMeta().GetStatisticsStorage(), storage); + TIndexInfo::TSecondaryData secondaryData; + secondaryData.MutableExternalData() = entityChunksNew; + for (auto&& i : to->GetIndexInfo().GetIndexes()) { + if (from->GetIndexInfo().HasIndexId(i.first)) { + for (auto&& c : source.PortionInfo.GetIndexes()) { + if (c.GetEntityId() == i.first) { + constructor.AddIndex(c); + } + } } else { - i.second->FillStatisticsData(entityChunksNew, storage, to->GetIndexInfo()); + to->GetIndexInfo().AppendIndex(entityChunksNew, i.first, storages, secondaryData).Validate(); } } - constructor.MutableMeta().ResetStatisticsStorage(std::move(storage)); - return TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), std::move(constructor), storages); + const NSplitter::TEntityGroups groups = to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); + auto schemaTo = std::make_shared(to, std::make_shared()); + TGeneralSerializedSlice slice(secondaryData.GetExternalData(), schemaTo, counters); + + return TWritePortionInfoWithBlobsConstructor::BuildByBlobs( + slice.GroupChunksByBlobs(groups), secondaryData.GetSecondaryInplaceData(), std::move(constructor), storages); } const TString& TReadPortionInfoWithBlobs::GetBlobByAddressVerified(const ui32 columnId, const ui32 chunkId) const { diff --git a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp index 9af4a7c74092..3f580531b749 100644 --- a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp @@ -25,15 +25,17 @@ void TWritePortionInfoWithBlobsResult::TBlobInfo::RegisterBlobId(TWritePortionIn } TWritePortionInfoWithBlobsConstructor TWritePortionInfoWithBlobsConstructor::BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, const ui64 granule, const ui64 schemaVersion, const TSnapshot& snapshot, const std::shared_ptr& operators) { TPortionInfoConstructor constructor(granule); constructor.SetMinSnapshotDeprecated(snapshot); constructor.SetSchemaVersion(schemaVersion); - return BuildByBlobs(std::move(chunks), std::move(constructor), operators); + return BuildByBlobs(std::move(chunks), inplaceChunks, std::move(constructor), operators); } -TWritePortionInfoWithBlobsConstructor TWritePortionInfoWithBlobsConstructor::BuildByBlobs(std::vector&& chunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators) { +TWritePortionInfoWithBlobsConstructor TWritePortionInfoWithBlobsConstructor::BuildByBlobs( + std::vector&& chunks, const THashMap>& inplaceChunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators) { TWritePortionInfoWithBlobsConstructor result(std::move(constructor)); for (auto&& blob : chunks) { auto storage = operators->GetOperatorVerified(blob.GetGroupName()); @@ -42,6 +44,11 @@ TWritePortionInfoWithBlobsConstructor TWritePortionInfoWithBlobsConstructor::Bui blobInfo.AddChunk(chunk); } } + for (auto&& [_, i] : inplaceChunks) { + result.GetPortionConstructor().AddIndex( + TIndexChunk(i->GetEntityId(), i->GetChunkIdxVerified(), i->GetRecordsCountVerified(), i->GetRawBytesVerified(), i->GetData())); + } + return result; } @@ -62,18 +69,6 @@ std::vector> TWritePortionInfoWithBlobsConstr return result; } -void TWritePortionInfoWithBlobsConstructor::FillStatistics(const TIndexInfo& index) { - NStatistics::TPortionStorage storage; - for (auto&& i : index.GetStatisticsByName()) { - THashMap>> data; - for (auto&& entityId : i.second->GetEntityIds()) { - data.emplace(entityId, GetEntityChunks(entityId)); - } - i.second->FillStatisticsData(data, storage, index); - } - GetPortionConstructor().MutableMeta().SetStatisticsStorage(std::move(storage)); -} - TString TWritePortionInfoWithBlobsResult::GetBlobByRangeVerified(const ui32 entityId, const ui32 chunkIdx) const { AFL_VERIFY(!!PortionConstructor); for (auto&& rec : PortionConstructor->GetRecords()) { diff --git a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h index bb93cc3ae7d5..dde424fd63b8 100644 --- a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h +++ b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h @@ -84,12 +84,12 @@ class TWritePortionInfoWithBlobsConstructor: public TBasePortionInfoWithBlobs { public: std::vector> GetEntityChunks(const ui32 entityId) const; - void FillStatistics(const TIndexInfo& index); - static TWritePortionInfoWithBlobsConstructor BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, const ui64 granule, const ui64 schemaVersion, const TSnapshot& snapshot, const std::shared_ptr& operators); static TWritePortionInfoWithBlobsConstructor BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators); std::vector& GetBlobs() { diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h index 7770674caa45..b03982775fce 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h @@ -42,12 +42,13 @@ struct TReadMetadataBase { protected: std::shared_ptr ResultIndexSchema; +public: + using TConstPtr = std::shared_ptr; + const TVersionedIndex& GetIndexVersions() const { AFL_VERIFY(IndexVersionsPointer); return *IndexVersionsPointer; } -public: - using TConstPtr = std::shared_ptr; const std::optional& GetRequestShardingInfo() const { return RequestShardingInfo; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index d825d0594392..b9742c17b3e7 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -121,9 +121,11 @@ bool TPortionDataSource::DoStartFetchingIndexes(const std::shared_ptrGetIndexInfo().GetIndexStorageId(i.GetIndexId())); - readAction->SetIsBackgroundProcess(false); - readAction->AddRange(Portion->RestoreBlobRange(i.GetBlobRange())); + if (auto bRange = i.GetBlobRangeOptional()) { + auto readAction = action.GetReading(Schema->GetIndexInfo().GetIndexStorageId(i.GetIndexId())); + readAction->SetIsBackgroundProcess(false); + readAction->AddRange(Portion->RestoreBlobRange(*bRange)); + } } if (indexes->GetIndexIdsSet().size() != indexIds.size()) { return false; @@ -154,7 +156,11 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in if (!indexIds.contains(i->GetIndexId())) { continue; } - indexBlobs[i->GetIndexId()].emplace_back(StageData->ExtractBlob(i->GetAddress())); + if (i->HasBlobData()) { + indexBlobs[i->GetIndexId()].emplace_back(i->GetBlobDataVerified()); + } else { + indexBlobs[i->GetIndexId()].emplace_back(StageData->ExtractBlob(i->GetAddress())); + } } for (auto&& i : indexIds) { if (!indexBlobs.contains(i)) { diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index 1aeb83b4ea24..79e3dfeaf248 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -56,10 +56,16 @@ void TStatsIterator::AppendStats(const std::vector(*builders[6], r->GetChunkIdx()); NArrow::Append(*builders[7], ReadMetadata->GetEntityName(r->GetIndexId()).value_or("undefined")); NArrow::Append(*builders[8], r->GetIndexId()); - std::string blobIdString = portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy(); - NArrow::Append(*builders[9], blobIdString); - NArrow::Append(*builders[10], r->GetBlobRange().Offset); - NArrow::Append(*builders[11], r->GetBlobRange().Size); + if (auto bRange = r->GetBlobRangeOptional()) { + std::string blobIdString = portion.GetBlobId(bRange->GetBlobIdxVerified()).ToStringLegacy(); + NArrow::Append(*builders[9], blobIdString); + NArrow::Append(*builders[10], bRange->Offset); + NArrow::Append(*builders[11], bRange->Size); + } else if (auto bData = r->GetBlobDataOptional()) { + NArrow::Append(*builders[9], "INPLACE"); + NArrow::Append(*builders[10], 0); + NArrow::Append(*builders[11], bData->size()); + } NArrow::Append(*builders[12], activity); const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetIndexId(), portion.GetMeta().GetTierName()); std::string strTierName(tierName.data(), tierName.size()); diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp index 76f1bdda2c7c..b79da31a64ee 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp @@ -20,7 +20,16 @@ void TStatsIterator::AppendStats(const std::vector(*builders[10], arrow::util::string_view(tierName.data(), tierName.size())); - auto statInfo = portion.GetMeta().GetStatisticsStorage().SerializeToProto().DebugString(); + NJson::TJsonValue statReport = NJson::JSON_ARRAY; + for (auto&& i : portion.GetIndexes()) { + if (!i.HasBlobData()) { + continue; + } + auto schema = portion.GetSchema(ReadMetadata->GetIndexVersions()); + auto indexMeta = schema->GetIndexInfo().GetIndexVerified(i.GetEntityId()); + statReport.AppendValue(indexMeta->SerializeDataToJson(i, schema->GetIndexInfo())); + } + auto statInfo = statReport.GetStringRobust(); NArrow::Append(*builders[11], arrow::util::string_view(statInfo.data(), statInfo.size())); } diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp index b49c6a3e7b53..42dfe2ab2028 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp @@ -1,8 +1,8 @@ #include "index_info.h" -#include "statistics/abstract/operator.h" #include #include +#include #include #include @@ -253,19 +253,6 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& DefaultSerializer = container; } - { - for (const auto& stat : schema.GetStatistics()) { - NStatistics::TOperatorContainer container; - AFL_VERIFY(container.DeserializeFromProto(stat)); - AFL_VERIFY(StatisticsByName.emplace(container.GetName(), std::move(container)).second); - } - NStatistics::TPortionStorageCursor cursor; - for (auto&& [_, container] : StatisticsByName) { - container.SetCursor(cursor); - container->ShiftCursor(cursor); - } - } - for (const auto& idx : schema.GetIndexes()) { NIndexes::TIndexMetaContainer meta; AFL_VERIFY(meta.DeserializeFromProto(idx)); @@ -415,4 +402,36 @@ std::shared_ptr TIndexInfo::GetColumnDefaultValueVerified(const u } } +NKikimr::TConclusionStatus TIndexInfo::AppendIndex(const THashMap>>& originalData, + const ui32 indexId, const std::shared_ptr& operators, TSecondaryData& result) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + auto& index = it->second; + std::shared_ptr chunk = index->BuildIndex(originalData, *this); + auto opStorage = operators->GetOperatorVerified(index->GetStorageId()); + if ((i64)chunk->GetPackedSize() > opStorage->GetBlobSplitSettings().GetMaxBlobSize()) { + return TConclusionStatus::Fail("blob size for secondary data (" + ::ToString(indexId) + ") bigger than limit (" + + ::ToString(opStorage->GetBlobSplitSettings().GetMaxBlobSize()) + ")"); + } + if (index->GetStorageId() == IStoragesManager::LocalMetadataStorageId) { + AFL_VERIFY(result.MutableSecondaryInplaceData().emplace(indexId, chunk).second); + } else { + AFL_VERIFY(result.MutableExternalData().emplace(indexId, std::vector>({chunk})).second); + } + return TConclusionStatus::Success(); +} + +std::shared_ptr TIndexInfo::GetIndexMax(const ui32 columnId) const { + for (auto&& i : Indexes) { + if (i.second->GetClassName() != NIndexes::NMax::TIndexMeta::GetClassNameStatic()) { + continue; + } + auto maxIndex = static_pointer_cast(i.second.GetObjectPtr()); + if (maxIndex->GetColumnId() == columnId) { + return maxIndex; + } + } + return nullptr; +} + } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index a14918ef3f43..16397100f6d9 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -5,8 +5,6 @@ #include "abstract/index_info.h" #include "indexes/abstract/meta.h" -#include "statistics/abstract/operator.h" -#include "statistics/abstract/common.h" #include @@ -25,6 +23,11 @@ namespace arrow { } namespace NKikimr::NOlap { + +namespace NIndexes::NMax { +class TIndexMeta; +} + namespace NStorageOptimizer { class IOptimizerPlannerConstructor; } @@ -41,7 +44,6 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { THashMap ColumnFeatures; THashMap> ArrowColumnByColumnIdCache; THashMap Indexes; - std::map StatisticsByName; TIndexInfo(const TString& name); bool SchemeNeedActualization = false; std::shared_ptr CompactionPlannerConstructor; @@ -96,19 +98,6 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { std::vector> MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const; - const std::map& GetStatisticsByName() const { - return StatisticsByName; - } - - NStatistics::TOperatorContainer GetStatistics(const NStatistics::TIdentifier& id) const { - for (auto&& i : StatisticsByName) { - if (i.second->GetIdentifier() == id) { - return i.second; - } - } - return NStatistics::TOperatorContainer(); - } - const THashMap& GetIndexes() const { return Indexes; } @@ -199,6 +188,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return it->second; } + NIndexes::TIndexMetaContainer GetIndexVerified(const ui32 indexId) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + return it->second; + } + std::optional GetIndexNameOptional(const ui32 indexId) const { auto meta = GetIndexOptional(indexId); if (!meta) { @@ -207,19 +202,33 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return meta->GetIndexName(); } - void AppendIndexes(THashMap>>& originalData) const { + class TSecondaryData { + private: + using TStorageData = THashMap>; + YDB_ACCESSOR_DEF(TStorageData, SecondaryInplaceData); + using TPrimaryStorageData = THashMap>>; + YDB_ACCESSOR_DEF(TPrimaryStorageData, ExternalData); + public: + TSecondaryData() = default; + }; + + [[nodiscard]] TConclusion AppendIndexes(const THashMap>>& primaryData, + const std::shared_ptr& operators) const { + TSecondaryData result; + result.MutableExternalData() = primaryData; for (auto&& i : Indexes) { - std::shared_ptr chunk = i.second->BuildIndex(originalData, *this); - AFL_VERIFY(originalData.emplace(i.first, std::vector>({chunk})).second); + auto conclusion = AppendIndex(primaryData, i.first, operators, result); + if (conclusion.IsFail()) { + return conclusion; + } } + return result; } - void AppendIndex(THashMap>>& originalData, const ui32 indexId) const { - auto it = Indexes.find(indexId); - AFL_VERIFY(it != Indexes.end()); - std::shared_ptr chunk = it->second->BuildIndex(originalData, *this); - AFL_VERIFY(originalData.emplace(indexId, std::vector>({chunk})).second); - } + std::shared_ptr GetIndexMax(const ui32 columnId) const; + + [[nodiscard]] TConclusionStatus AppendIndex(const THashMap>>& originalData, const ui32 indexId, + const std::shared_ptr& operators, TSecondaryData& result) const; /// Returns an id of the column located by name. The name should exists in the schema. ui32 GetColumnIdVerified(const std::string& name) const; diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp index cd7f6d7236ca..4d4efc52a2d6 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp @@ -1,4 +1,5 @@ #include "meta.h" +#include namespace NKikimr::NOlap::NIndexes { @@ -28,4 +29,14 @@ void IIndexMeta::SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) return DoSerializeToProto(proto); } +NJson::TJsonValue IIndexMeta::SerializeDataToJson(const TIndexChunk& iChunk, const TIndexInfo& indexInfo) const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("entity_id", iChunk.GetEntityId()); + result.InsertValue("chunk_idx", iChunk.GetChunkIdx()); + if (iChunk.HasBlobData()) { + result.InsertValue("data", DoSerializeDataToJson(iChunk.GetBlobDataVerified(), indexInfo)); + } + return result; +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h index 6938c6a27fce..d5185cbca236 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h @@ -16,6 +16,7 @@ class TExprBase; namespace NKikimr::NOlap { struct TIndexInfo; class TProgramContainer; +class TIndexChunk; } namespace NKikimr::NSchemeShard { @@ -30,24 +31,30 @@ class IIndexMeta { YDB_READONLY(ui32, IndexId, 0); YDB_READONLY(TString, StorageId, IStoragesManager::DefaultStorageId); protected: - virtual std::shared_ptr DoBuildIndex(THashMap>>& data, const TIndexInfo& indexInfo) const = 0; + virtual std::shared_ptr DoBuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const = 0; virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const = 0; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) = 0; virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const = 0; virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const = 0; + virtual NJson::TJsonValue DoSerializeDataToJson(const TString& /*data*/, const TIndexInfo& /*indexInfo*/) const { + return "NO_IMPLEMENTED"; + } public: using TFactory = NObjectFactory::TObjectFactory; using TProto = NKikimrSchemeOp::TOlapIndexDescription; IIndexMeta() = default; - IIndexMeta(const ui32 indexId, const TString& indexName) + IIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId) : IndexName(indexName) , IndexId(indexId) + , StorageId(storageId) { } + NJson::TJsonValue SerializeDataToJson(const TIndexChunk& iChunk, const TIndexInfo& indexInfo) const; + TConclusionStatus CheckModificationCompatibility(const std::shared_ptr& newMeta) const { if (!newMeta) { return TConclusionStatus::Fail("new meta cannot be absent"); @@ -60,7 +67,7 @@ class IIndexMeta { virtual ~IIndexMeta() = default; - std::shared_ptr BuildIndex(THashMap>>& data, const TIndexInfo& indexInfo) const { + std::shared_ptr BuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const { return DoBuildIndex(data, indexInfo); } diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp index 3a2ce59ae22f..bf7463d02657 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp @@ -1,6 +1,7 @@ #include "tiering.h" #include #include +#include #include #include #include @@ -25,17 +26,18 @@ std::optional TTieringActualizer::Bu if (Tiering) { AFL_VERIFY(TieringColumnId); - auto statOperator = portionSchema->GetIndexInfo().GetStatistics(NStatistics::TIdentifier(NStatistics::EType::Max, {*TieringColumnId})); + auto indexMeta = portionSchema->GetIndexInfo().GetIndexMax(*TieringColumnId); std::shared_ptr max; - if (!statOperator) { + if (!indexMeta) { max = portion.MaxValue(*TieringColumnId); if (!max) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "scalar_less_not_max"); return {}; } } else { - NYDBTest::TControllers::GetColumnShardController()->OnStatisticsUsage(statOperator); - max = statOperator.GetScalarVerified(portion.GetMeta().GetStatisticsStorage()); + NYDBTest::TControllers::GetColumnShardController()->OnStatisticsUsage(NIndexes::TIndexMetaContainer(indexMeta)); + const std::vector data = portion.GetIndexInplaceData(indexMeta->GetIndexId()); + max = indexMeta->GetMaxScalarVerified(data, portionSchema->GetIndexInfo().GetColumnFieldVerified(*TieringColumnId)->type()); } auto tieringInfo = Tiering->GetTierToMove(max, now); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("tiering_info", tieringInfo.DebugString()); diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.h b/ydb/core/tx/columnshard/engines/storage/chunks/column.h index 7e05b45a9638..a8c4be1ae3de 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/column.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.h @@ -22,6 +22,9 @@ class TChunkPreparation: public IPortionColumnChunk { virtual ui32 DoGetRecordsCountImpl() const override { return Record.GetMeta().GetNumRows(); } + virtual ui64 DoGetRawBytesImpl() const override { + return Record.GetMeta().GetRawBytes(); + } virtual TString DoDebugString() const override { return ""; } diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.h b/ydb/core/tx/columnshard/engines/storage/chunks/data.h index d5a91c19609c..8409243df347 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/data.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.h @@ -17,6 +17,7 @@ class TPortionIndexChunk: public IPortionDataChunk { return ""; } virtual std::vector> DoInternalSplit(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { + AFL_VERIFY(false); return {}; } virtual bool DoIsSplittable() const override { @@ -25,6 +26,9 @@ class TPortionIndexChunk: public IPortionDataChunk { virtual std::optional DoGetRecordsCount() const override { return RecordsCount; } + virtual std::optional DoGetRawBytes() const override { + return RawBytes; + } virtual std::shared_ptr DoGetFirstScalar() const override { return nullptr; } diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h index c848f0f0cb45..721705fc5e4d 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h @@ -11,6 +11,7 @@ class TDefaultChunkPreparation: public IPortionColumnChunk { using TBase = IPortionColumnChunk; const std::shared_ptr DefaultValue; const ui32 RecordsCount; + const ui64 RawBytes; TString Data; protected: virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, @@ -24,6 +25,9 @@ class TDefaultChunkPreparation: public IPortionColumnChunk { virtual ui32 DoGetRecordsCountImpl() const override { return RecordsCount; } + virtual ui64 DoGetRawBytesImpl() const override { + return RawBytes; + } virtual TString DoDebugString() const override { return TStringBuilder() << "rc=" << RecordsCount << ";data_size=" << Data.size() << ";"; } @@ -39,11 +43,12 @@ class TDefaultChunkPreparation: public IPortionColumnChunk { } public: - TDefaultChunkPreparation(const ui32 columnId, const ui32 recordsCount, const std::shared_ptr& f, + TDefaultChunkPreparation(const ui32 columnId, const ui32 recordsCount, const ui32 rawBytes, const std::shared_ptr& f, const std::shared_ptr& defaultValue, const TColumnSaver& saver) : TBase(columnId) , DefaultValue(defaultValue) , RecordsCount(recordsCount) + , RawBytes(rawBytes) { Y_ABORT_UNLESS(RecordsCount); Data = saver.Apply(NArrow::TThreadSimpleArraysCache::Get(f->type(), defaultValue, RecordsCount), f); diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp index 1ec8aede916d..aad793c858c2 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp @@ -13,15 +13,12 @@ void TBloomFilterChecker::DoSerializeToProtoImpl(NKikimrSSA::TProgram::TOlapInde } bool TBloomFilterChecker::DoCheckImpl(const std::vector& blobs) const { + AFL_VERIFY(blobs.size() == 1); for (auto&& blob : blobs) { - auto rb = NArrow::TStatusValidator::GetValid(NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()->Deserialize(blob)); - AFL_VERIFY(rb); - AFL_VERIFY(rb->schema()->num_fields() == 1); - AFL_VERIFY(rb->schema()->field(0)->type()->id() == arrow::Type::BOOL); - auto& bArray = static_cast(*rb->column(0)); + TFixStringBitsStorage bits(blob); bool found = true; for (auto&& i : HashValues) { - if (!bArray.Value(i % bArray.length())) { + if (!bits.Get(i % bits.GetSizeBits())) { found = false; break; } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h index 92ecf9534d29..740af9f1720d 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h @@ -2,6 +2,42 @@ #include namespace NKikimr::NOlap::NIndexes { +class TFixStringBitsStorage { +private: + YDB_READONLY_DEF(TString, Data); + +public: + TFixStringBitsStorage(const TString& data) + : Data(data) + {} + + ui32 GetSizeBits() const { + return Data.size() * 8; + } + + TFixStringBitsStorage(const ui32 sizeBits) + : Data(sizeBits / 8 + ((sizeBits % 8) ? 1 : 0), '\0') { + } + + void Set(const bool val, const ui32 idx) { + AFL_VERIFY(idx < GetSizeBits()); + auto* start = &Data[idx / 8]; + ui8 word = (*(ui8*)start); + if (val) { + word |= 1 << (idx % 8); + } else { + word &= (Max() - (1 << (idx % 8))); + } + memcpy(start, &word, sizeof(ui8)); + } + + bool Get(const ui32 idx) const { + AFL_VERIFY(idx < GetSizeBits()); + const ui8 start = (*(ui8*)&Data[idx / 8]); + return start & (1 << (idx % 8)); + } +}; + class TBloomFilterChecker: public TSimpleIndexChecker { public: static TString GetClassNameStatic() { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp index 7b64faf5ebd3..19d283d7f9e5 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp @@ -10,7 +10,7 @@ namespace NKikimr::NOlap::NIndexes { -std::shared_ptr TBloomIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { +TString TBloomIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { std::set hashes; { NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer(0); @@ -24,19 +24,12 @@ std::shared_ptr TBloomIndexMeta::DoBuildIndexImpl(TChunkedBa } const ui32 bitsCount = HashesCount * hashes.size() / std::log(2); - std::vector flags(bitsCount, false); - const auto pred = [&flags](const ui64 hash) { - flags[hash % flags.size()] = true; + TFixStringBitsStorage bits(bitsCount); + const auto pred = [&bits](const ui64 hash) { + bits.Set(true, hash % bits.GetSizeBits()); }; BuildHashesSet(hashes, pred); - - arrow::BooleanBuilder builder; - auto res = builder.Reserve(flags.size()); - NArrow::TStatusValidator::Validate(builder.AppendValues(flags)); - std::shared_ptr out; - NArrow::TStatusValidator::Validate(builder.Finish(&out)); - - return arrow::RecordBatch::Make(ResultSchema, bitsCount, {out}); + return bits.GetData(); } void TBloomIndexMeta::DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h index 5a99f43924e1..feecc2e83524 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h @@ -52,7 +52,7 @@ class TBloomIndexMeta: public TIndexByColumns { } virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const override; - virtual std::shared_ptr DoBuildIndexImpl(TChunkedBatchReader& reader) const override; + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const override; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override { AFL_VERIFY(TBase::DoDeserializeFromProto(proto)); @@ -76,7 +76,7 @@ class TBloomIndexMeta: public TIndexByColumns { public: TBloomIndexMeta() = default; TBloomIndexMeta(const ui32 indexId, const TString& indexName, std::set& columnIds, const double fpProbability) - : TBase(indexId, indexName, columnIds) + : TBase(indexId, indexName, columnIds, NBlobOperations::TGlobal::DefaultStorageId) , FalsePositiveProbability(fpProbability) { Initialize(); } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp new file mode 100644 index 000000000000..d7a3516c8de2 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp @@ -0,0 +1,56 @@ +#include "constructor.h" +#include "meta.h" + +#include + +namespace NKikimr::NOlap::NIndexes::NMax { + +std::shared_ptr TIndexConstructor::DoCreateIndexMeta( + const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const { + ui32 columnId; + { + auto* columnInfo = currentSchema.GetColumns().GetByName(ColumnName); + if (!columnInfo) { + errors.AddError("no column with name " + ColumnName); + return nullptr; + } + if (!TIndexMeta::IsAvailableType(columnInfo->GetType())) { + errors.AddError("inappropriate type for max index"); + return nullptr; + } + columnId = columnInfo->GetId(); + } + return std::make_shared(indexId, indexName, columnId); +} + +NKikimr::TConclusionStatus TIndexConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { + if (!jsonInfo.Has("column_name")) { + return TConclusionStatus::Fail("column_name have to be in max index features"); + } + if (!jsonInfo["column_name"].GetString(&ColumnName)) { + return TConclusionStatus::Fail("column_name have to be in max index features as string"); + } + return TConclusionStatus::Success(); +} + +NKikimr::TConclusionStatus TIndexConstructor::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) { + if (!proto.HasMaxIndex()) { + const TString errorMessage = "Not found MaxIndex section in proto: \"" + proto.DebugString() + "\""; + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", errorMessage); + return TConclusionStatus::Fail(errorMessage); + } + auto& bIndex = proto.GetMaxIndex(); + ColumnName = bIndex.GetColumnName(); + if (!ColumnName) { + return TConclusionStatus::Fail("Empty column name in MaxIndex proto"); + } + return TConclusionStatus::Success(); +} + +void TIndexConstructor::DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const { + auto* filterProto = proto.MutableMaxIndex(); + AFL_VERIFY(!!ColumnName)("problem", "not initialized max index info trying to serialize"); + filterProto->SetColumnName(ColumnName); +} + +} // namespace NKikimr::NOlap::NIndexes::NMax diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h new file mode 100644 index 000000000000..35faabebb220 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h @@ -0,0 +1,30 @@ +#pragma once +#include +namespace NKikimr::NOlap::NIndexes::NMax { + +class TIndexConstructor: public IIndexMetaConstructor { +public: + static TString GetClassNameStatic() { + return "MAX"; + } +private: + TString ColumnName; + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +protected: + virtual std::shared_ptr DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const override; + + virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) override; + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) override; + virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const override; + +public: + TIndexConstructor() = default; + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp new file mode 100644 index 000000000000..3556cb3d1a9d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp @@ -0,0 +1,52 @@ +#include "meta.h" + +#include +#include +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NIndexes::NMax { + +TString TIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { + std::shared_ptr result; + AFL_VERIFY(reader.GetColumnsCount() == 1)("count", reader.GetColumnsCount()); + { + TChunkedColumnReader cReader = *reader.begin(); + for (reader.Start(); cReader.IsCorrect(); cReader.ReadNextChunk()) { + auto minMax = NArrow::FindMinMaxPosition(cReader.GetCurrentChunk()); + auto currentScalar = NArrow::GetScalar(cReader.GetCurrentChunk(), minMax.second); + if (!result || NArrow::ScalarCompare(*result, *currentScalar) == -1) { + result = currentScalar; + } + } + } + return NArrow::NScalar::TSerializer::SerializePayloadToString(result).DetachResult(); +} + +void TIndexMeta::DoFillIndexCheckers( + const std::shared_ptr& /*info*/, const NSchemeShard::TOlapSchema& /*schema*/) const { +} + +std::shared_ptr TIndexMeta::GetMaxScalarVerified( + const std::vector& data, const std::shared_ptr& dataType) const { + AFL_VERIFY(data.size()); + std::shared_ptr result; + for (auto&& d : data) { + std::shared_ptr current = NArrow::NScalar::TSerializer::DeserializeFromStringWithPayload(d, dataType).DetachResult(); + if (!result || NArrow::ScalarCompare(*result, *current) == -1) { + result = current; + } + } + return result; +} + +NJson::TJsonValue TIndexMeta::DoSerializeDataToJson(const TString& data, const TIndexInfo& indexInfo) const { + AFL_VERIFY(ColumnIds.size() == 1); + auto scalar = GetMaxScalarVerified({ data }, indexInfo.GetColumnFeaturesVerified(*ColumnIds.begin()).GetArrowField()->type()); + return scalar->ToString(); +} + +} // namespace NKikimr::NOlap::NIndexes::NMax diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h new file mode 100644 index 000000000000..6c48398043ff --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h @@ -0,0 +1,78 @@ +#pragma once +#include +namespace NKikimr::NOlap::NIndexes::NMax { + +class TIndexMeta: public TIndexByColumns { +public: + static TString GetClassNameStatic() { + return "MAX"; + } +private: + using TBase = TIndexByColumns; + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); +protected: + virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const override { + return TConclusionStatus::Fail("max index not modifiable"); + } + virtual void DoFillIndexCheckers( + const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const override; + + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const override; + + virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override { + AFL_VERIFY(TBase::DoDeserializeFromProto(proto)); + AFL_VERIFY(proto.HasMaxIndex()); + auto& bFilter = proto.GetMaxIndex(); + if (!bFilter.GetColumnId()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect column id"); + return false; + }; + ColumnIds.emplace(bFilter.GetColumnId()); + return true; + } + + virtual NJson::TJsonValue DoSerializeDataToJson(const TString& data, const TIndexInfo& indexInfo) const override; + + virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const override { + AFL_VERIFY(ColumnIds.size() == 1); + auto* filterProto = proto.MutableMaxIndex(); + filterProto->SetColumnId(*ColumnIds.begin()); + } + +public: + TIndexMeta() = default; + TIndexMeta(const ui32 indexId, const TString& indexName, const ui32& columnId) + : TBase(indexId, indexName, { columnId }, NBlobOperations::TGlobal::LocalMetadataStorageId) { + } + + ui32 GetColumnId() const { + AFL_VERIFY(ColumnIds.size() == 1); + return *ColumnIds.begin(); + } + + static bool IsAvailableType(const NScheme::TTypeInfo type) { + auto dataTypeResult = NArrow::GetArrowType(type); + if (!dataTypeResult.ok()) { + return false; + } + if (!NArrow::SwitchType((*dataTypeResult)->id(), [&](const auto& type) { + using TWrap = std::decay_t; + if constexpr (arrow::has_c_type()) { + return true; + } + return false; + })) { + return false; + } + + return true; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + std::shared_ptr GetMaxScalarVerified(const std::vector& data, const std::shared_ptr& type) const; +}; + +} // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make new file mode 100644 index 000000000000..7a24787285e2 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + GLOBAL constructor.cpp + GLOBAL meta.cpp +) + +PEERDIR( + ydb/core/protos + ydb/core/formats/arrow + ydb/core/tx/columnshard/engines/storage/indexes/portions +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp index d818e5a46c33..cd5f4f3c51cc 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp @@ -6,13 +6,18 @@ namespace NKikimr::NOlap::NIndexes { -void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const { +void TPortionIndexChunk::DoAddIntoPortionBeforeBlob( + const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const { AFL_VERIFY(!bRange.IsValid()); portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); } +void TPortionIndexChunk::DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const { + portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, GetData())); +} + std::shared_ptr TIndexByColumns::DoBuildIndex( - THashMap>>& data, const TIndexInfo& indexInfo) const { + const THashMap>>& data, const TIndexInfo& indexInfo) const { AFL_VERIFY(Serializer); AFL_VERIFY(data.size()); std::vector columnReaders; @@ -26,10 +31,8 @@ std::shared_ptr TIndexByColumns::DoBuildIndex recordsCount += i->GetRecordsCountVerified(); } TChunkedBatchReader reader(std::move(columnReaders)); - std::shared_ptr indexBatch = DoBuildIndexImpl(reader); - const TString indexData = Serializer->SerializeFull(indexBatch); - return std::make_shared( - TChunkAddress(GetIndexId(), 0), recordsCount, NArrow::GetBatchDataSize(indexBatch), indexData); + const TString indexData = DoBuildIndexImpl(reader); + return std::make_shared(TChunkAddress(GetIndexId(), 0), recordsCount, indexData.size(), indexData); } bool TIndexByColumns::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) { @@ -37,8 +40,8 @@ bool TIndexByColumns::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDe return true; } -TIndexByColumns::TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds) - : TBase(indexId, indexName) +TIndexByColumns::TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds, const TString& storageId) + : TBase(indexId, indexName, storageId) , ColumnIds(columnIds) { Serializer = NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer(); diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h index b0c2846da6d3..3f2f5dfb872f 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h @@ -19,6 +19,7 @@ class TPortionIndexChunk: public IPortionDataChunk { return ""; } virtual std::vector> DoInternalSplit(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { + AFL_VERIFY(false); return {}; } virtual bool DoIsSplittable() const override { @@ -27,6 +28,9 @@ class TPortionIndexChunk: public IPortionDataChunk { virtual std::optional DoGetRecordsCount() const override { return RecordsCount; } + virtual std::optional DoGetRawBytes() const override { + return RawBytes; + } virtual std::shared_ptr DoGetFirstScalar() const override { return nullptr; } @@ -34,6 +38,8 @@ class TPortionIndexChunk: public IPortionDataChunk { return nullptr; } virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; + virtual void DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const override; + public: TPortionIndexChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) : TBase(address.GetColumnId(), address.GetChunkIdx()) @@ -51,16 +57,16 @@ class TIndexByColumns: public IIndexMeta { std::shared_ptr Serializer; protected: std::set ColumnIds; - virtual std::shared_ptr DoBuildIndexImpl(TChunkedBatchReader& reader) const = 0; + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const = 0; - virtual std::shared_ptr DoBuildIndex(THashMap>>& data, const TIndexInfo& indexInfo) const override final; - virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) override; + virtual std::shared_ptr DoBuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const override final; + virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override; TConclusionStatus CheckSameColumnsForModification(const IIndexMeta& newMeta) const; public: TIndexByColumns() = default; - TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds); + TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds, const TString& storageId); }; } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/ya.make index c97b9d1ae656..2edfa9332cd4 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/indexes/ya.make @@ -3,6 +3,7 @@ LIBRARY() PEERDIR( ydb/core/tx/columnshard/engines/storage/indexes/portions ydb/core/tx/columnshard/engines/storage/indexes/bloom + ydb/core/tx/columnshard/engines/storage/indexes/max ) END() diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.h b/ydb/core/tx/columnshard/hooks/abstract/abstract.h index d1ce343edf7e..c231ce94496c 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.h +++ b/ydb/core/tx/columnshard/hooks/abstract/abstract.h @@ -21,8 +21,8 @@ namespace NKikimr::NOlap { class TColumnEngineChanges; class IBlobsGCAction; class TPortionInfo; -namespace NStatistics { -class TOperatorContainer; +namespace NIndexes { +class TIndexMetaContainer; } } namespace arrow { @@ -143,7 +143,7 @@ class ICSController { void OnDataSharingStarted(const ui64 tabletId, const TString& sessionId) { return DoOnDataSharingStarted(tabletId, sessionId); } - virtual void OnStatisticsUsage(const NOlap::NStatistics::TOperatorContainer& /*statOperator*/) { + virtual void OnStatisticsUsage(const NOlap::NIndexes::TIndexMetaContainer& /*statOperator*/) { } virtual void OnPortionActualization(const NOlap::TPortionInfo& /*info*/) { diff --git a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp index 435083000889..d1e00669f8b3 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp @@ -76,6 +76,7 @@ INormalizerTask::TPtr TCleanPortionsNormalizer::BuildTask(std::vector> TPortionsNormalizerBase::DoInit( return tasks; } - auto columnsFilter = GetColumnsFilter(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()); - THashMap portions; auto schemas = std::make_shared>(); - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return TConclusionStatus::Fail("Not ready"); + auto conclusion = InitColumns(tablesManager, db, portions); + if (conclusion.IsFail()) { + return conclusion; } - - TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); - auto initPortion = [&](TPortionInfoConstructor&& portion, const TColumnChunkLoadContext& loadContext) { - auto currentSchema = schema.GetSchema(portion); - portion.SetSchemaVersion(currentSchema->GetVersion()); - - if (!columnsFilter.empty() && !columnsFilter.contains(loadContext.GetAddress().GetColumnId())) { - return; - } - auto it = portions.find(portion.GetPortionIdVerified()); - if (it == portions.end()) { - (*schemas)[portion.GetPortionIdVerified()] = currentSchema; - const ui64 portionId = portion.GetPortionIdVerified(); - it = portions.emplace(portionId, std::move(portion)).first; - } else { - it->second.Merge(std::move(portion)); - } - it->second.LoadRecord(currentSchema->GetIndexInfo(), loadContext); - }; - - while (!rowset.EndOfSet()) { - TPortionInfoConstructor portion(rowset.GetValue(), rowset.GetValue()); - Y_ABORT_UNLESS(rowset.GetValue() == 0); - - portion.SetMinSnapshotDeprecated(NOlap::TSnapshot(rowset.GetValue(), rowset.GetValue())); - portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); - - NOlap::TColumnChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); - initPortion(std::move(portion), chunkLoadContext); - - if (!rowset.Next()) { - return TConclusionStatus::Fail("Not ready"); - } + } + { + auto conclusion = InitIndexes(db, portions); + if (conclusion.IsFail()) { + return conclusion; } } + TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); + for (auto&& [_, p] : portions) { + (*schemas)[p.GetPortionIdVerified()] = schema.GetSchema(p); + } std::vector> package; package.reserve(100); @@ -110,4 +83,70 @@ TConclusion> TPortionsNormalizerBase::DoInit( return tasks; } +TConclusionStatus TPortionsNormalizerBase::InitColumns( + const NColumnShard::TTablesManager& tablesManager, NIceDb::TNiceDb& db, THashMap& portions) { + using namespace NColumnShard; + auto columnsFilter = GetColumnsFilter(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()); + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("Not ready"); + } + + TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); + auto initPortion = [&](TPortionInfoConstructor&& portion, const TColumnChunkLoadContext& loadContext) { + auto currentSchema = schema.GetSchema(portion); + portion.SetSchemaVersion(currentSchema->GetVersion()); + + if (!columnsFilter.empty() && !columnsFilter.contains(loadContext.GetAddress().GetColumnId())) { + return; + } + auto it = portions.find(portion.GetPortionIdVerified()); + if (it == portions.end()) { + const ui64 portionId = portion.GetPortionIdVerified(); + it = portions.emplace(portionId, std::move(portion)).first; + } else { + it->second.Merge(std::move(portion)); + } + it->second.LoadRecord(currentSchema->GetIndexInfo(), loadContext); + }; + + while (!rowset.EndOfSet()) { + TPortionInfoConstructor portion(rowset.GetValue(), rowset.GetValue()); + Y_ABORT_UNLESS(rowset.GetValue() == 0); + + portion.SetMinSnapshotDeprecated( + NOlap::TSnapshot(rowset.GetValue(), rowset.GetValue())); + portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); + + NOlap::TColumnChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); + initPortion(std::move(portion), chunkLoadContext); + + if (!rowset.Next()) { + return TConclusionStatus::Fail("Not ready"); + } + } + return TConclusionStatus::Success(); +} + +TConclusionStatus TPortionsNormalizerBase::InitIndexes(NIceDb::TNiceDb& db, THashMap& portions) { + using IndexIndexes = NColumnShard::Schema::IndexIndexes; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("Not ready"); + } + + while (!rowset.EndOfSet()) { + NOlap::TIndexChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); + + auto it = portions.find(rowset.GetValue()); + AFL_VERIFY(it != portions.end()); + it->second.LoadIndex(chunkLoadContext); + + if (!rowset.Next()) { + return TConclusionStatus::Fail("Not ready"); + } + } + return TConclusionStatus::Success(); +} + } diff --git a/ydb/core/tx/columnshard/normalizer/portion/normalizer.h b/ydb/core/tx/columnshard/normalizer/portion/normalizer.h index 574a1c212873..8c23395eba0b 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/normalizer.h +++ b/ydb/core/tx/columnshard/normalizer/portion/normalizer.h @@ -85,7 +85,12 @@ class TPortionsNormalizerBase : public TNormalizationController::INormalizerComp : DsGroupSelector(info.GetStorageInfo()) {} - virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override final; + TConclusionStatus InitColumns( + const NColumnShard::TTablesManager& tablesManager, NIceDb::TNiceDb& db, THashMap& portions); + TConclusionStatus InitIndexes(NIceDb::TNiceDb& db, THashMap& portions); + + virtual TConclusion> DoInit( + const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override final; protected: virtual INormalizerTask::TPtr BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const = 0; diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunks.h b/ydb/core/tx/columnshard/splitter/abstract/chunks.h index e3be37be2bd3..3463fcea0374 100644 --- a/ydb/core/tx/columnshard/splitter/abstract/chunks.h +++ b/ydb/core/tx/columnshard/splitter/abstract/chunks.h @@ -22,7 +22,6 @@ class IPortionDataChunk { YDB_READONLY(ui32, EntityId, 0); std::optional ChunkIdx; - protected: ui64 DoGetPackedSize() const { return GetData().size(); @@ -32,9 +31,14 @@ class IPortionDataChunk { virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; virtual bool DoIsSplittable() const = 0; virtual std::optional DoGetRecordsCount() const = 0; + virtual std::optional DoGetRawBytes() const = 0; + virtual std::shared_ptr DoGetFirstScalar() const = 0; virtual std::shared_ptr DoGetLastScalar() const = 0; virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const = 0; + virtual void DoAddInplaceIntoPortion(TPortionInfoConstructor& /*portionInfo*/) const { + AFL_VERIFY(false)("problem", "implemented only in index chunks"); + } virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& /*data*/, const TSimpleColumnInfo& /*columnInfo*/) const { AFL_VERIFY(false); return nullptr; @@ -63,6 +67,12 @@ class IPortionDataChunk { return DoGetRecordsCount(); } + ui64 GetRawBytesVerified() const { + auto result = DoGetRawBytes(); + AFL_VERIFY(result); + return *result; + } + ui32 GetRecordsCountVerified() const { auto result = DoGetRecordsCount(); AFL_VERIFY(result); @@ -121,6 +131,10 @@ class IPortionDataChunk { AFL_VERIFY(!bRange.IsValid()); return DoAddIntoPortionBeforeBlob(bRange, portionInfo); } + + void AddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const { + return DoAddInplaceIntoPortion(portionInfo); + } }; } diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/batch_slice.cpp index 83f9f90f77f2..15fd2506e3ba 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.cpp +++ b/ydb/core/tx/columnshard/splitter/batch_slice.cpp @@ -69,7 +69,6 @@ bool TGeneralSerializedSlice::GroupBlobsImpl(const NSplitter::TGroupFeatures& fe chunksInProgress.AddChunks(i.GetChunks()); } InternalSplitsCount = 0; - AFL_VERIFY(chunksInProgress.size()); std::vector result; Y_ABORT_UNLESS(features.GetSplitSettings().GetMaxBlobSize() >= 2 * features.GetSplitSettings().GetMinBlobSize()); while (!chunksInProgress.IsEmpty()) { diff --git a/ydb/core/tx/columnshard/splitter/chunks.h b/ydb/core/tx/columnshard/splitter/chunks.h index eae2f4d58fd0..280f47d8c238 100644 --- a/ydb/core/tx/columnshard/splitter/chunks.h +++ b/ydb/core/tx/columnshard/splitter/chunks.h @@ -15,6 +15,12 @@ class IPortionColumnChunk : public IPortionDataChunk { protected: virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const = 0; virtual ui32 DoGetRecordsCountImpl() const = 0; + virtual ui64 DoGetRawBytesImpl() const = 0; + + virtual std::optional DoGetRawBytes() const final { + return DoGetRawBytesImpl(); + } + virtual std::optional DoGetRecordsCount() const override final { return DoGetRecordsCountImpl(); } @@ -78,11 +84,7 @@ class TChunkedColumnReader { return !!CurrentChunk; } - bool ReadNext() { - AFL_VERIFY(!!CurrentChunk); - if (++CurrentRecordIndex < CurrentChunk->length()) { - return true; - } + bool ReadNextChunk() { while (++CurrentChunkIndex < Chunks.size()) { CurrentChunk = Loader->ApplyVerifiedColumn(Chunks[CurrentChunkIndex]->GetData()); CurrentRecordIndex = 0; @@ -93,6 +95,14 @@ class TChunkedColumnReader { CurrentChunk = nullptr; return false; } + + bool ReadNext() { + AFL_VERIFY(!!CurrentChunk); + if (++CurrentRecordIndex < CurrentChunk->length()) { + return true; + } + return ReadNextChunk(); + } }; class TChunkedBatchReader { @@ -135,6 +145,10 @@ class TChunkedBatchReader { return *result; } + ui32 GetColumnsCount() const { + return Columns.size(); + } + std::vector::const_iterator begin() const { return Columns.begin(); } diff --git a/ydb/core/tx/columnshard/splitter/settings.h b/ydb/core/tx/columnshard/splitter/settings.h index 146d1147aef2..d370a5206047 100644 --- a/ydb/core/tx/columnshard/splitter/settings.h +++ b/ydb/core/tx/columnshard/splitter/settings.h @@ -22,6 +22,7 @@ class TSplitSettings { YDB_ACCESSOR(i64, MinBlobSize, DefaultMinBlobSize); YDB_ACCESSOR(i64, MinRecordsCount, DefaultMinRecordsCount); YDB_ACCESSOR(i64, MaxPortionSize, DefaultMaxPortionSize); + public: ui64 GetExpectedRecordsCountOnPage() const { return 1.5 * MinRecordsCount; diff --git a/ydb/core/tx/columnshard/splitter/simple.cpp b/ydb/core/tx/columnshard/splitter/simple.cpp index 7a155eb3158a..0af14f0ff9b4 100644 --- a/ydb/core/tx/columnshard/splitter/simple.cpp +++ b/ydb/core/tx/columnshard/splitter/simple.cpp @@ -19,6 +19,10 @@ TString TSplittedColumnChunk::DoDebugString() const { return TStringBuilder() << "records_count=" << GetRecordsCount() << ";data=" << NArrow::DebugJson(Data.GetSlicedBatch(), 3, 3) << ";"; } +ui64 TSplittedColumnChunk::DoGetRawBytesImpl() const { + return NArrow::GetBatchDataSize(Data.GetSlicedBatch()); +} + std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const { AFL_VERIFY(data); AFL_VERIFY(field); diff --git a/ydb/core/tx/columnshard/splitter/simple.h b/ydb/core/tx/columnshard/splitter/simple.h index 48c7b9efa009..01467190373d 100644 --- a/ydb/core/tx/columnshard/splitter/simple.h +++ b/ydb/core/tx/columnshard/splitter/simple.h @@ -156,6 +156,8 @@ class TSplittedColumnChunk: public IPortionColumnChunk { virtual const TString& DoGetData() const override { return Data.GetSerializedChunk(); } + virtual ui64 DoGetRawBytesImpl() const override; + virtual ui32 DoGetRecordsCountImpl() const override { return Data.GetRecordsCount(); } diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp index bee19d729f86..f45afd7c9be6 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -396,6 +397,35 @@ NMetadata::NFetcher::ISnapshot::TPtr TTestSchema::BuildSnapshot(const TTableSpec return cs; } +void TTestSchema::InitSchema(const std::vector& columns, const std::vector& pk, + const TTableSpecials& specials, NKikimrSchemeOp::TColumnTableSchema* schema) { + schema->SetEngine(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); + + for (ui32 i = 0; i < columns.size(); ++i) { + *schema->MutableColumns()->Add() = columns[i].CreateColumn(i + 1); + if (!specials.NeedTestStatistics()) { + continue; + } + if (NOlap::NIndexes::NMax::TIndexMeta::IsAvailableType(columns[i].GetType())) { + *schema->AddIndexes() = NOlap::NIndexes::TIndexMetaContainer( + std::make_shared(1000 + i, "MAX::INDEX::" + columns[i].GetName(), i + 1)) + .SerializeToProto(); + } + } + + Y_ABORT_UNLESS(pk.size() > 0); + for (auto& column : ExtractNames(pk)) { + schema->AddKeyColumnNames(column); + } + + if (specials.HasCodec()) { + schema->MutableDefaultCompression()->SetCodec(specials.GetCodecId()); + } + if (specials.CompressionLevel) { + schema->MutableDefaultCompression()->SetLevel(*specials.CompressionLevel); + } +} + } namespace NKikimr::NColumnShard { diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index 12b056b6ba6a..f3bbc468a0a2 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -244,32 +244,7 @@ struct TTestSchema { static void InitSchema(const std::vector& columns, const std::vector& pk, const TTableSpecials& specials, - NKikimrSchemeOp::TColumnTableSchema* schema) - { - schema->SetEngine(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); - - for (ui32 i = 0; i < columns.size(); ++i) { - *schema->MutableColumns()->Add() = columns[i].CreateColumn(i + 1); - if (!specials.NeedTestStatistics()) { - continue; - } - if (NOlap::NStatistics::NMax::TOperator::IsAvailableType(columns[i].GetType())) { - *schema->AddStatistics() = NOlap::NStatistics::TOperatorContainer("MAX::" + columns[i].GetName(), std::make_shared(i + 1)).SerializeToProto(); - } - } - - Y_ABORT_UNLESS(pk.size() > 0); - for (auto& column : ExtractNames(pk)) { - schema->AddKeyColumnNames(column); - } - - if (specials.HasCodec()) { - schema->MutableDefaultCompression()->SetCodec(specials.GetCodecId()); - } - if (specials.CompressionLevel) { - schema->MutableDefaultCompression()->SetLevel(*specials.CompressionLevel); - } - } + NKikimrSchemeOp::TColumnTableSchema* schema); static void InitTtl(const TTableSpecials& specials, NKikimrSchemeOp::TColumnDataLifeCycle::TTtl* ttl) { Y_ABORT_UNLESS(specials.HasTtl()); diff --git a/ydb/core/tx/columnshard/test_helper/controllers.h b/ydb/core/tx/columnshard/test_helper/controllers.h index b18c2bc34e8d..5b48f204e2bc 100644 --- a/ydb/core/tx/columnshard/test_helper/controllers.h +++ b/ydb/core/tx/columnshard/test_helper/controllers.h @@ -45,7 +45,7 @@ class TWaitCompactionController: public NYDBTest::NColumnShard::TController { return ExportsFinishedCount.Val(); } - virtual void OnStatisticsUsage(const NKikimr::NOlap::NStatistics::TOperatorContainer& /*statOperator*/) override { + virtual void OnStatisticsUsage(const NKikimr::NOlap::NIndexes::TIndexMetaContainer& /*statOperator*/) override { StatisticsUsageCount.Inc(); } virtual void OnMaxValueUsage() override { diff --git a/ydb/core/tx/schemeshard/olap/schema/schema.cpp b/ydb/core/tx/schemeshard/olap/schema/schema.cpp index 3f6749641ec3..dd1889779c1e 100644 --- a/ydb/core/tx/schemeshard/olap/schema/schema.cpp +++ b/ydb/core/tx/schemeshard/olap/schema/schema.cpp @@ -1,6 +1,5 @@ #include "schema.h" #include -#include namespace NKikimr::NSchemeShard { @@ -83,14 +82,6 @@ bool TOlapSchema::ValidateTtlSettings(const NKikimrSchemeOp::TColumnDataLifeCycl errors.AddError("Incorrect ttl column - not found in scheme"); return false; } - if (!Statistics.GetByIdOptional(NOlap::NStatistics::EType::Max, {column->GetId()})) { - TOlapStatisticsModification modification; - NOlap::NStatistics::TConstructorContainer container(std::make_shared(column->GetName())); - modification.AddUpsert("__TTL_PROVIDER::" + TGUID::CreateTimebased().AsUuidString(), container); - if (!Statistics.ApplyUpdate(*this, modification, errors)) { - return false; - } - } return ValidateColumnTableTtl(ttl.GetEnabled(), {}, Columns.GetColumns(), Columns.GetColumnsByName(), errors); } case TTtlProto::kDisabled: @@ -110,10 +101,6 @@ bool TOlapSchema::Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& return false; } - if (!Statistics.ApplyUpdate(*this, schemaUpdate.GetStatistics(), errors)) { - return false; - } - if (!Options.ApplyUpdate(schemaUpdate.GetOptions(), errors)) { return false; } @@ -140,7 +127,6 @@ void TOlapSchema::ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& ta Columns.Parse(tableSchema); Indexes.Parse(tableSchema); Options.Parse(tableSchema); - Statistics.Parse(tableSchema); } void TOlapSchema::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchemaExt) const { @@ -154,7 +140,6 @@ void TOlapSchema::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchemaExt) Columns.Serialize(resultLocal); Indexes.Serialize(resultLocal); Options.Serialize(resultLocal); - Statistics.Serialize(resultLocal); std::swap(resultLocal, tableSchemaExt); } @@ -171,10 +156,6 @@ bool TOlapSchema::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, return false; } - if (!Statistics.Validate(opSchema, errors)) { - return false; - } - if (opSchema.GetEngine() != Engine) { errors.AddError("Specified schema engine does not match schema preset"); return false; diff --git a/ydb/core/tx/schemeshard/olap/schema/schema.h b/ydb/core/tx/schemeshard/olap/schema/schema.h index b840f97ca616..f800750341fa 100644 --- a/ydb/core/tx/schemeshard/olap/schema/schema.h +++ b/ydb/core/tx/schemeshard/olap/schema/schema.h @@ -1,11 +1,9 @@ #pragma once #include #include -#include #include #include #include -#include #include "update.h" namespace NKikimr::NSchemeShard { @@ -16,16 +14,11 @@ namespace NKikimr::NSchemeShard { YDB_READONLY_DEF(TOlapColumnsDescription, Columns); YDB_READONLY_DEF(TOlapIndexesDescription, Indexes); YDB_READONLY_DEF(TOlapOptionsDescription, Options); - mutable TOlapStatisticsDescription Statistics; YDB_READONLY(ui32, NextColumnId, 1); YDB_READONLY(ui32, Version, 0); public: - const TOlapStatisticsDescription& GetStatistics() const { - return Statistics; - } - bool Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& errors); void ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); diff --git a/ydb/core/tx/schemeshard/olap/schema/update.cpp b/ydb/core/tx/schemeshard/olap/schema/update.cpp index b78161394b78..3b0087e3b756 100644 --- a/ydb/core/tx/schemeshard/olap/schema/update.cpp +++ b/ydb/core/tx/schemeshard/olap/schema/update.cpp @@ -23,10 +23,6 @@ namespace NKikimr::NSchemeShard { return false; } - if (!Statistics.Parse(alterRequest, errors)) { - return false; - } - if (!Options.Parse(alterRequest, errors)) { return false; } diff --git a/ydb/core/tx/schemeshard/olap/schema/update.h b/ydb/core/tx/schemeshard/olap/schema/update.h index d61b97749a5d..0cd98c09b3c1 100644 --- a/ydb/core/tx/schemeshard/olap/schema/update.h +++ b/ydb/core/tx/schemeshard/olap/schema/update.h @@ -1,6 +1,5 @@ #pragma once #include -#include #include #include #include @@ -11,7 +10,6 @@ namespace NKikimr::NSchemeShard { YDB_READONLY_DEF(TOlapColumnsUpdate, Columns); YDB_READONLY_DEF(TOlapIndexesUpdate, Indexes); YDB_READONLY_DEF(TOlapOptionsUpdate, Options); - YDB_READONLY_DEF(TOlapStatisticsModification, Statistics); YDB_READONLY_OPT(NKikimrSchemeOp::EColumnTableEngine, Engine); public: bool Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema, IErrorCollector& errors, bool allowNullKeys = false); diff --git a/ydb/core/tx/schemeshard/olap/schema/ya.make b/ydb/core/tx/schemeshard/olap/schema/ya.make index 03fae68d790d..76b2d2d1c801 100644 --- a/ydb/core/tx/schemeshard/olap/schema/ya.make +++ b/ydb/core/tx/schemeshard/olap/schema/ya.make @@ -9,8 +9,6 @@ PEERDIR( ydb/core/tx/schemeshard/olap/columns ydb/core/tx/schemeshard/olap/indexes ydb/core/tx/schemeshard/olap/options - ydb/core/tx/schemeshard/olap/statistics - ydb/core/tx/columnshard/engines/scheme/statistics/max ydb/core/tx/schemeshard/common ) diff --git a/ydb/core/tx/schemeshard/olap/statistics/schema.cpp b/ydb/core/tx/schemeshard/olap/statistics/schema.cpp deleted file mode 100644 index af6f9e711d05..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/schema.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "schema.h" -#include - -namespace NKikimr::NSchemeShard { - -void TOlapStatisticsSchema::SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - Operator.SerializeToProto(proto); -} - -bool TOlapStatisticsSchema::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - AFL_VERIFY(Operator.DeserializeFromProto(proto))("incorrect_proto", proto.DebugString()); - return true; -} - -bool TOlapStatisticsSchema::ApplyUpdate(const TOlapSchema& /*currentSchema*/, const TOlapStatisticsUpsert& upsert, IErrorCollector& errors) { - AFL_VERIFY(upsert.GetName() == Operator.GetName()); - AFL_VERIFY(!!upsert.GetConstructor()); - if (upsert.GetConstructor().GetClassName() != Operator.GetClassName()) { - errors.AddError("different index classes: " + upsert.GetConstructor().GetClassName() + " vs " + Operator.GetClassName()); - return false; - } - errors.AddError("cannot modify statistics calculation for " + Operator.GetName() + ". not implemented currently."); - return false; -} - -bool TOlapStatisticsDescription::ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsModification& schemaUpdate, IErrorCollector& errors) { - for (auto&& stat : schemaUpdate.GetUpsert()) { - auto* current = MutableByNameOptional(stat.GetName()); - if (current) { - if (!current->ApplyUpdate(currentSchema, stat, errors)) { - return false; - } - } else { - auto meta = stat.GetConstructor()->CreateOperator(stat.GetName(), currentSchema); - if (!meta) { - errors.AddError(meta.GetErrorMessage()); - return false; - } - TOlapStatisticsSchema object(meta.DetachResult()); - Y_ABORT_UNLESS(ObjectsByName.emplace(stat.GetName(), std::move(object)).second); - } - } - - for (const auto& name : schemaUpdate.GetDrop()) { - auto info = GetByNameOptional(name); - if (!info) { - errors.AddError(NKikimrScheme::StatusSchemeError, TStringBuilder() << "Unknown stat for drop: " << name); - return false; - } - AFL_VERIFY(ObjectsByName.erase(name)); - } - - return true; -} - -void TOlapStatisticsDescription::Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema) { - for (const auto& proto : tableSchema.GetStatistics()) { - TOlapStatisticsSchema object; - AFL_VERIFY(object.DeserializeFromProto(proto)); - AFL_VERIFY(ObjectsByName.emplace(proto.GetName(), std::move(object)).second); - } -} - -void TOlapStatisticsDescription::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const { - for (const auto& object : ObjectsByName) { - object.second.SerializeToProto(*tableSchema.AddStatistics()); - } -} - -bool TOlapStatisticsDescription::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const { - THashSet usedObjects; - for (const auto& proto : opSchema.GetStatistics()) { - if (proto.GetName().empty()) { - errors.AddError("Statistic cannot have an empty name"); - return false; - } - - const TString& name = proto.GetName(); - if (!GetByNameOptional(name)) { - errors.AddError("Stat '" + name + "' does not match schema preset"); - return false; - } - - if (!usedObjects.emplace(proto.GetName()).second) { - errors.AddError("Column '" + name + "' is specified multiple times"); - return false; - } - } - return true; -} - -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/schema.h b/ydb/core/tx/schemeshard/olap/statistics/schema.h deleted file mode 100644 index 37a79fc17fdd..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/schema.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once -#include "update.h" - -namespace NKikimr::NSchemeShard { - -class TOlapSchema; - -class TOlapStatisticsSchema { -private: - YDB_READONLY_DEF(NOlap::NStatistics::TOperatorContainer, Operator); -public: - TOlapStatisticsSchema() = default; - - TOlapStatisticsSchema(const NOlap::NStatistics::TOperatorContainer& container) - : Operator(container) - { - AFL_VERIFY(container.GetName()); - } - - bool ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsUpsert& upsert, IErrorCollector& errors); - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const; - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto); -}; - -class TOlapStatisticsDescription { -public: - using TObjectsByName = THashMap; - -private: - YDB_READONLY_DEF(TObjectsByName, ObjectsByName); -public: - const TOlapStatisticsSchema* GetByIdOptional(const NOlap::NStatistics::EType type, const std::vector& entityIds) const noexcept { - for (auto&& i : ObjectsByName) { - if (!i.second.GetOperator()) { - continue; - } - if (i.second.GetOperator()->GetIdentifier() != NOlap::NStatistics::TIdentifier(type, entityIds)) { - continue; - } - return &i.second; - } - return nullptr; - } - - const TOlapStatisticsSchema* GetByNameOptional(const TString& name) const noexcept { - auto it = ObjectsByName.find(name); - if (it != ObjectsByName.end()) { - return &it->second; - } - return nullptr; - } - - const TOlapStatisticsSchema& GetByNameVerified(const TString& name) const noexcept { - auto object = GetByNameOptional(name); - AFL_VERIFY(object); - return *object; - } - - TOlapStatisticsSchema* MutableByNameOptional(const TString& name) noexcept { - auto it = ObjectsByName.find(name); - if (it != ObjectsByName.end()) { - return &it->second; - } - return nullptr; - } - - TOlapStatisticsSchema& MutableByNameVerified(const TString& name) noexcept { - auto* object = MutableByNameOptional(name); - AFL_VERIFY(object); - return *object; - } - - bool ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsModification& schemaUpdate, IErrorCollector& errors); - - void Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); - void Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const; - bool Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const; -}; -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/update.cpp b/ydb/core/tx/schemeshard/olap/statistics/update.cpp deleted file mode 100644 index 1c82c07c300c..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/update.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "update.h" - -namespace NKikimr::NSchemeShard { - -void TOlapStatisticsUpsert::SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto) const { - requestedProto.SetName(Name); - Constructor.SerializeToProto(requestedProto); -} - -bool TOlapStatisticsUpsert::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - Name = proto.GetName(); - AFL_VERIFY(Constructor.DeserializeFromProto(proto))("incorrect_proto", proto.DebugString()); - return true; -} - -bool TOlapStatisticsModification::Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors) { - for (const auto& name : alterRequest.GetDropStatistics()) { - if (!Drop.emplace(name).second) { - errors.AddError(NKikimrScheme::StatusInvalidParameter, "Duplicated statistics for drop"); - return false; - } - } - TSet upsertNames; - for (auto& schema : alterRequest.GetUpsertStatistics()) { - TOlapStatisticsUpsert stat; - AFL_VERIFY(stat.DeserializeFromProto(schema)); - if (!upsertNames.emplace(stat.GetName()).second) { - errors.AddError(NKikimrScheme::StatusAlreadyExists, TStringBuilder() << "stat '" << stat.GetName() << "' duplication for add"); - return false; - } - Upsert.emplace_back(std::move(stat)); - } - return true; -} -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/update.h b/ydb/core/tx/schemeshard/olap/statistics/update.h deleted file mode 100644 index 96558928acf3..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/update.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -namespace NKikimr::NSchemeShard { - - class TOlapStatisticsUpsert { - private: - YDB_READONLY_DEF(TString, Name); - protected: - NOlap::NStatistics::TConstructorContainer Constructor; - public: - TOlapStatisticsUpsert() = default; - TOlapStatisticsUpsert(const TString& name, const NOlap::NStatistics::TConstructorContainer& constructor) - : Name(name) - , Constructor(constructor) - { - - } - - const NOlap::NStatistics::TConstructorContainer& GetConstructor() const { - return Constructor; - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto); - void SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto) const; - }; - - class TOlapStatisticsModification { - private: - YDB_READONLY_DEF(TVector, Upsert); - YDB_READONLY_DEF(TSet, Drop); - public: - void AddUpsert(const TString& name, const NOlap::NStatistics::TConstructorContainer container) { - Upsert.emplace_back(TOlapStatisticsUpsert(name, container)); - } - - bool Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors); - }; -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/ya.make b/ydb/core/tx/schemeshard/olap/statistics/ya.make deleted file mode 100644 index 3f4902454ef1..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - schema.cpp - update.cpp -) - -PEERDIR( - ydb/services/bg_tasks/abstract - ydb/core/tx/schemeshard/olap/common - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/protos -) - -END() diff --git a/ydb/core/tx/schemeshard/olap/ya.make b/ydb/core/tx/schemeshard/olap/ya.make index d41824702a58..4fde54f9fbd0 100644 --- a/ydb/core/tx/schemeshard/olap/ya.make +++ b/ydb/core/tx/schemeshard/olap/ya.make @@ -7,7 +7,6 @@ PEERDIR( ydb/core/tx/schemeshard/olap/schema ydb/core/tx/schemeshard/olap/common ydb/core/tx/schemeshard/olap/operations - ydb/core/tx/schemeshard/olap/statistics ydb/core/tx/schemeshard/olap/options ydb/core/tx/schemeshard/olap/layout ydb/core/tx/schemeshard/olap/manager From fc8b5712567c27c7d3adcaecd2d34d890a35d893 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 12 Jul 2024 17:04:03 +0300 Subject: [PATCH 04/12] cleaning --- .../tablestore/operations/drop_stat.cpp | 21 --- .../tablestore/operations/drop_stat.h | 19 -- .../tablestore/operations/upsert_stat.cpp | 49 ------ .../tablestore/operations/upsert_stat.h | 23 --- .../behaviour/tablestore/operations/ya.make | 3 - ydb/core/kqp/ut/olap/indexes_ut.cpp | 4 +- ydb/core/kqp/ut/olap/statistics_ut.cpp | 6 +- ydb/core/protos/flat_scheme_op.proto | 3 - .../scheme/statistics/abstract/common.cpp | 40 ----- .../scheme/statistics/abstract/common.h | 24 --- .../statistics/abstract/constructor.cpp | 5 - .../scheme/statistics/abstract/constructor.h | 73 -------- .../scheme/statistics/abstract/operator.cpp | 12 -- .../scheme/statistics/abstract/operator.h | 124 ------------- .../statistics/abstract/portion_storage.cpp | 119 ------------- .../statistics/abstract/portion_storage.h | 53 ------ .../scheme/statistics/abstract/ya.make | 20 --- .../scheme/statistics/max/constructor.cpp | 45 ----- .../scheme/statistics/max/constructor.h | 33 ---- .../scheme/statistics/max/operator.cpp | 41 ----- .../engines/scheme/statistics/max/operator.h | 68 -------- .../engines/scheme/statistics/max/ya.make | 15 -- .../scheme/statistics/protos/data.proto | 66 ------- .../engines/scheme/statistics/protos/ya.make | 11 -- .../statistics/variability/constructor.cpp | 45 ----- .../statistics/variability/constructor.h | 33 ---- .../statistics/variability/operator.cpp | 164 ------------------ .../scheme/statistics/variability/operator.h | 71 -------- .../scheme/statistics/variability/ya.make | 15 -- .../engines/scheme/statistics/ya.make | 10 -- .../tx/columnshard/engines/scheme/ya.make | 1 - 31 files changed, 5 insertions(+), 1211 deletions(-) delete mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp delete mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h delete mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp delete mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/ya.make diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp deleted file mode 100644 index 94a18e7e4140..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "drop_stat.h" -#include - -namespace NKikimr::NKqp { - -TConclusionStatus TDropStatOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { - { - auto fValue = features.Extract("NAME"); - if (!fValue) { - return TConclusionStatus::Fail("can't find parameter NAME"); - } - Name = *fValue; - } - return TConclusionStatus::Success(); -} - -void TDropStatOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { - *schemaData.AddDropStatistics() = Name; -} - -} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h deleted file mode 100644 index 777aae036858..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h +++ /dev/null @@ -1,19 +0,0 @@ -#include "abstract.h" - -namespace NKikimr::NKqp { - -class TDropStatOperation : public ITableStoreOperation { - static TString GetTypeName() { - return "DROP_STAT"; - } - - static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); -private: - TString Name; -public: - TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; - void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const override; -}; - -} - diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp deleted file mode 100644 index 9e8360dd5e35..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "upsert_stat.h" -#include -#include - -namespace NKikimr::NKqp { - -TConclusionStatus TUpsertStatOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { - { - auto fValue = features.Extract("NAME"); - if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter NAME"); - } - Name = *fValue; - } - TString type; - { - auto fValue = features.Extract("TYPE"); - if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter TYPE"); - } - type = *fValue; - } - { - auto fValue = features.Extract("FEATURES"); - if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter FEATURES"); - } - if (!Constructor.Initialize(type)) { - return TConclusionStatus::Fail("can't initialize stat constructor object for type \"" + type + "\""); - } - NJson::TJsonValue jsonData; - if (!NJson::ReadJsonFastTree(*fValue, &jsonData)) { - return TConclusionStatus::Fail("incorrect json in request FEATURES parameter"); - } - auto result = Constructor->DeserializeFromJson(jsonData); - if (result.IsFail()) { - return result; - } - } - return TConclusionStatus::Success(); -} - -void TUpsertStatOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { - auto* proto = schemaData.AddUpsertStatistics(); - proto->SetName(Name); - Constructor.SerializeToProto(*proto); -} - -} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h deleted file mode 100644 index 5d8abdffae8d..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h +++ /dev/null @@ -1,23 +0,0 @@ -#include "abstract.h" -#include - -namespace NKikimr::NKqp { - -class TUpsertStatOperation : public ITableStoreOperation { -private: - static TString GetTypeName() { - return "UPSERT_STAT"; - } - - static inline const auto Registrator = TFactory::TRegistrator(GetTypeName()); -private: - TString Name; - NOlap::NStatistics::TConstructorContainer Constructor; -public: - TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; - - void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const override; -}; - -} - diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make b/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make index 6094887573e1..e393435d9cc5 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make @@ -7,8 +7,6 @@ SRCS( GLOBAL drop_column.cpp GLOBAL upsert_index.cpp GLOBAL drop_index.cpp - GLOBAL upsert_stat.cpp - GLOBAL drop_stat.cpp GLOBAL upsert_opt.cpp GLOBAL alter_sharding.cpp ) @@ -16,7 +14,6 @@ SRCS( PEERDIR( ydb/services/metadata/manager ydb/core/formats/arrow/serializer - ydb/core/tx/columnshard/engines/scheme/statistics/abstract ydb/core/tx/columnshard/engines/storage/optimizer/abstract ydb/core/kqp/gateway/utils ydb/core/protos diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp index 13eeebc02d93..1a55241f7137 100644 --- a/ydb/core/kqp/ut/olap/indexes_ut.cpp +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -281,8 +281,8 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); TString result = StreamResultToYson(it); - Cout << result << Endl; - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << Endl; + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("result", result); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("skip", csController->GetIndexesSkippingOnSelect().Val())("check", csController->GetIndexesApprovedOnSelect().Val()); CompareYson(result, R"([[0u;]])"); AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val() * 0.3); diff --git a/ydb/core/kqp/ut/olap/statistics_ut.cpp b/ydb/core/kqp/ut/olap/statistics_ut.cpp index d79a07f9bc3b..415513bcabb9 100644 --- a/ydb/core/kqp/ut/olap/statistics_ut.cpp +++ b/ydb/core/kqp/ut/olap/statistics_ut.cpp @@ -14,19 +14,19 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { helper.CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_pk_int, FEATURES=`{\"column_name\": \"pk_int\"}`);"; + auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"pk_int\"}`)"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_field, FEATURES=`{\"column_name\": \"field\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"field\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_pk_int, FEATURES=`{\"column_name\": \"pk_int\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"pk_int\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 173d90e9cdf8..f2eca4ddabb2 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -17,7 +17,6 @@ import "ydb/public/api/protos/ydb_value.proto"; import "ydb/library/actors/protos/actors.proto"; import "ydb/library/mkql_proto/protos/minikql.proto"; import "ydb/core/protos/index_builder.proto"; -import "ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto"; import "ydb/core/tx/columnshard/engines/scheme/defaults/protos/data.proto"; import "ydb/core/tx/columnshard/common/protos/snapshot.proto"; @@ -567,8 +566,6 @@ message TAlterColumnTableSchema { repeated TOlapColumnDiff AlterColumns = 7; repeated TOlapIndexRequested UpsertIndexes = 8; repeated string DropIndexes = 9; - repeated NKikimrColumnShardStatisticsProto.TConstructorContainer UpsertStatistics = 10; - repeated string DropStatistics = 11; optional TColumnTableRequestedOptions Options = 12; } diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp deleted file mode 100644 index e7960e66809e..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "common.h" -#include - -namespace NKikimr::NOlap::NStatistics { - -TIdentifier::TIdentifier(const EType type, const std::vector& entities) - : Type(type) - , EntityIds(entities) -{ - AFL_VERIFY(EntityIds.size()); -} - -bool TIdentifier::operator<(const TIdentifier& item) const { - if (Type != item.Type) { - return (ui32)Type < (ui32)item.Type; - } - for (ui32 i = 0; i < std::min(EntityIds.size(), item.EntityIds.size()); ++i) { - if (EntityIds[i] < item.EntityIds[i]) { - return true; - } - } - return false; -} - -bool TIdentifier::operator==(const TIdentifier& item) const { - if (Type != item.Type) { - return false; - } - if (EntityIds.size() != item.EntityIds.size()) { - return false; - } - for (ui32 i = 0; i < EntityIds.size(); ++i) { - if (EntityIds[i] != item.EntityIds[i]) { - return false; - } - } - return true; -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h deleted file mode 100644 index abfd7159a97b..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics { -enum class EType { - Undefined /* "undefined" */, - Max /* "max" */, - Variability /* "variability" */ -}; - -class TIdentifier { -private: - YDB_READONLY(EType, Type, EType::Undefined); - YDB_READONLY_DEF(std::vector, EntityIds); -public: - TIdentifier(const EType type, const std::vector& entities); - - bool operator<(const TIdentifier& item) const; - bool operator==(const TIdentifier& item) const; -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp deleted file mode 100644 index 5713317c7d21..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "constructor.h" - -namespace NKikimr::NOlap::NStatistics { - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h deleted file mode 100644 index 8948e93d482c..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once -#include "common.h" -#include "portion_storage.h" -#include "operator.h" - -#include - -namespace NKikimr::NSchemeShard { -class TOlapSchema; -} - -namespace NKikimrColumnShardStatisticsProto { -class TOperatorContainer; -} - -namespace NKikimr::NOlap::NStatistics { - -class IConstructor { -private: - YDB_READONLY(EType, Type, EType::Undefined); - IConstructor() = default; -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const = 0; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) = 0; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const = 0; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) = 0; -public: - using TProto = NKikimrColumnShardStatisticsProto::TConstructorContainer; - using TFactory = NObjectFactory::TObjectFactory; - - virtual ~IConstructor() = default; - - IConstructor(const EType type) - :Type(type) { - - } - - TConclusionStatus DeserializeFromJson(const NJson::TJsonValue& jsonData) { - return DoDeserializeFromJson(jsonData); - } - - TConclusion CreateOperator(const TString& name, const NSchemeShard::TOlapSchema& currentSchema) const { - auto result = DoCreateOperator(currentSchema); - if (!result) { - return result.GetError(); - } - return TOperatorContainer(name, result.DetachResult()); - } - - TString GetClassName() const { - return ::ToString(Type); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!TryFromString(proto.GetClassName(), Type)) { - return false; - } - return DoDeserializeFromProto(proto); - } - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - return DoSerializeToProto(proto); - } -}; - -class TConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { -private: - using TBase = NBackgroundTasks::TInterfaceProtoContainer; -public: - using TBase::TBase; -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp deleted file mode 100644 index 357d8bbd3934..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics { - -bool IOperator::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!TryFromString(proto.GetClassName(), Type)) { - return false; - } - return DoDeserializeFromProto(proto); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h deleted file mode 100644 index 29f6f6744ac4..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h +++ /dev/null @@ -1,124 +0,0 @@ -#pragma once -#include "common.h" -#include "portion_storage.h" - -#include -#include - -#include -#include - -#include - -namespace NKikimr::NOlap { -class IPortionDataChunk; -} - -namespace NKikimr::NOlap::NStatistics { - -class IOperator { -private: - YDB_READONLY(EType, Type, EType::Undefined); - IOperator() = default; -protected: - virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const = 0; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const = 0; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) = 0; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const = 0; - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const = 0; -public: - using TProto = NKikimrColumnShardStatisticsProto::TOperatorContainer; - using TFactory = NObjectFactory::TObjectFactory; - - virtual ~IOperator() = default; - - virtual std::vector GetEntityIds() const = 0; - - IOperator(const EType type) - :Type(type) { - - } - - void ShiftCursor(TPortionStorageCursor& cursor) const { - DoShiftCursor(cursor); - } - - void CopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const { - return DoCopyData(cursor, portionStatsFrom, portionStatsTo); - } - - void FillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { - DoFillStatisticsData(data, portionStats, index); - } - - TString GetClassName() const { - return ::ToString(Type); - } - - TIdentifier GetIdentifier() const { - return TIdentifier(Type, GetEntityIds()); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto); - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - return DoSerializeToProto(proto); - } -}; - -class TOperatorContainer: public NBackgroundTasks::TInterfaceProtoContainer { -private: - YDB_READONLY_DEF(TString, Name); - std::optional Cursor; - using TBase = NBackgroundTasks::TInterfaceProtoContainer; -public: - TOperatorContainer() = default; - - TOperatorContainer(const TString& name, const std::shared_ptr& object) - : TBase(object) - , Name(name) - { - AFL_VERIFY(Name); - } - - const TPortionStorageCursor& GetCursorVerified() const { - AFL_VERIFY(Cursor); - return *Cursor; - } - - void SetCursor(const TPortionStorageCursor& cursor) { - AFL_VERIFY(!Cursor); - Cursor = cursor; - } - - std::shared_ptr GetScalarVerified(const TPortionStorage& storage) { - AFL_VERIFY(!!Cursor); - return storage.GetScalarVerified(*Cursor); - } - - NKikimrColumnShardStatisticsProto::TOperatorContainer SerializeToProto() const { - NKikimrColumnShardStatisticsProto::TOperatorContainer result = TBase::SerializeToProto(); - result.SetName(Name); - AFL_VERIFY(Name); - return result; - } - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - TBase::SerializeToProto(proto); - proto.SetName(Name); - AFL_VERIFY(Name); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - Name = proto.GetName(); - if (!Name) { - return false; - } - if (!TBase::DeserializeFromProto(proto)) { - return false; - } - return true; - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp deleted file mode 100644 index f0d67ecf7d42..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include "portion_storage.h" -#include -#include - -namespace NKikimr::NOlap::NStatistics { - -NKikimrColumnShardStatisticsProto::TScalar TPortionStorage::ScalarToProto(const arrow::Scalar& scalar) { - NKikimrColumnShardStatisticsProto::TScalar result; - switch (scalar.type->id()) { - case arrow::Type::BOOL: - result.SetBool(static_cast(scalar).value); - break; - case arrow::Type::UINT8: - result.SetUint8(static_cast(scalar).value); - break; - case arrow::Type::UINT16: - result.SetUint16(static_cast(scalar).value); - break; - case arrow::Type::UINT32: - result.SetUint32(static_cast(scalar).value); - break; - case arrow::Type::UINT64: - result.SetUint64(static_cast(scalar).value); - break; - case arrow::Type::INT8: - result.SetInt8(static_cast(scalar).value); - break; - case arrow::Type::INT16: - result.SetInt16(static_cast(scalar).value); - break; - case arrow::Type::INT32: - result.SetInt32(static_cast(scalar).value); - break; - case arrow::Type::INT64: - result.SetInt64(static_cast(scalar).value); - break; - case arrow::Type::DOUBLE: - result.SetDouble(static_cast(scalar).value); - break; - case arrow::Type::FLOAT: - result.SetFloat(static_cast(scalar).value); - break; - case arrow::Type::TIMESTAMP: - { - auto* ts = result.MutableTimestamp(); - ts->SetValue(static_cast(scalar).value); - ts->SetUnit(static_cast(*scalar.type).unit()); - break; - } - default: - AFL_VERIFY(false)("problem", "incorrect type for statistics usage")("type", scalar.type->ToString()); - } - return result; -} - -std::shared_ptr TPortionStorage::ProtoToScalar(const NKikimrColumnShardStatisticsProto::TScalar& proto) { - if (proto.HasBool()) { - return std::make_shared(proto.GetBool()); - } else if (proto.HasUint8()) { - return std::make_shared(proto.GetUint8()); - } else if (proto.HasUint16()) { - return std::make_shared(proto.GetUint16()); - } else if (proto.HasUint32()) { - return std::make_shared(proto.GetUint32()); - } else if (proto.HasUint64()) { - return std::make_shared(proto.GetUint64()); - } else if (proto.HasInt8()) { - return std::make_shared(proto.GetInt8()); - } else if (proto.HasInt16()) { - return std::make_shared(proto.GetInt16()); - } else if (proto.HasInt32()) { - return std::make_shared(proto.GetInt32()); - } else if (proto.HasInt64()) { - return std::make_shared(proto.GetInt64()); - } else if (proto.HasDouble()) { - return std::make_shared(proto.GetDouble()); - } else if (proto.HasFloat()) { - return std::make_shared(proto.GetFloat()); - } else if (proto.HasTimestamp()) { - arrow::TimeUnit::type unit = arrow::TimeUnit::type(proto.GetTimestamp().GetUnit()); - return std::make_shared(proto.GetTimestamp().GetValue(), std::make_shared(unit)); - } - AFL_VERIFY(false)("problem", "incorrect statistics proto")("proto", proto.DebugString()); - return nullptr; -} - -std::shared_ptr TPortionStorage::GetScalarVerified(const TPortionStorageCursor& cursor) const { - AFL_VERIFY(cursor.GetScalarsPosition() < Data.size()); - AFL_VERIFY(Data[cursor.GetScalarsPosition()]); - return Data[cursor.GetScalarsPosition()]; -} - -void TPortionStorage::AddScalar(const std::shared_ptr& scalar) { - const auto type = scalar->type->id(); - AFL_VERIFY(type == arrow::Type::BOOL || - type == arrow::Type::UINT8 || type == arrow::Type::UINT16 || type == arrow::Type::UINT32 || type == arrow::Type::UINT64 || - type == arrow::Type::INT8 || type == arrow::Type::INT16 || type == arrow::Type::INT32 || type == arrow::Type::INT64 || - type == arrow::Type::DOUBLE || type == arrow::Type::TIMESTAMP || type == arrow::Type::FLOAT) - ("problem", "incorrect_stat_type")("incoming", scalar->type->ToString()); - Data.emplace_back(scalar); -} - -NKikimrColumnShardStatisticsProto::TPortionStorage TPortionStorage::SerializeToProto() const { - NKikimrColumnShardStatisticsProto::TPortionStorage result; - for (auto&& i : Data) { - AFL_VERIFY(i); - *result.AddScalars() = ScalarToProto(*i); - } - return result; -} - -NKikimr::TConclusionStatus TPortionStorage::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto) { - for (auto&& i : proto.GetScalars()) { - Data.emplace_back(ProtoToScalar(i)); - } - return TConclusionStatus::Success(); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h deleted file mode 100644 index a3e4b6bcb0dd..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -namespace NKikimrColumnShardStatisticsProto { -class TScalar; -class TPortionStorage; -} - -namespace NKikimr::NOlap::NStatistics { -class TPortionStorageCursor { -private: - YDB_READONLY(ui32, ScalarsPosition, 0); -public: - TPortionStorageCursor() = default; - - void AddScalarsPosition(const ui32 shift) { - ScalarsPosition += shift; - } -}; - -class TPortionStorage { -private: - YDB_READONLY_DEF(std::vector>, Data); - static NKikimrColumnShardStatisticsProto::TScalar ScalarToProto(const arrow::Scalar& value); - static std::shared_ptr ProtoToScalar(const NKikimrColumnShardStatisticsProto::TScalar& proto); - TConclusionStatus DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto); - -public: - bool IsEmpty() const { - return Data.empty(); - } - - std::shared_ptr GetScalarVerified(const TPortionStorageCursor& cursor) const; - - void AddScalar(const std::shared_ptr& scalar); - - NKikimrColumnShardStatisticsProto::TPortionStorage SerializeToProto() const; - - static TConclusion BuildFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto) { - TPortionStorage result; - auto parse = result.DeserializeFromProto(proto); - if (!parse) { - return parse; - } - return result; - } -}; -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make deleted file mode 100644 index f63520354edf..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make +++ /dev/null @@ -1,20 +0,0 @@ -LIBRARY() - -SRCS( - portion_storage.cpp - constructor.cpp - operator.cpp - common.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/protos - ydb/core/tx/columnshard/engines/scheme/abstract - contrib/libs/apache/arrow - ydb/library/actors/core - ydb/library/conclusion -) - -GENERATE_ENUM_SERIALIZATION(common.h) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp deleted file mode 100644 index a12a27812350..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "constructor.h" -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics::NMax { - -NKikimr::TConclusion> TConstructor::DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const { - auto column = currentSchema.GetColumns().GetByName(ColumnName); - if (!TOperator::IsAvailableType(column->GetType())) { - return TConclusionStatus::Fail("incorrect type for stat calculation"); - } - return std::make_shared(column->GetId()); -} - -bool TConstructor::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!proto.HasMax()) { - return false; - } - ColumnName = proto.GetMax().GetColumnName(); - if (!ColumnName) { - return false; - } - return true; -} - -void TConstructor::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - AFL_VERIFY(!!ColumnName); - proto.MutableMax()->SetColumnName(ColumnName); -} - -NKikimr::TConclusionStatus TConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonData) { - if (!jsonData.Has("column_name")) { - return TConclusionStatus::Fail("no column_name field in json description"); - } - TString columnNameLocal; - if (!jsonData["column_name"].GetString(&columnNameLocal)) { - return TConclusionStatus::Fail("incorrect column_name field in json description (no string)"); - } - if (!columnNameLocal) { - return TConclusionStatus::Fail("empty column_name field in json description"); - } - ColumnName = columnNameLocal; - return TConclusionStatus::Success(); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h deleted file mode 100644 index 695096a63d2f..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once -#include -#include - -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -class TConstructor: public IConstructor { -private: - using TBase = IConstructor; - static inline const auto Registrator = TFactory::TRegistrator(::ToString(EType::Max)); - YDB_READONLY(TString, ColumnName, 0); -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const override; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const override; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) override; -public: - TConstructor(const TString& columnName) - : TBase(EType::Max) - , ColumnName(columnName) - { - - } - - TConstructor() - :TBase(EType::Max) { - - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp deleted file mode 100644 index 8e2c179e077b..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "operator.h" -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -void TOperator::DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { - auto loader = index.GetColumnLoaderVerified(EntityId); - auto it = data.find(EntityId); - AFL_VERIFY(it != data.end()); - std::shared_ptr result; - for (auto&& i : it->second) { - auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(i->GetData())); - AFL_VERIFY(rb->num_columns() == 1); - auto res = NArrow::FindMinMaxPosition(rb->column(0)); - auto currentScalarMax = NArrow::TStatusValidator::GetValid(rb->column(0)->GetScalar(res.second)); - if (!result || NArrow::ScalarCompare(result, currentScalarMax) < 0) { - result = currentScalarMax; - } - } - portionStats.AddScalar(result); -} - -bool TOperator::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!proto.HasMax()) { - return false; - } - EntityId = proto.GetMax().GetEntityId(); - if (!EntityId) { - return false; - } - return true; -} - -void TOperator::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - AFL_VERIFY(EntityId); - proto.MutableMax()->SetEntityId(EntityId); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h deleted file mode 100644 index b3478e5a24bb..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once -#include -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -class TOperator: public IOperator { -private: - using TBase = IOperator; - ui32 EntityId = 0; - static inline auto Registrator = TFactory::TRegistrator(::ToString(EType::Max)); -protected: - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const override { - std::shared_ptr scalar = portionStatsFrom.GetScalarVerified(cursor); - portionStatsTo.AddScalar(scalar); - } - - virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const override; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const override { - cursor.AddScalarsPosition(1); - } - virtual std::vector GetEntityIds() const override { - return {EntityId}; - } - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const override; -public: - - static bool IsAvailableType(const NScheme::TTypeInfo type) { - switch (type.GetTypeId()) { - case NScheme::NTypeIds::Int8: - case NScheme::NTypeIds::Uint8: - case NScheme::NTypeIds::Int16: - case NScheme::NTypeIds::Uint16: - case NScheme::NTypeIds::Int32: - case NScheme::NTypeIds::Uint32: - case NScheme::NTypeIds::Int64: - case NScheme::NTypeIds::Uint64: - case NScheme::NTypeIds::Timestamp: - case NScheme::NTypeIds::Double: - case NScheme::NTypeIds::Float: - case NScheme::NTypeIds::Datetime: - case NScheme::NTypeIds::Date: - case NScheme::NTypeIds::Date32: - case NScheme::NTypeIds::Datetime64: - case NScheme::NTypeIds::Timestamp64: - case NScheme::NTypeIds::Interval64: - return true; - default: - break; - } - return false; - } - - TOperator() - : TBase(EType::Max) - { - - } - - TOperator(const ui32 entityId) - : TBase(EType::Max) - , EntityId(entityId) { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make deleted file mode 100644 index 631c95eeb3d8..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - GLOBAL constructor.cpp - GLOBAL operator.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/abstract - ydb/core/tx/columnshard/splitter/abstract - ydb/core/formats/arrow -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto deleted file mode 100644 index c99f485d399d..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto +++ /dev/null @@ -1,66 +0,0 @@ -package NKikimrColumnShardStatisticsProto; - -message TScalar { - message TTimestamp { - optional uint64 Value = 1; - optional uint32 Unit = 2; - } - oneof Value { - bool Bool = 1; - uint32 Uint8 = 2; - uint32 Uint16 = 3; - uint32 Uint32 = 4; - uint64 Uint64 = 5; - - int32 Int8 = 6; - int32 Int16 = 7; - int32 Int32 = 8; - int64 Int64 = 9; - - double Double = 10; - - TTimestamp Timestamp = 11; - - float Float = 12; - } -} - -message TPortionStorage { - repeated TScalar Scalars = 1; -} - -message TMaxConstructor { - optional string ColumnName = 3; -} - -message TVariabilityConstructor { - optional string ColumnName = 3; -} - -message TConstructorContainer { - optional string Name = 1; - - optional string ClassName = 40; - oneof Implementation { - TMaxConstructor Max = 41; - TVariabilityConstructor Variability = 42; - } -} - -message TMaxOperator { - optional uint32 EntityId = 1; -} - -message TVariabilityOperator { - optional uint32 EntityId = 1; -} - -message TOperatorContainer { - optional string Name = 1; - - optional string ClassName = 40; - oneof Implementation { - TMaxOperator Max = 41; - TVariabilityOperator Variability = 42; - } -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make deleted file mode 100644 index f72b3b7cf620..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make +++ /dev/null @@ -1,11 +0,0 @@ -PROTO_LIBRARY() - -SRCS( - data.proto -) - -PEERDIR( - -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp deleted file mode 100644 index 25840673fcb5..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "constructor.h" -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics::NVariability { - -NKikimr::TConclusion> TConstructor::DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const { - auto column = currentSchema.GetColumns().GetByName(ColumnName); - if (!TOperator::IsAvailableType(column->GetType())) { - return TConclusionStatus::Fail("incorrect type for stat calculation"); - } - return std::make_shared(column->GetId()); -} - -bool TConstructor::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!proto.HasVariability()) { - return false; - } - ColumnName = proto.GetVariability().GetColumnName(); - if (!ColumnName) { - return false; - } - return true; -} - -void TConstructor::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - AFL_VERIFY(!!ColumnName); - proto.MutableVariability()->SetColumnName(ColumnName); -} - -NKikimr::TConclusionStatus TConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonData) { - if (!jsonData.Has("column_name")) { - return TConclusionStatus::Fail("no column_name field in json description"); - } - TString columnNameLocal; - if (!jsonData["column_name"].GetString(&columnNameLocal)) { - return TConclusionStatus::Fail("incorrect column_name field in json description (no string)"); - } - if (!columnNameLocal) { - return TConclusionStatus::Fail("empty column_name field in json description"); - } - ColumnName = columnNameLocal; - return TConclusionStatus::Success(); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h deleted file mode 100644 index 809c9043faac..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once -#include -#include - -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class TConstructor: public IConstructor { -private: - using TBase = IConstructor; - static inline const auto Registrator = TFactory::TRegistrator(::ToString(EType::Variability)); - YDB_READONLY(TString, ColumnName, 0); -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const override; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const override; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) override; -public: - TConstructor(const TString& columnName) - : TBase(EType::Max) - , ColumnName(columnName) - { - - } - - TConstructor() - :TBase(EType::Variability) { - - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp deleted file mode 100644 index d43d617171bb..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp +++ /dev/null @@ -1,164 +0,0 @@ -#include "operator.h" -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class IValuesContainer { -protected: - std::optional DataType; - ui32 DifferentCount = 0; - - virtual void DoAddArray(const std::shared_ptr& array) = 0; -public: - virtual ~IValuesContainer() = default; - ui32 GetDifferentCount() const { - return DifferentCount; - } - - void AddArray(const std::shared_ptr& array) { - if (!DataType) { - DataType = array->type_id(); - } else { - AFL_VERIFY(DataType == array->type_id())("base", (ui32)*DataType)("to", (ui32)array->type_id()); - } - return DoAddArray(array); - } -}; - -template -class TCTypeValuesContainer: public IValuesContainer { -private: - using TWrap = TArrowElement; - using TArray = typename arrow::TypeTraits::ArrayType; - using TCType = typename TWrap::T::c_type; - using TCContainer = THashSet; - - TCContainer ElementsStorage; -protected: - virtual void DoAddArray(const std::shared_ptr& array) override { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if constexpr (std::is_same_v) { - const TArray& arrTyped = static_cast(*array); - for (ui32 i = 0; i < array->length(); ++i) { - if constexpr (arrow::has_c_type()) { - if (ElementsStorage.emplace(arrTyped.Value(i)).second) { - ++DifferentCount; - } - continue; - } - AFL_VERIFY(false); - } - return true; - } - AFL_VERIFY(false); - return false; - }); - } -}; - -template -class TStringValuesContainer: public IValuesContainer { -private: - using TWrap = TArrowElement; - using TArray = typename arrow::TypeTraits::ArrayType; - using TCType = TString; - using TCContainer = THashSet; - - TCContainer ElementsStorage; -protected: - virtual void DoAddArray(const std::shared_ptr& array) override { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if constexpr (std::is_same_v) { - const TArray& arrTyped = static_cast(*array); - for (ui32 i = 0; i < array->length(); ++i) { - if constexpr (arrow::has_string_view()) { - auto value = arrTyped.GetView(i); - if (ElementsStorage.emplace(value.data(), value.size()).second) { - ++DifferentCount; - } - continue; - } - AFL_VERIFY(false); - } - return true; - } - AFL_VERIFY(false); - return false; - }); - } -}; - -class TDifferentElementsAggregator { -private: - std::shared_ptr Container; -public: - TDifferentElementsAggregator() = default; - - bool HasData() const { - return !!Container; - } - - ui32 GetDifferentCount() const { - return Container ? Container->GetDifferentCount() : 0; - } - - void AddArray(const std::shared_ptr& array) { - if (!Container) { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if (!Container) { - if constexpr (arrow::has_c_type()) { - Container = std::make_shared>(); - Container->AddArray(array); - return true; - } - if constexpr (arrow::has_string_view()) { - Container = std::make_shared>(); - Container->AddArray(array); - return true; - } - AFL_VERIFY(false); - } - return false; - }); - } - Container->AddArray(array); - } -}; - -void TOperator::DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { - auto it = data.find(EntityId); - AFL_VERIFY(it != data.end()); - auto loader = index.GetColumnLoaderVerified(EntityId); - std::shared_ptr result; - TDifferentElementsAggregator aggregator; - for (auto&& i : it->second) { - auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(i->GetData())); - AFL_VERIFY(rb->num_columns() == 1); - aggregator.AddArray(rb->column(0)); - } - AFL_VERIFY(aggregator.HasData()); - portionStats.AddScalar(std::make_shared(aggregator.GetDifferentCount())); -} - -bool TOperator::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!proto.HasVariability()) { - return false; - } - EntityId = proto.GetVariability().GetEntityId(); - if (!EntityId) { - return false; - } - return true; -} - -void TOperator::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - AFL_VERIFY(EntityId); - proto.MutableVariability()->SetEntityId(EntityId); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h deleted file mode 100644 index ca46daf02aa6..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once -#include -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class TOperator: public IOperator { -private: - using TBase = IOperator; - ui32 EntityId = 0; - static inline auto Registrator = TFactory::TRegistrator(::ToString(EType::Variability)); -protected: - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const override { - std::shared_ptr scalar = portionStatsFrom.GetScalarVerified(cursor); - portionStatsTo.AddScalar(scalar); - } - - virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const override; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const override { - cursor.AddScalarsPosition(1); - } - virtual std::vector GetEntityIds() const override { - return {EntityId}; - } - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const override; -public: - - static bool IsAvailableType(const NScheme::TTypeInfo type) { - switch (type.GetTypeId()) { - case NScheme::NTypeIds::Int8: - case NScheme::NTypeIds::Uint8: - case NScheme::NTypeIds::Int16: - case NScheme::NTypeIds::Uint16: - case NScheme::NTypeIds::Int32: - case NScheme::NTypeIds::Uint32: - case NScheme::NTypeIds::Int64: - case NScheme::NTypeIds::Uint64: - case NScheme::NTypeIds::String: - case NScheme::NTypeIds::Utf8: - case NScheme::NTypeIds::Uuid: - case NScheme::NTypeIds::Timestamp: - case NScheme::NTypeIds::Double: - case NScheme::NTypeIds::Float: - case NScheme::NTypeIds::Datetime: - case NScheme::NTypeIds::Date: - case NScheme::NTypeIds::Date32: - case NScheme::NTypeIds::Datetime64: - case NScheme::NTypeIds::Timestamp64: - case NScheme::NTypeIds::Interval64: - return true; - default: - break; - } - return false; - } - - TOperator() - : TBase(EType::Variability) - { - - } - - TOperator(const ui32 entityId) - : TBase(EType::Variability) - , EntityId(entityId) { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make deleted file mode 100644 index 631c95eeb3d8..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - GLOBAL constructor.cpp - GLOBAL operator.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/abstract - ydb/core/tx/columnshard/splitter/abstract - ydb/core/formats/arrow -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make deleted file mode 100644 index 3baed9c3538a..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make +++ /dev/null @@ -1,10 +0,0 @@ -LIBRARY() - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/statistics/max - ydb/core/tx/columnshard/engines/scheme/statistics/variability - ydb/core/tx/columnshard/engines/scheme/statistics/protos -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/ya.make b/ydb/core/tx/columnshard/engines/scheme/ya.make index 8684d7894338..e3d52b2649bd 100644 --- a/ydb/core/tx/columnshard/engines/scheme/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/ya.make @@ -15,7 +15,6 @@ PEERDIR( ydb/library/actors/core ydb/core/tx/columnshard/engines/scheme/indexes - ydb/core/tx/columnshard/engines/scheme/statistics ydb/core/tx/columnshard/engines/scheme/abstract ydb/core/tx/columnshard/engines/scheme/versions ydb/core/tx/columnshard/engines/scheme/tiering From a0765ba2c0e818edc41869a2094d578920cd11ac Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 12 Jul 2024 17:07:02 +0300 Subject: [PATCH 05/12] fix build --- .../gateway/behaviour/tablestore/operations/alter_sharding.h | 1 - ydb/core/protos/ya.make | 1 - ydb/core/tx/columnshard/engines/protos/portion_info.proto | 2 -- ydb/core/tx/columnshard/engines/protos/ya.make | 1 - ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h | 1 - 5 files changed, 6 deletions(-) diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h index 52f58e14d7b5..f2931b723798 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h @@ -1,5 +1,4 @@ #include "abstract.h" -#include namespace NKikimr::NKqp { diff --git a/ydb/core/protos/ya.make b/ydb/core/protos/ya.make index f4301f6202d6..d5d2f709a27a 100644 --- a/ydb/core/protos/ya.make +++ b/ydb/core/protos/ya.make @@ -165,7 +165,6 @@ PEERDIR( ydb/library/yql/public/types ydb/library/services ydb/library/ydb_issue/proto - ydb/core/tx/columnshard/engines/scheme/statistics/protos ydb/core/tx/columnshard/engines/scheme/defaults/protos ydb/core/tx/columnshard/engines/protos ydb/core/formats/arrow/protos diff --git a/ydb/core/tx/columnshard/engines/protos/portion_info.proto b/ydb/core/tx/columnshard/engines/protos/portion_info.proto index 8e058f49d8e2..dc599633eb7a 100644 --- a/ydb/core/tx/columnshard/engines/protos/portion_info.proto +++ b/ydb/core/tx/columnshard/engines/protos/portion_info.proto @@ -1,4 +1,3 @@ -import "ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto"; import "ydb/core/formats/arrow/protos/ssa.proto"; package NKikimrTxColumnShard; @@ -19,7 +18,6 @@ message TIndexPortionMeta { optional bytes PrimaryKeyBorders = 6; // arrow::RecordBatch with first and last ReplaceKey rows optional TSnapshot RecordSnapshotMin = 7; optional TSnapshot RecordSnapshotMax = 8; - optional NKikimrColumnShardStatisticsProto.TPortionStorage StatisticsStorage = 9; optional uint32 DeletionsCount = 10; } diff --git a/ydb/core/tx/columnshard/engines/protos/ya.make b/ydb/core/tx/columnshard/engines/protos/ya.make index 67c3e138a8ac..ad664077a031 100644 --- a/ydb/core/tx/columnshard/engines/protos/ya.make +++ b/ydb/core/tx/columnshard/engines/protos/ya.make @@ -5,7 +5,6 @@ SRCS( ) PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/protos ydb/core/formats/arrow/protos ) diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index f3bbc468a0a2..4306babbb1ed 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include From ab6c34f4eea3769c342c28edda367b04b6a9eb15 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 12 Jul 2024 17:13:20 +0300 Subject: [PATCH 06/12] fix build --- .../behaviour/tablestore/operations/alter_sharding.cpp | 5 +++++ .../gateway/behaviour/tablestore/operations/alter_sharding.h | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp index fdc831d424d3..fc0e3b0d262e 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp @@ -1,4 +1,5 @@ #include "alter_sharding.h" +#include #include namespace NKikimr::NKqp { @@ -26,4 +27,8 @@ void TAlterShardingOperation::DoSerializeScheme(NKikimrSchemeOp::TModifyScheme& scheme.MutableAlterColumnTable()->MutableReshardColumnTable()->SetIncrease(*Increase); } +void TAlterShardingOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const { + AFL_VERIFY(false); +} + } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h index f2931b723798..cb81ee36da68 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h @@ -11,9 +11,7 @@ class TAlterShardingOperation: public ITableStoreOperation { static inline const auto Registrator = TFactory::TRegistrator(GetTypeName()); private: std::optional Increase; - virtual void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const override { - AFL_VERIFY(false); - } + virtual void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const override; virtual void DoSerializeScheme(NKikimrSchemeOp::TModifyScheme& scheme, const bool isStandalone) const override; public: From 29b7d426f349cf5ed708fac47bdca2248463bca0 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 12 Jul 2024 17:43:45 +0300 Subject: [PATCH 07/12] correct test --- ydb/core/kqp/ut/olap/indexes_ut.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp index 1a55241f7137..a1d11fe22a79 100644 --- a/ydb/core/kqp/ut/olap/indexes_ut.cpp +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -285,7 +285,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("skip", csController->GetIndexesSkippingOnSelect().Val())("check", csController->GetIndexesApprovedOnSelect().Val()); CompareYson(result, R"([[0u;]])"); AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val() * 0.3); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()); } ui32 requestsCount = 100; for (ui32 i = 0; i < requestsCount; ++i) { @@ -310,7 +310,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { CompareYson(result, R"([[1u;]])"); } - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < 0.3 * csController->GetIndexesSkippingOnSelect().Val()) + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()) ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); } From 82cb6e56927bcf23dac323c9538c01fb126e6695 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 12 Jul 2024 17:50:11 +0300 Subject: [PATCH 08/12] correct test --- ydb/core/kqp/ut/olap/statistics_ut.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/core/kqp/ut/olap/statistics_ut.cpp b/ydb/core/kqp/ut/olap/statistics_ut.cpp index 415513bcabb9..ece5e454bacb 100644 --- a/ydb/core/kqp/ut/olap/statistics_ut.cpp +++ b/ydb/core/kqp/ut/olap/statistics_ut.cpp @@ -14,7 +14,7 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { helper.CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); { - auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"pk_int\"}`)"; + auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"pk_int\"}`))"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -32,7 +32,7 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=max_pk_int);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=max_pk_int);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -50,7 +50,7 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { helper.CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_ts, FEATURES=`{\"column_name\": \"ts\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, TYPE=MAX, NAME=max_ts, FEATURES=`{\"column_name\": \"ts\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -62,7 +62,7 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=max_ts);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=max_ts);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); From 8370370aa13c6ca40b0666bce5f7b4bb743ae011 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Sat, 13 Jul 2024 08:28:41 +0300 Subject: [PATCH 09/12] fix local storage construction in test --- .../tx/columnshard/test_helper/helper.cpp | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/ydb/core/tx/columnshard/test_helper/helper.cpp b/ydb/core/tx/columnshard/test_helper/helper.cpp index 8b35442b7abb..4731960a004f 100644 --- a/ydb/core/tx/columnshard/test_helper/helper.cpp +++ b/ydb/core/tx/columnshard/test_helper/helper.cpp @@ -1,12 +1,15 @@ #include "helper.h" -#include + #include #include #include #include -#include -#include #include +#include + +#include + +#include #ifndef KIKIMR_DISABLE_S3_OPS #include #endif @@ -36,7 +39,8 @@ std::vector> TTestColumn::Conver return result; } -std::vector TTestColumn::BuildFromPairs(const std::vector>& columns) { +std::vector TTestColumn::BuildFromPairs( + const std::vector>& columns) { std::vector result; for (auto&& i : columns) { result.emplace_back(i.first, i.second); @@ -57,38 +61,44 @@ std::vector TTestColumn::CropSchema(const s return std::vector(input.begin(), input.begin() + size); } -} +} // namespace NKikimr::NArrow::NTest namespace NKikimr::NArrow { -std::vector> MakeArrowFields(const std::vector& columns, const std::set& notNullColumns /*= {}*/) { +std::vector> MakeArrowFields( + const std::vector& columns, const std::set& notNullColumns /*= {}*/) { auto result = MakeArrowFields(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); UNIT_ASSERT_C(result.ok(), result.status().ToString()); return result.ValueUnsafe(); } -std::shared_ptr MakeArrowSchema(const std::vector& columns, const std::set& notNullColumns /*= {}*/) { +std::shared_ptr MakeArrowSchema( + const std::vector& columns, const std::set& notNullColumns /*= {}*/) { auto result = MakeArrowSchema(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); UNIT_ASSERT_C(result.ok(), result.status().ToString()); return result.ValueUnsafe(); } -} +} // namespace NKikimr::NArrow namespace NKikimr::NOlap { std::shared_ptr TTestStoragesManager::DoBuildOperator(const TString& storageId) { if (storageId == TBase::DefaultStorageId) { - return std::make_shared(storageId, NActors::TActorId(), TabletInfo, - GetGeneration(), SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); + return std::make_shared(storageId, NActors::TActorId(), TabletInfo, GetGeneration(), + SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); + } else if (storageId == TBase::LocalMetadataStorageId) { + return std::make_shared( + storageId, SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); } else if (storageId == TBase::MemoryStorageId) { #ifndef KIKIMR_DISABLE_S3_OPS Singleton()->SetSecretKey("fakeSecret"); - return std::make_shared(storageId, NActors::TActorId(), std::make_shared("fakeBucket", "fakeSecret"), + return std::make_shared(storageId, NActors::TActorId(), + std::make_shared("fakeBucket", "fakeSecret"), SharedBlobsManager->GetStorageManagerGuarantee(storageId), GetGeneration()); #endif } return nullptr; } -} \ No newline at end of file +} // namespace NKikimr::NOlap From 43cfe1c76429a4fc522ae21bf99c16225ccba2f1 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Sat, 13 Jul 2024 08:30:29 +0300 Subject: [PATCH 10/12] fix --- ydb/core/tx/columnshard/test_helper/helper.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ydb/core/tx/columnshard/test_helper/helper.cpp b/ydb/core/tx/columnshard/test_helper/helper.cpp index 4731960a004f..a7aca5f114ca 100644 --- a/ydb/core/tx/columnshard/test_helper/helper.cpp +++ b/ydb/core/tx/columnshard/test_helper/helper.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include From df9001f4785a69a0e4ed6eec3175ec79ed8cc39a Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Sun, 14 Jul 2024 08:21:52 +0300 Subject: [PATCH 11/12] correct after rebase --- ydb/core/testlib/common_helper.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ydb/core/testlib/common_helper.cpp b/ydb/core/testlib/common_helper.cpp index d5267342d540..8e92ccd15c1b 100644 --- a/ydb/core/testlib/common_helper.cpp +++ b/ydb/core/testlib/common_helper.cpp @@ -22,6 +22,9 @@ const std::vector TLoggerInit::KqpServices = { const std::vector TLoggerInit::CSServices = { NKikimrServices::TX_COLUMNSHARD, + NKikimrServices::TX_COLUMNSHARD_BLOBS, + NKikimrServices::TX_COLUMNSHARD_BLOBS_BS, + NKikimrServices::TX_COLUMNSHARD_BLOBS_TIER, NKikimrServices::TX_COLUMNSHARD_SCAN, NKikimrServices::TX_CONVEYOR }; From 905d848b2eff8e08a0396315866acb5814a56c41 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Sun, 14 Jul 2024 18:20:47 +0300 Subject: [PATCH 12/12] test for local_metadata storage using --- .../tablestore/operations/upsert_index.cpp | 7 - .../tablestore/operations/upsert_index.h | 1 - ydb/core/kqp/ut/olap/indexes_ut.cpp | 245 ++++++++++-------- .../scheme/indexes/abstract/constructor.cpp | 15 ++ .../scheme/indexes/abstract/constructor.h | 13 +- .../storage/indexes/bloom/constructor.cpp | 2 +- .../engines/storage/indexes/bloom/meta.h | 4 +- .../storage/indexes/max/constructor.cpp | 2 +- .../engines/storage/indexes/max/meta.h | 4 +- .../test_helper/columnshard_ut_common.cpp | 2 +- .../tx/schemeshard/olap/indexes/schema.cpp | 6 +- ydb/core/tx/schemeshard/olap/indexes/schema.h | 1 - 12 files changed, 165 insertions(+), 137 deletions(-) diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp index ae0f08e3333d..61914cb6e005 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp @@ -12,10 +12,6 @@ TConclusionStatus TUpsertIndexOperation::DoDeserialize(NYql::TObjectSettingsImpl } IndexName = *fValue; } - StorageId = features.Extract("STORAGE_ID"); - if (StorageId && !*StorageId) { - return TConclusionStatus::Fail("STORAGE_ID cannot be empty string"); - } TString indexType; { auto fValue = features.Extract("TYPE"); @@ -46,9 +42,6 @@ TConclusionStatus TUpsertIndexOperation::DoDeserialize(NYql::TObjectSettingsImpl void TUpsertIndexOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { auto* indexProto = schemaData.AddUpsertIndexes(); - if (StorageId) { - indexProto->SetStorageId(*StorageId); - } indexProto->SetName(IndexName); IndexMetaConstructor.SerializeToProto(*indexProto); } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h index 12305f85f0ae..267829a1a5f4 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h @@ -12,7 +12,6 @@ class TUpsertIndexOperation : public ITableStoreOperation { static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); private: TString IndexName; - std::optional StorageId; NBackgroundTasks::TInterfaceProtoContainer IndexMetaConstructor; public: TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp index a1d11fe22a79..feff93803fc5 100644 --- a/ydb/core/kqp/ut/olap/indexes_ut.cpp +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -171,78 +171,87 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { (ui64)csController->GetActualizationRefreshSchemeCount().Val())("updates", updatesCount)("count", csController->GetActualizationRefreshSchemeCount().Val()); } - Y_UNIT_TEST(Indexes) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); + class TTestIndexesScenario { + private: + TKikimrSettings Settings; + std::unique_ptr Kikimr; + YDB_ACCESSOR(TString, StorageId, "__DEFAULT"); + public: + TTestIndexesScenario& Initialize() { + Settings = TKikimrSettings().SetWithSampleTables(false); + Kikimr = std::make_unique(Settings); + return *this; + } -// Tests::NCommon::TLoggerInit(kikimr).Initialize(); + void Execute() const { + TLocalHelper(*Kikimr).CreateTestOlapTable(); + auto tableClient = Kikimr->GetTableClient(); - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << - "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, EXTERNAL_GUARANTEE_EXCLUSIVE_PK=`true`);"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } + // Tests::NCommon::TLoggerInit(kikimr).Initialize(); - std::vector uids; - std::vector resourceIds; - std::vector levels; + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + { + auto alterQuery = TStringBuilder() << Sprintf( + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05, "storage_id" : "%s"}`); + )", StorageId.data()); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << Sprintf( + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05, "storage_id" : "%s"}`); + )", StorageId.data() + ); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, EXTERNAL_GUARANTEE_EXCLUSIVE_PK=`true`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } - const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { - for (ui32 i = 0; i < count; ++i) { - uids.emplace_back("uid_" + ::ToString(startUid + i)); - resourceIds.emplace_back(::ToString(startRes + i)); - levels.emplace_back(i % 5); - } - }; + std::vector uids; + std::vector resourceIds; + std::vector levels; + + { + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + + const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { + for (ui32 i = 0; i < count; ++i) { + uids.emplace_back("uid_" + ::ToString(startUid + i)); + resourceIds.emplace_back(::ToString(startRes + i)); + levels.emplace_back(i % 5); + } + }; + + filler(1000000, 300000000, 10000); + filler(1100000, 300100000, 10000); + filler(1200000, 300200000, 10000); + filler(1300000, 300300000, 10000); + filler(1400000, 300400000, 10000); + filler(2000000, 200000000, 70000); + filler(3000000, 100000000, 110000); - filler(1000000, 300000000, 10000); - filler(1100000, 300100000, 10000); - filler(1200000, 300200000, 10000); - filler(1300000, 300300000, 10000); - filler(1400000, 300400000, 10000); - filler(2000000, 200000000, 70000); - filler(3000000, 100000000, 110000); - - } + } - { - auto it = tableClient.StreamExecuteScanQuery(R"( + { + auto it = tableClient.StreamExecuteScanQuery(R"( --!syntax_v1 SELECT @@ -250,27 +259,27 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { FROM `/Root/olapStore/olapTable` )").GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[230000u;]])"); - } + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[230000u;]])"); + } - AFL_VERIFY(csController->GetIndexesSkippingOnSelect().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() == 0); - TInstant start = Now(); - ui32 compactionsStart = csController->GetCompactionStartedCounter().Val(); - while (Now() - start < TDuration::Seconds(10)) { - if (compactionsStart != csController->GetCompactionStartedCounter().Val()) { - compactionsStart = csController->GetCompactionStartedCounter().Val(); - start = Now(); + AFL_VERIFY(csController->GetIndexesSkippingOnSelect().Val() == 0); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() == 0); + TInstant start = Now(); + ui32 compactionsStart = csController->GetCompactionStartedCounter().Val(); + while (Now() - start < TDuration::Seconds(10)) { + if (compactionsStart != csController->GetCompactionStartedCounter().Val()) { + compactionsStart = csController->GetCompactionStartedCounter().Val(); + start = Now(); + } + Cerr << "WAIT_COMPACTION: " << csController->GetCompactionStartedCounter().Val() << Endl; + Sleep(TDuration::Seconds(1)); } - Cerr << "WAIT_COMPACTION: " << csController->GetCompactionStartedCounter().Val() << Endl; - Sleep(TDuration::Seconds(1)); - } - { - auto it = tableClient.StreamExecuteScanQuery(R"( + { + auto it = tableClient.StreamExecuteScanQuery(R"( --!syntax_v1 SELECT @@ -279,40 +288,50 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { WHERE ((resource_id = '2' AND level = 222222) OR (resource_id = '1' AND level = 111111) OR (resource_id LIKE '%11dd%')) AND uid = '222' )").GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("result", result); - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("skip", csController->GetIndexesSkippingOnSelect().Val())("check", csController->GetIndexesApprovedOnSelect().Val()); - CompareYson(result, R"([[0u;]])"); - AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()); - } - ui32 requestsCount = 100; - for (ui32 i = 0; i < requestsCount; ++i) { - const ui32 idx = RandomNumber(uids.size()); - const auto query = [](const TString& res, const TString& uid, const ui32 level) { - TStringBuilder sb; - sb << "SELECT" << Endl; - sb << "COUNT(*)" << Endl; - sb << "FROM `/Root/olapStore/olapTable`" << Endl; - sb << "WHERE(" << Endl; - sb << "resource_id = '" << res << "' AND" << Endl; - sb << "uid= '" << uid << "' AND" << Endl; - sb << "level= " << level << Endl; - sb << ")"; - return sb; - }; - auto it = tableClient.StreamExecuteScanQuery(query(resourceIds[idx], uids[idx], levels[idx])).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("result", result); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("skip", csController->GetIndexesSkippingOnSelect().Val())("check", csController->GetIndexesApprovedOnSelect().Val()); + CompareYson(result, R"([[0u;]])"); + if (StorageId == "__LOCAL_METADATA") { + AFL_VERIFY(csController->GetIndexesSkippedNoData().Val()); + } else { + AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0)("val", csController->GetIndexesSkippedNoData().Val()); + } + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()); + } + ui32 requestsCount = 100; + for (ui32 i = 0; i < requestsCount; ++i) { + const ui32 idx = RandomNumber(uids.size()); + const auto query = [](const TString& res, const TString& uid, const ui32 level) { + TStringBuilder sb; + sb << "SELECT COUNT(*) FROM `/Root/olapStore/olapTable`" << Endl; + sb << "WHERE(" << Endl; + sb << "resource_id = '" << res << "' AND" << Endl; + sb << "uid= '" << uid << "' AND" << Endl; + sb << "level= " << level << Endl; + sb << ")"; + return sb; + }; + auto it = tableClient.StreamExecuteScanQuery(query(resourceIds[idx], uids[idx], levels[idx])).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; + CompareYson(result, R"([[1u;]])"); + } - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; - CompareYson(result, R"([[1u;]])"); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()) + ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); } + }; - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()) - ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); + Y_UNIT_TEST(IndexesInBS) { + TTestIndexesScenario().SetStorageId("__DEFAULT").Initialize().Execute(); + } + Y_UNIT_TEST(IndexesInLocalMetadata) { + TTestIndexesScenario().SetStorageId("__LOCAL_METADATA").Initialize().Execute(); } Y_UNIT_TEST(IndexesModificationError) { diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp index a93507bec06f..e6dad360d159 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp @@ -2,4 +2,19 @@ namespace NKikimr::NOlap::NIndexes { +NKikimr::TConclusionStatus IIndexMetaConstructor::DeserializeFromJson(const NJson::TJsonValue& jsonInfo) { + if (jsonInfo.Has("storage_id")) { + if (!jsonInfo["storage_id"].IsString()) { + return TConclusionStatus::Fail("incorrect storage_id field in json index description (have to be string)"); + } + StorageId = jsonInfo["storage_id"].GetStringSafe(); + if (!*StorageId) { + return TConclusionStatus::Fail("storage_id cannot be empty string"); + } else if (*StorageId != "__LOCAL_METADATA" && *StorageId != "__DEFAULT") { + return TConclusionStatus::Fail("storage_id have to been one of variant ['__LOCAL_METADATA', '__DEFAULT']"); + } + } + return DoDeserializeFromJson(jsonInfo); +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h index e6fe22e3f273..dded1abd0081 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h @@ -14,6 +14,9 @@ class TOlapSchema; namespace NKikimr::NOlap::NIndexes { class IIndexMetaConstructor { +private: + YDB_READONLY_DEF(std::optional, StorageId); + protected: virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) = 0; virtual std::shared_ptr DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const = 0; @@ -25,19 +28,23 @@ class IIndexMetaConstructor { virtual ~IIndexMetaConstructor() = default; - TConclusionStatus DeserializeFromJson(const NJson::TJsonValue& jsonInfo) { - return DoDeserializeFromJson(jsonInfo); - } + TConclusionStatus DeserializeFromJson(const NJson::TJsonValue& jsonInfo); std::shared_ptr CreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const { return DoCreateIndexMeta(indexId, indexName, currentSchema, errors); } TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) { + if (proto.HasStorageId()) { + StorageId = proto.GetStorageId(); + } return DoDeserializeFromProto(proto); } void SerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const { + if (StorageId) { + proto.SetStorageId(*StorageId); + } return DoSerializeToProto(proto); } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp index 9a464f67d595..fa11002fe17f 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp @@ -15,7 +15,7 @@ std::shared_ptr TBloomIndexConstructor::Do } AFL_VERIFY(columnIds.emplace(columnInfo->GetId()).second); } - return std::make_shared(indexId, indexName, columnIds, FalsePositiveProbability); + return std::make_shared(indexId, indexName, GetStorageId().value_or(NBlobOperations::TGlobal::DefaultStorageId), columnIds, FalsePositiveProbability); } NKikimr::TConclusionStatus TBloomIndexConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h index feecc2e83524..4fa0a5be0c0e 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h @@ -75,8 +75,8 @@ class TBloomIndexMeta: public TIndexByColumns { public: TBloomIndexMeta() = default; - TBloomIndexMeta(const ui32 indexId, const TString& indexName, std::set& columnIds, const double fpProbability) - : TBase(indexId, indexName, columnIds, NBlobOperations::TGlobal::DefaultStorageId) + TBloomIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId, std::set& columnIds, const double fpProbability) + : TBase(indexId, indexName, columnIds, storageId) , FalsePositiveProbability(fpProbability) { Initialize(); } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp index d7a3516c8de2..6c1efcc0e570 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp @@ -20,7 +20,7 @@ std::shared_ptr TIndexConstructor::DoCreat } columnId = columnInfo->GetId(); } - return std::make_shared(indexId, indexName, columnId); + return std::make_shared(indexId, indexName, GetStorageId().value_or(NBlobOperations::TGlobal::LocalMetadataStorageId), columnId); } NKikimr::TConclusionStatus TIndexConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h index 6c48398043ff..c93cf91f49e0 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h @@ -41,8 +41,8 @@ class TIndexMeta: public TIndexByColumns { public: TIndexMeta() = default; - TIndexMeta(const ui32 indexId, const TString& indexName, const ui32& columnId) - : TBase(indexId, indexName, { columnId }, NBlobOperations::TGlobal::LocalMetadataStorageId) { + TIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId, const ui32& columnId) + : TBase(indexId, indexName, { columnId }, storageId) { } ui32 GetColumnId() const { diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp index f45afd7c9be6..4f8cfe4ce4b9 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp @@ -408,7 +408,7 @@ void TTestSchema::InitSchema(const std::vector& colu } if (NOlap::NIndexes::NMax::TIndexMeta::IsAvailableType(columns[i].GetType())) { *schema->AddIndexes() = NOlap::NIndexes::TIndexMetaContainer( - std::make_shared(1000 + i, "MAX::INDEX::" + columns[i].GetName(), i + 1)) + std::make_shared(1000 + i, "MAX::INDEX::" + columns[i].GetName(), "__LOCAL_METADATA", i + 1)) .SerializeToProto(); } } diff --git a/ydb/core/tx/schemeshard/olap/indexes/schema.cpp b/ydb/core/tx/schemeshard/olap/indexes/schema.cpp index 0f31bf0e2ede..8b00178a459c 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/schema.cpp +++ b/ydb/core/tx/schemeshard/olap/indexes/schema.cpp @@ -1,19 +1,18 @@ #include "schema.h" #include +#include namespace NKikimr::NSchemeShard { void TOlapIndexSchema::SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& indexSchema) const { indexSchema.SetId(Id); indexSchema.SetName(Name); - indexSchema.SetStorageId(StorageId); IndexMeta.SerializeToProto(indexSchema); } void TOlapIndexSchema::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& indexSchema) { Id = indexSchema.GetId(); Name = indexSchema.GetName(); - StorageId = indexSchema.GetStorageId(); AFL_VERIFY(IndexMeta.DeserializeFromProto(indexSchema))("incorrect_proto", indexSchema.DebugString()); } @@ -24,9 +23,6 @@ bool TOlapIndexSchema::ApplyUpdate(const TOlapSchema& currentSchema, const TOlap errors.AddError("different index classes: " + upsert.GetIndexConstructor().GetClassName() + " vs " + IndexMeta.GetClassName()); return false; } - if (upsert.GetStorageId()) { - StorageId = *upsert.GetStorageId(); - } auto object = upsert.GetIndexConstructor()->CreateIndexMeta(GetId(), GetName(), currentSchema, errors); if (!object) { return false; diff --git a/ydb/core/tx/schemeshard/olap/indexes/schema.h b/ydb/core/tx/schemeshard/olap/indexes/schema.h index 1aa302ecb826..630016fe96a5 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/schema.h +++ b/ydb/core/tx/schemeshard/olap/indexes/schema.h @@ -10,7 +10,6 @@ class TOlapIndexSchema { using TBase = TOlapIndexUpsert; YDB_READONLY(ui32, Id, Max()); YDB_READONLY_DEF(TString, Name); - YDB_READONLY_DEF(TString, StorageId); YDB_READONLY_DEF(NBackgroundTasks::TInterfaceProtoContainer, IndexMeta); public: TOlapIndexSchema() = default;