diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp index 1ad515529129..2136d43eec2a 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp @@ -200,6 +200,7 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "incorrect_engine_in_schema"); return false; } + AFL_VERIFY(cache); { TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Optimizer"); @@ -235,10 +236,10 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Columns"); for (const auto& col : schema.GetColumns()) { const ui32 id = col.GetId(); - const TString& name = col.GetName(); + const TString& name = cache->GetStringCache(col.GetName()); const bool notNull = col.HasNotNull() ? col.GetNotNull() : false; auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(col.GetTypeId(), col.HasTypeInfo() ? &col.GetTypeInfo() : nullptr); - Columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, typeInfoMod.TypeMod, notNull); + Columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, cache->GetStringCache(typeInfoMod.TypeMod), notNull); ColumnNames[name] = id; } } @@ -246,28 +247,35 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& Y_ABORT_UNLESS(ColumnNames.contains(keyName)); KeyColumns.push_back(ColumnNames[keyName]); } - InitializeCaches(operators, cache); + InitializeCaches(operators, cache, false); { TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Columns::Features"); for (const auto& col : schema.GetColumns()) { - auto it = ColumnFeatures.find(col.GetId()); - AFL_VERIFY(it != ColumnFeatures.end()); - const TString fingerprint = cache ? col.SerializeAsString() : Default(); - if (cache) { - if (std::shared_ptr f = cache->GetColumnFeatures(fingerprint)) { - it->second = f; - continue; + THashMap> it; + const TString fingerprint = cache ? ("C:" + col.SerializeAsString()) : Default(); + const auto createPred = [&]() -> TConclusion> { + auto f = BuildDefaultColumnFeatures(col.GetId(), operators); + auto parsed = f->DeserializeFromProto(col, operators); + if (parsed.IsFail()) { + return parsed; } - } - - auto parsed = it->second->DeserializeFromProto(col, operators); - if (!parsed) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", parsed.GetErrorMessage()); + return f; + }; + auto fConclusion = cache->GetOrCreateColumnFeatures(fingerprint, createPred); + if (fConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", fConclusion.GetErrorMessage()); return false; } - if (cache) { - cache->RegisterColumnFeatures(fingerprint, it->second); - } + AFL_VERIFY(ColumnFeatures.emplace(col.GetId(), fConclusion.DetachResult()).second); + } + for (auto&& cId : GetSystemColumnIds()) { + THashMap> it; + const TString fingerprint = "SC:" + ::ToString(cId); + const auto createPred = [&]() -> TConclusion> { + return BuildDefaultColumnFeatures(cId, operators); + }; + auto fConclusion = cache->GetOrCreateColumnFeatures(fingerprint, createPred); + AFL_VERIFY(ColumnFeatures.emplace(cId, fConclusion.DetachResult()).second); } } @@ -295,8 +303,8 @@ std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TCol return result; } -std::shared_ptr MakeArrowSchema( - const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const std::shared_ptr& cache) { +std::vector> MakeArrowFields(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache) { std::vector> fields; for (const ui32 id : ids) { AFL_VERIFY(!TIndexInfo::IsSpecialColumn(id)); @@ -321,10 +329,16 @@ std::shared_ptr MakeArrowSchema( } } - return std::make_shared(std::move(fields)); + return fields; +} + +std::shared_ptr MakeArrowSchema( + const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const std::shared_ptr& cache) { + return std::make_shared(MakeArrowFields(columns, ids, cache)); } -void TIndexInfo::InitializeCaches(const std::shared_ptr& operators, const std::shared_ptr& cache) { +void TIndexInfo::InitializeCaches(const std::shared_ptr& operators, const std::shared_ptr& cache, + const bool withColumnFeatures) { { TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::Schema"); AFL_VERIFY(!Schema); @@ -334,33 +348,31 @@ void TIndexInfo::InitializeCaches(const std::shared_ptr& opera } std::sort(SchemaColumnIds.begin(), SchemaColumnIds.end()); - auto originalSchema = MakeArrowSchema(Columns, SchemaColumnIds, cache); - Schema = std::make_shared(originalSchema); - SchemaWithSpecials = std::make_shared(IIndexInfo::AddSpecialFields(originalSchema)); + auto originalFields = MakeArrowFields(Columns, SchemaColumnIds, cache); + Schema = std::make_shared(originalFields); + IIndexInfo::AddSpecialFields(originalFields); + SchemaWithSpecials = std::make_shared(originalFields); } { TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SchemaFields"); SchemaColumnIdsWithSpecials = IIndexInfo::AddSpecialFieldIds(SchemaColumnIds); - SchemaColumnIdsWithSpecialsSet = IIndexInfo::AddSpecialFieldIds(std::set(SchemaColumnIds.begin(), SchemaColumnIds.end())); ui32 idx = 0; for (auto&& i : SchemaColumnIdsWithSpecials) { AFL_VERIFY(IdIntoIndex.emplace(i, idx++).second); } } - { - TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::Columns"); - for (auto&& c : Columns) { - AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(c.first, GetColumnFieldVerified(c.first)).second); - AFL_VERIFY(ColumnFeatures.emplace(c.first, std::make_shared(c.first, GetColumnFieldVerified(c.first), DefaultSerializer, operators->GetDefaultOperator(), - NArrow::IsPrimitiveYqlType(c.second.PType), c.first == GetPKFirstColumnId(), nullptr)).second); + if (withColumnFeatures) { + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::Columns"); + for (auto&& c : Columns) { + AFL_VERIFY(ColumnFeatures.emplace(c.first, BuildDefaultColumnFeatures(c.first, operators)).second); + } } - } - { - TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SysColumns"); - for (auto&& cId : GetSystemColumnIds()) { - AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(cId, GetColumnFieldVerified(cId)).second); - AFL_VERIFY(ColumnFeatures.emplace(cId, std::make_shared(cId, GetColumnFieldVerified(cId), DefaultSerializer, operators->GetDefaultOperator(), - false, false, IIndexInfo::DefaultColumnValue(cId))).second); + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SysColumns"); + for (auto&& cId : GetSystemColumnIds()) { + AFL_VERIFY(ColumnFeatures.emplace(cId, BuildDefaultColumnFeatures(cId, operators)).second); + } } } } @@ -448,4 +460,17 @@ std::vector TIndexInfo::GetEntityIds() const { return result; } +std::shared_ptr TIndexInfo::BuildDefaultColumnFeatures( + const ui32 columnId, const std::shared_ptr& operators) const { + if (IsSpecialColumn(columnId)) { + return std::make_shared(columnId, GetColumnFieldVerified(columnId), DefaultSerializer, operators->GetDefaultOperator(), + false, false, IIndexInfo::DefaultColumnValue(columnId)); + } else { + auto itC = Columns.find(columnId); + AFL_VERIFY(itC != Columns.end()); + return std::make_shared(columnId, GetColumnFieldVerified(columnId), DefaultSerializer, operators->GetDefaultOperator(), + NArrow::IsPrimitiveYqlType(itC->second.PType), columnId == GetPKFirstColumnId(), nullptr); + } +} + } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index fcb4d5a8ec97..6a31e79bfe1d 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -46,10 +46,19 @@ class TSchemaObjectsCache { private: THashMap> Fields; THashMap> ColumnFeatures; + THashSet StringsCache; mutable ui64 AcceptionFieldsCount = 0; mutable ui64 AcceptionFeaturesCount = 0; public: + const TString& GetStringCache(const TString& original) { + auto it = StringsCache.find(original); + if (it == StringsCache.end()) { + it = StringsCache.emplace(original).first; + } + return *it; + } + void RegisterField(const TString& fingerprint, const std::shared_ptr& f) { AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "register_field")("fp", fingerprint)("f", f->ToString()); AFL_VERIFY(Fields.emplace(fingerprint, f).second); @@ -71,16 +80,23 @@ class TSchemaObjectsCache { } return it->second; } - std::shared_ptr GetColumnFeatures(const TString& fingerprint) const { + template + TConclusion> GetOrCreateColumnFeatures(const TString& fingerprint, const TConstructor& constructor) { auto it = ColumnFeatures.find(fingerprint); if (it == ColumnFeatures.end()) { AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_miss")("fp", UrlEscapeRet(fingerprint))( "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); - return nullptr; - } - if (++AcceptionFeaturesCount % 1000 == 0) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_accept")("fp", UrlEscapeRet(fingerprint))( - "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); + TConclusion> resultConclusion = constructor(); + if (resultConclusion.IsFail()) { + return resultConclusion; + } + it = ColumnFeatures.emplace(fingerprint, resultConclusion.DetachResult()).first; + AFL_VERIFY(it->second); + } else { + if (++AcceptionFeaturesCount % 1000 == 0) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_accept")("fp", UrlEscapeRet(fingerprint))( + "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); + } } return it->second; } @@ -92,7 +108,6 @@ struct TIndexInfo: public NTable::TScheme::TTableSchema, public IIndexInfo { private: THashMap IdIntoIndex; THashMap> ColumnFeatures; - THashMap> ArrowColumnByColumnIdCache; THashMap Indexes; TIndexInfo(const TString& name); bool SchemeNeedActualization = false; @@ -100,7 +115,8 @@ struct TIndexInfo: public NTable::TScheme::TTableSchema, public IIndexInfo { bool ExternalGuaranteeExclusivePK = false; bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators, const std::shared_ptr& cache); - void InitializeCaches(const std::shared_ptr& operators, const std::shared_ptr& cache); + void InitializeCaches(const std::shared_ptr& operators, const std::shared_ptr& cache, const bool withColumnFeatures = true); + std::shared_ptr BuildDefaultColumnFeatures(const ui32 columnId, const std::shared_ptr& operators) const; public: std::shared_ptr GetCompactionPlannerConstructor() const; @@ -312,9 +328,6 @@ struct TIndexInfo: public NTable::TScheme::TTableSchema, public IIndexInfo { std::vector GetColumnNames(const std::vector& ids) const; std::vector GetColumnSTLNames(const std::vector& ids) const; const std::vector& GetColumnIds(const bool withSpecial = true) const; - const std::set& GetColumnIdsSet() const { - return SchemaColumnIdsWithSpecialsSet; - } const std::vector& GetPKColumnIds() const { AFL_VERIFY(PKColumnIds.size()); return PKColumnIds; @@ -379,7 +392,6 @@ struct TIndexInfo: public NTable::TScheme::TTableSchema, public IIndexInfo { TString Name; std::vector SchemaColumnIds; std::vector SchemaColumnIdsWithSpecials; - std::set SchemaColumnIdsWithSpecialsSet; std::vector PKColumnIds; std::shared_ptr SchemaWithSpecials; std::shared_ptr Schema; @@ -390,6 +402,8 @@ struct TIndexInfo: public NTable::TScheme::TTableSchema, public IIndexInfo { std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const std::shared_ptr& cache = nullptr); +std::vector> MakeArrowFields(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache = nullptr); /// Extracts columns with the specific ids from the schema. std::vector GetColumns(const NTable::TScheme::TTableSchema& tableSchema, const std::vector& ids); diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h index f08e8eb3ff60..962989d75fb2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h @@ -27,7 +27,7 @@ class ISnapshotSchema { std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) const; bool IsSpecialColumnId(const ui32 columnId) const; - virtual const std::set& GetColumnIds() const = 0; + virtual const std::vector& GetColumnIds() const = 0; virtual NArrow::NAccessor::TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; NArrow::NAccessor::TColumnSaver GetColumnSaver(const TString& columnName) const { diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp index 620da7befc27..f8ea532fadf6 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp @@ -4,11 +4,11 @@ namespace NKikimr::NOlap { -TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds) - : TFilteredSnapshotSchema(originalSnapshot, std::set(columnIds.begin(), columnIds.end())) { +TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds) + : TFilteredSnapshotSchema(originalSnapshot, std::vector(columnIds.begin(), columnIds.end())) { } -TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds) +TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds) : OriginalSnapshot(originalSnapshot) , ColumnIds(columnIds) { @@ -21,12 +21,12 @@ TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& or } TColumnSaver TFilteredSnapshotSchema::GetColumnSaver(const ui32 columnId) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + AFL_VERIFY(std::find(ColumnIds.begin(), ColumnIds.end(), columnId) != ColumnIds.end()); return OriginalSnapshot->GetColumnSaver(columnId); } std::shared_ptr TFilteredSnapshotSchema::GetColumnLoaderOptional(const ui32 columnId) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + AFL_VERIFY(std::find(ColumnIds.begin(), ColumnIds.end(), columnId) != ColumnIds.end()); return OriginalSnapshot->GetColumnLoaderOptional(columnId); } @@ -35,7 +35,7 @@ std::optional TFilteredSnapshotSchema::GetColumnIdOptional(const std::stri if (!result) { return result; } - if (!ColumnIds.contains(*result)) { + if (std::find(ColumnIds.begin(), ColumnIds.end(), *result) == ColumnIds.end()) { return std::nullopt; } return result; @@ -43,7 +43,7 @@ std::optional TFilteredSnapshotSchema::GetColumnIdOptional(const std::stri ui32 TFilteredSnapshotSchema::GetColumnIdVerified(const std::string& columnName) const { auto result = OriginalSnapshot->GetColumnIdVerified(columnName); - AFL_VERIFY(ColumnIds.contains(result)); + AFL_VERIFY(std::find(ColumnIds.begin(), ColumnIds.end(), result) != ColumnIds.end()); return result; } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h index 5c2210096961..8fc82ee6a304 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h @@ -9,7 +9,7 @@ namespace NKikimr::NOlap { class TFilteredSnapshotSchema: public ISnapshotSchema { ISnapshotSchema::TPtr OriginalSnapshot; std::shared_ptr Schema; - std::set ColumnIds; + std::vector ColumnIds; THashMap IdIntoIndex; protected: @@ -18,7 +18,7 @@ class TFilteredSnapshotSchema: public ISnapshotSchema { TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds); TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds); - virtual const std::set& GetColumnIds() const override { + virtual const std::vector& GetColumnIds() const override { return ColumnIds; } TColumnSaver GetColumnSaver(const ui32 columnId) const override; diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h index 58599fb39bf4..5fa3c4ef7551 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h @@ -23,8 +23,8 @@ class TSnapshotSchema: public ISnapshotSchema { public: TSnapshotSchema(TIndexInfo&& indexInfo, const TSnapshot& snapshot); - virtual const std::set& GetColumnIds() const override { - return IndexInfo.GetColumnIdsSet(); + virtual const std::vector& GetColumnIds() const override { + return IndexInfo.GetColumnIds(); } TColumnSaver GetColumnSaver(const ui32 columnId) const override; diff --git a/ydb/library/conclusion/result.h b/ydb/library/conclusion/result.h index 72aaf29f1a47..3e0cde0c7da2 100644 --- a/ydb/library/conclusion/result.h +++ b/ydb/library/conclusion/result.h @@ -40,6 +40,11 @@ class TConclusion { : Result(result) { } + template + TConclusion(TResultArg& result) + : Result(result) { + } + const TConclusionStatus& GetError() const { auto result = std::get_if(&Result); Y_ABORT_UNLESS(result, "incorrect object for error request");