From c94e8b104472d1b4506d420a6ac7c141e40f87a7 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 25 Jul 2024 12:15:32 +0300 Subject: [PATCH 1/5] dont create delete flag column in indexation if it is not necessary --- ydb/core/tx/columnshard/engines/changes/indexation.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index 9fced30f5c9e..242c3939ecc4 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -160,6 +160,9 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont auto blobSchema = context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion()); std::vector filteredIds = inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(true)); usageColumnIds.insert(filteredIds.begin(), filteredIds.end()); + if (inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { + usageColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); + } if (usageColumnIds.size() == resultSchema->GetIndexInfo().GetColumnIds(true).size()) { break; } @@ -179,8 +182,10 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont } IIndexInfo::AddSnapshotColumns(*batch, inserted.GetSnapshot()); - IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); - usageColumnIds.insert(IIndexInfo::GetSystemColumnIds().begin(), IIndexInfo::GetSystemColumnIds().end()); + if (usageColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); + } + usageColumnIds.insert(IIndexInfo::GetSnapshotColumnIds().begin(), IIndexInfo::GetSnapshotColumnIds().end()); batch = resultSchema->NormalizeBatch(*blobSchema, batch, usageColumnIds).DetachResult(); pathBatches.Add(inserted, shardingFilterCommit, batch); From 96778b4cf9ba2c960484447298bdef0f5bb890ae Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 25 Jul 2024 17:34:07 +0300 Subject: [PATCH 2/5] fix --- ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp index 1f5504317aa2..6441f0b2d3ac 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp @@ -20,7 +20,7 @@ std::vector TMerger::Execute(c arrow::FieldVector indexFields; indexFields.emplace_back(IColumnMerger::PortionIdField); indexFields.emplace_back(IColumnMerger::PortionRecordIndexField); - IIndexInfo::AddSpecialFields(indexFields); + IIndexInfo::AddSnapshotFields(indexFields); auto dataSchema = std::make_shared(indexFields); NArrow::NMerger::TMergePartialStream mergeStream( resultFiltered->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames()); From 73738489b1a10540b21c413b6fab177bcdddab54 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Thu, 25 Jul 2024 17:36:08 +0300 Subject: [PATCH 3/5] fix --- ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp index 6441f0b2d3ac..763bf5825d6f 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp @@ -137,7 +137,7 @@ std::vector TMerger::Execute(c TGeneralSerializedSlice slice(dataWithSecondary.GetExternalData(), schemaDetails, Context.Counters.SplitterCounters); auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); - const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, true); + const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, false); auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), dataWithSecondary.GetSecondaryInplaceData(), pathId, resultFiltered->GetVersion(), resultFiltered->GetSnapshot(), SaverContext.GetStoragesManager()); From c8b77a846edcab3272302bbd8896d63d7f2ca737 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 26 Jul 2024 08:45:16 +0300 Subject: [PATCH 4/5] fix --- ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp | 3 +++ ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp index 763bf5825d6f..856c4107da8b 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp @@ -20,6 +20,9 @@ std::vector TMerger::Execute(c arrow::FieldVector indexFields; indexFields.emplace_back(IColumnMerger::PortionIdField); indexFields.emplace_back(IColumnMerger::PortionRecordIndexField); + if (resultFiltered->HasColumnId((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + IIndexInfo::AddDeleteFields(indexFields); + } IIndexInfo::AddSnapshotFields(indexFields); auto dataSchema = std::make_shared(indexFields); NArrow::NMerger::TMergePartialStream mergeStream( diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h index 2776bc319a81..43b337c1f120 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h @@ -77,6 +77,10 @@ class IIndexInfo { fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64())); } + static void AddDeleteFields(std::vector>& fields) { + fields.push_back(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean())); + } + static const std::set& GetSnapshotColumnIdsSet() { static const std::set result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID }; return result; From 7b3e8454d8a2b1c7f1b8ab08094ca7d75df27fe1 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Fri, 26 Jul 2024 13:45:37 +0300 Subject: [PATCH 5/5] improve and fix test --- ydb/core/kqp/ut/olap/helpers/typed_local.h | 2 +- ydb/core/kqp/ut/olap/sys_view_ut.cpp | 11 +++++++---- .../tx/columnshard/engines/changes/indexation.cpp | 2 +- .../columnshard/engines/portions/read_with_blobs.cpp | 2 -- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.h b/ydb/core/kqp/ut/olap/helpers/typed_local.h index 29852b7761da..a72cef64e33e 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.h +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.h @@ -74,7 +74,7 @@ class TTypedLocalHelper: public Tests::NCS::THelper { void GetCount(ui64& count); template - void FillTable(const TFiller& fillPolicy, const ui32 pkKff = 0, const ui32 numRows = 800000) const { + void FillTable(const TFiller& fillPolicy, const double pkKff = 0, const ui32 numRows = 800000) const { std::vector builders; builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows * pkKff)); builders.emplace_back(std::make_shared>("field", fillPolicy)); diff --git a/ydb/core/kqp/ut/olap/sys_view_ut.cpp b/ydb/core/kqp/ut/olap/sys_view_ut.cpp index 13ff57223d8e..8583967214ca 100644 --- a/ydb/core/kqp/ut/olap/sys_view_ut.cpp +++ b/ydb/core/kqp/ut/olap/sys_view_ut.cpp @@ -5,6 +5,7 @@ #include "helpers/get_value.h" #include +#include #include #include @@ -229,7 +230,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { helper.CreateTestOlapTable(); NArrow::NConstruction::TStringPoolFiller sPool(3, 52); helper.FillTable(sPool, 0, 800000); - csController->WaitCompactions(TDuration::Seconds(10)); + csController->WaitCompactions(TDuration::Seconds(5)); + helper.FillTable(sPool, 0.5, 800000); + csController->WaitCompactions(TDuration::Seconds(5)); helper.GetVolumes(rawBytes1, bytes1, false, {"new_column_ui64"}); AFL_VERIFY(rawBytes1 == 0); @@ -241,9 +244,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { csController->WaitActualization(TDuration::Seconds(10)); ui64 rawBytes2; ui64 bytes2; - helper.GetVolumes(rawBytes2, bytes2, false, {"new_column_ui64"}); - AFL_VERIFY(rawBytes2 == 6500023)("real", rawBytes2); - AFL_VERIFY(bytes2 == 38880)("b", bytes2); + helper.GetVolumes(rawBytes2, bytes2, false, { "new_column_ui64", NOlap::IIndexInfo::SPEC_COL_DELETE_FLAG }); + AFL_VERIFY(rawBytes2 == 0)("real", rawBytes2); + AFL_VERIFY(bytes2 == 0)("b", bytes2); } } diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index 242c3939ecc4..4218409e29d6 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -158,7 +158,7 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont for (auto& inserted : DataToIndex) { auto blobSchema = context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion()); - std::vector filteredIds = inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(true)); + std::vector filteredIds = inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(false)); usageColumnIds.insert(filteredIds.begin(), filteredIds.end()); if (inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { usageColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 7d74512e1696..14fa9f854dcc 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -104,8 +104,6 @@ std::optional TReadPortionInfoWithBlobs::SyncP std::vector> newChunks; if (it != columnChunks.end()) { newChunks = to->GetIndexInfo().ActualizeColumnData(it->second, from->GetIndexInfo(), i); - } else { - newChunks = to->GetIndexInfo().MakeEmptyChunks(i, pageSizes, to->GetIndexInfo().GetColumnFeaturesVerified(i)); } AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); }