diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.h b/ydb/core/kqp/ut/olap/helpers/typed_local.h index 29852b7761da..a72cef64e33e 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.h +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.h @@ -74,7 +74,7 @@ class TTypedLocalHelper: public Tests::NCS::THelper { void GetCount(ui64& count); template - void FillTable(const TFiller& fillPolicy, const ui32 pkKff = 0, const ui32 numRows = 800000) const { + void FillTable(const TFiller& fillPolicy, const double pkKff = 0, const ui32 numRows = 800000) const { std::vector builders; builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows * pkKff)); builders.emplace_back(std::make_shared>("field", fillPolicy)); diff --git a/ydb/core/kqp/ut/olap/sys_view_ut.cpp b/ydb/core/kqp/ut/olap/sys_view_ut.cpp index 13ff57223d8e..8583967214ca 100644 --- a/ydb/core/kqp/ut/olap/sys_view_ut.cpp +++ b/ydb/core/kqp/ut/olap/sys_view_ut.cpp @@ -5,6 +5,7 @@ #include "helpers/get_value.h" #include +#include #include #include @@ -229,7 +230,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { helper.CreateTestOlapTable(); NArrow::NConstruction::TStringPoolFiller sPool(3, 52); helper.FillTable(sPool, 0, 800000); - csController->WaitCompactions(TDuration::Seconds(10)); + csController->WaitCompactions(TDuration::Seconds(5)); + helper.FillTable(sPool, 0.5, 800000); + csController->WaitCompactions(TDuration::Seconds(5)); helper.GetVolumes(rawBytes1, bytes1, false, {"new_column_ui64"}); AFL_VERIFY(rawBytes1 == 0); @@ -241,9 +244,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { csController->WaitActualization(TDuration::Seconds(10)); ui64 rawBytes2; ui64 bytes2; - helper.GetVolumes(rawBytes2, bytes2, false, {"new_column_ui64"}); - AFL_VERIFY(rawBytes2 == 6500023)("real", rawBytes2); - AFL_VERIFY(bytes2 == 38880)("b", bytes2); + helper.GetVolumes(rawBytes2, bytes2, false, { "new_column_ui64", NOlap::IIndexInfo::SPEC_COL_DELETE_FLAG }); + AFL_VERIFY(rawBytes2 == 0)("real", rawBytes2); + AFL_VERIFY(bytes2 == 0)("b", bytes2); } } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp index 1f5504317aa2..856c4107da8b 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp @@ -20,7 +20,10 @@ std::vector TMerger::Execute(c arrow::FieldVector indexFields; indexFields.emplace_back(IColumnMerger::PortionIdField); indexFields.emplace_back(IColumnMerger::PortionRecordIndexField); - IIndexInfo::AddSpecialFields(indexFields); + if (resultFiltered->HasColumnId((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + IIndexInfo::AddDeleteFields(indexFields); + } + IIndexInfo::AddSnapshotFields(indexFields); auto dataSchema = std::make_shared(indexFields); NArrow::NMerger::TMergePartialStream mergeStream( resultFiltered->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames()); @@ -137,7 +140,7 @@ std::vector TMerger::Execute(c TGeneralSerializedSlice slice(dataWithSecondary.GetExternalData(), schemaDetails, Context.Counters.SplitterCounters); auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); - const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, true); + const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, false); auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), dataWithSecondary.GetSecondaryInplaceData(), pathId, resultFiltered->GetVersion(), resultFiltered->GetSnapshot(), SaverContext.GetStoragesManager()); diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index 9fced30f5c9e..4218409e29d6 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -158,8 +158,11 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont for (auto& inserted : DataToIndex) { auto blobSchema = context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion()); - std::vector filteredIds = inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(true)); + std::vector filteredIds = inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(false)); usageColumnIds.insert(filteredIds.begin(), filteredIds.end()); + if (inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { + usageColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); + } if (usageColumnIds.size() == resultSchema->GetIndexInfo().GetColumnIds(true).size()) { break; } @@ -179,8 +182,10 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont } IIndexInfo::AddSnapshotColumns(*batch, inserted.GetSnapshot()); - IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); - usageColumnIds.insert(IIndexInfo::GetSystemColumnIds().begin(), IIndexInfo::GetSystemColumnIds().end()); + if (usageColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); + } + usageColumnIds.insert(IIndexInfo::GetSnapshotColumnIds().begin(), IIndexInfo::GetSnapshotColumnIds().end()); batch = resultSchema->NormalizeBatch(*blobSchema, batch, usageColumnIds).DetachResult(); pathBatches.Add(inserted, shardingFilterCommit, batch); diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 7d74512e1696..14fa9f854dcc 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -104,8 +104,6 @@ std::optional TReadPortionInfoWithBlobs::SyncP std::vector> newChunks; if (it != columnChunks.end()) { newChunks = to->GetIndexInfo().ActualizeColumnData(it->second, from->GetIndexInfo(), i); - } else { - newChunks = to->GetIndexInfo().MakeEmptyChunks(i, pageSizes, to->GetIndexInfo().GetColumnFeaturesVerified(i)); } AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); } diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h index 2776bc319a81..43b337c1f120 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h @@ -77,6 +77,10 @@ class IIndexInfo { fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64())); } + static void AddDeleteFields(std::vector>& fields) { + fields.push_back(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean())); + } + static const std::set& GetSnapshotColumnIdsSet() { static const std::set result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID }; return result;