diff --git a/ydb/core/formats/arrow/common/container.cpp b/ydb/core/formats/arrow/common/container.cpp index 7b159f2eef06..9100a9fa56a0 100644 --- a/ydb/core/formats/arrow/common/container.cpp +++ b/ydb/core/formats/arrow/common/container.cpp @@ -148,27 +148,32 @@ std::shared_ptr TGeneralContainer::BuildEmpt return std::make_shared(Schema, std::move(columns)); } -std::shared_ptr TGeneralContainer::BuildTableOptional(const std::optional>& columnNames /*= {}*/) const { +std::shared_ptr TGeneralContainer::BuildTableOptional(const TTableConstructionContext& context) const { std::vector> columns; std::vector> fields; for (i32 i = 0; i < Schema->num_fields(); ++i) { - if (columnNames && !columnNames->contains(Schema->field(i)->name())) { + if (context.GetColumnNames() && !context.GetColumnNames()->contains(Schema->field(i)->name())) { continue; } - columns.emplace_back(Columns[i]->GetChunkedArray()); + if (context.GetRecordsCount() || context.GetStartIndex()) { + columns.emplace_back(Columns[i]->Slice(context.GetStartIndex().value_or(0), + context.GetRecordsCount().value_or(GetRecordsCount() - context.GetStartIndex().value_or(0)))); + } else { + columns.emplace_back(Columns[i]->GetChunkedArray()); + } fields.emplace_back(Schema->field(i)); } if (fields.empty()) { return nullptr; } AFL_VERIFY(RecordsCount); - return arrow::Table::Make(std::make_shared(fields), columns, *RecordsCount); + return arrow::Table::Make(std::make_shared(fields), columns, context.GetRecordsCount().value_or(*RecordsCount)); } -std::shared_ptr TGeneralContainer::BuildTableVerified(const std::optional>& columnNames /*= {}*/) const { - auto result = BuildTableOptional(columnNames); +std::shared_ptr TGeneralContainer::BuildTableVerified(const TTableConstructionContext& context) const { + auto result = BuildTableOptional(context); AFL_VERIFY(result); - AFL_VERIFY(!columnNames || result->schema()->num_fields() == (i32)columnNames->size()); + AFL_VERIFY(!context.GetColumnNames() || 
result->schema()->num_fields() == (i32)context.GetColumnNames()->size()); return result; } diff --git a/ydb/core/formats/arrow/common/container.h b/ydb/core/formats/arrow/common/container.h index dacd5d62c0b0..23f3279e8dcb 100644 --- a/ydb/core/formats/arrow/common/container.h +++ b/ydb/core/formats/arrow/common/container.h @@ -62,8 +62,29 @@ class TGeneralContainer { return Columns[idx]; } - std::shared_ptr BuildTableVerified(const std::optional>& columnNames = {}) const; - std::shared_ptr BuildTableOptional(const std::optional>& columnNames = {}) const; + class TTableConstructionContext { + private: + YDB_ACCESSOR_DEF(std::optional>, ColumnNames); + YDB_ACCESSOR_DEF(std::optional, StartIndex); + YDB_ACCESSOR_DEF(std::optional, RecordsCount); + + public: + TTableConstructionContext() = default; + TTableConstructionContext(std::set&& columnNames) + : ColumnNames(std::move(columnNames)) { + } + + TTableConstructionContext(const std::set& columnNames) + : ColumnNames(columnNames) { + } + + void SetColumnNames(const std::vector& names) { + ColumnNames = std::set(names.begin(), names.end()); + } + }; + + std::shared_ptr BuildTableVerified(const TTableConstructionContext& context = Default()) const; + std::shared_ptr BuildTableOptional(const TTableConstructionContext& context = Default()) const; std::shared_ptr BuildEmptySame() const; diff --git a/ydb/core/kqp/compute_actor/kqp_compute_events.h b/ydb/core/kqp/compute_actor/kqp_compute_events.h index a9dd127a64b0..3142aca26400 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_events.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_events.h @@ -53,6 +53,7 @@ struct TEvScanData: public NActors::TEventLocal> SplittedBatches; TOwnedCellVec LastKey; + NKikimrKqp::TEvKqpScanCursor LastCursorProto; TDuration CpuTime; TDuration WaitTime; ui32 PageFaults = 0; // number of page faults occurred when filling in this message @@ -120,6 +121,7 @@ struct TEvScanData: public NActors::TEventLocalFinished = pbEv->Record.GetFinished(); 
ev->RequestedBytesLimitReached = pbEv->Record.GetRequestedBytesLimitReached(); ev->LastKey = TOwnedCellVec(TSerializedCellVec(pbEv->Record.GetLastKey()).GetCells()); + ev->LastCursorProto = pbEv->Record.GetLastCursor(); if (pbEv->Record.HasAvailablePacks()) { ev->AvailablePacks = pbEv->Record.GetAvailablePacks(); } @@ -153,6 +155,7 @@ struct TEvScanData: public NActors::TEventLocalRecord.SetPageFaults(PageFaults); Remote->Record.SetPageFault(PageFault); Remote->Record.SetLastKey(TSerializedCellVec::Serialize(LastKey)); + *Remote->Record.MutableLastCursor() = LastCursorProto; if (AvailablePacks) { Remote->Record.SetAvailablePacks(*AvailablePacks); } diff --git a/ydb/core/kqp/compute_actor/kqp_compute_state.h b/ydb/core/kqp/compute_actor/kqp_compute_state.h index 74311641732a..b4b71f3b7262 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_state.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_state.h @@ -38,6 +38,7 @@ struct TShardState: public TCommonRetriesState { bool SubscribedOnTablet = false; TActorId ActorId; TOwnedCellVec LastKey; + std::optional LastCursorProto; std::optional AvailablePacks; TString PrintLastKey(TConstArrayRef keyTypes) const; diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h index 7b2c7556b275..7b3477d15cea 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h @@ -14,7 +14,8 @@ namespace NKikimr::NKqp::NScanPrivate { class IExternalObjectsProvider { public: - virtual std::unique_ptr BuildEvKqpScan(const ui32 scanId, const ui32 gen, const TSmallVec& ranges) const = 0; + virtual std::unique_ptr BuildEvKqpScan(const ui32 scanId, const ui32 gen, const TSmallVec& ranges, + const std::optional& cursor) const = 0; virtual const TVector& GetKeyColumnTypes() const = 0; }; @@ -61,7 +62,7 @@ class TShardScannerInfo { const auto& keyColumnTypes = externalObjectsProvider.GetKeyColumnTypes(); auto ranges = 
state.GetScanRanges(keyColumnTypes); - auto ev = externalObjectsProvider.BuildEvKqpScan(ScanId, Generation, ranges); + auto ev = externalObjectsProvider.BuildEvKqpScan(ScanId, Generation, ranges, state.LastCursorProto); AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "start_scanner")("tablet_id", TabletId)("generation", Generation) ("info", state.ToString(keyColumnTypes))("range", DebugPrintRanges(keyColumnTypes, ranges, *AppData()->TypeRegistry)) diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp index e07a597f96bd..c17289b35f8d 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp @@ -37,8 +37,7 @@ TKqpScanFetcherActor::TKqpScanFetcherActor(const NKikimrKqp::TKqpSnapshot& snaps , ShardsScanningPolicy(shardsScanningPolicy) , Counters(counters) , InFlightShards(ScanId, *this) - , InFlightComputes(ComputeActorIds) -{ + , InFlightComputes(ComputeActorIds) { Y_UNUSED(traceId); AFL_ENSURE(!Meta.GetReads().empty()); AFL_ENSURE(Meta.GetTable().GetTableKind() != (ui32)ETableKind::SysView); @@ -47,7 +46,7 @@ TKqpScanFetcherActor::TKqpScanFetcherActor(const NKikimrKqp::TKqpSnapshot& snaps for (size_t i = 0; i < Meta.KeyColumnTypesSize(); i++) { NScheme::TTypeId typeId = Meta.GetKeyColumnTypes().at(i); NScheme::TTypeInfo typeInfo = NScheme::NTypeIds::IsParametrizedType(typeId) ? 
- NScheme::TypeInfoFromProto(typeId,Meta.GetKeyColumnTypeInfos().at(i)) : + NScheme::TypeInfoFromProto(typeId, Meta.GetKeyColumnTypeInfos().at(i)) : NScheme::TTypeInfo(typeId); KeyColumnTypes.push_back(typeInfo); } @@ -127,19 +126,19 @@ void TKqpScanFetcherActor::HandleExecute(TEvKqpCompute::TEvScanData::TPtr& ev) { ("ScanId", ev->Get()->ScanId) ("Finished", ev->Get()->Finished) ("Lock", [&]() { - TStringBuilder builder; - for (const auto& lock : ev->Get()->LocksInfo.Locks) { - builder << lock.ShortDebugString(); - } - return builder; - }()) + TStringBuilder builder; + for (const auto& lock : ev->Get()->LocksInfo.Locks) { + builder << lock.ShortDebugString(); + } + return builder; + }()) ("BrokenLocks", [&]() { - TStringBuilder builder; - for (const auto& lock : ev->Get()->LocksInfo.BrokenLocks) { - builder << lock.ShortDebugString(); - } - return builder; - }()); + TStringBuilder builder; + for (const auto& lock : ev->Get()->LocksInfo.BrokenLocks) { + builder << lock.ShortDebugString(); + } + return builder; + }()); TInstant startTime = TActivationContext::Now(); if (ev->Get()->Finished) { @@ -347,11 +346,12 @@ void TKqpScanFetcherActor::HandleExecute(TEvTxProxySchemeCache::TEvResolveKeySet if (!state.LastKey.empty()) { PendingShards.front().LastKey = std::move(state.LastKey); - while(!PendingShards.empty() && PendingShards.front().GetScanRanges(KeyColumnTypes).empty()) { + while (!PendingShards.empty() && PendingShards.front().GetScanRanges(KeyColumnTypes).empty()) { CA_LOG_D("Nothing to read " << PendingShards.front().ToString(KeyColumnTypes)); auto readShard = std::move(PendingShards.front()); PendingShards.pop_front(); PendingShards.front().LastKey = std::move(readShard.LastKey); + PendingShards.front().LastCursorProto = std::move(readShard.LastCursorProto); } AFL_ENSURE(!PendingShards.empty()); @@ -409,7 +409,8 @@ bool TKqpScanFetcherActor::SendScanFinished() { return true; } -std::unique_ptr TKqpScanFetcherActor::BuildEvKqpScan(const ui32 scanId, const ui32 
gen, const TSmallVec& ranges) const { +std::unique_ptr TKqpScanFetcherActor::BuildEvKqpScan(const ui32 scanId, const ui32 gen, + const TSmallVec& ranges, const std::optional& cursor) const { auto ev = std::make_unique(); ev->Record.SetLocalPathId(ScanDataMeta.TableId.PathId.LocalPathId); for (auto& column : ScanDataMeta.GetColumns()) { @@ -423,6 +424,9 @@ std::unique_ptr TKqpScanFetcherActor::BuildEv } } ev->Record.MutableSkipNullKeys()->CopyFrom(Meta.GetSkipNullKeys()); + if (cursor) { + *ev->Record.MutableScanCursor() = *cursor; + } auto protoRanges = ev->Record.MutableRanges(); protoRanges->Reserve(ranges.size()); @@ -489,10 +493,11 @@ void TKqpScanFetcherActor::ProcessPendingScanDataItem(TEvKqpCompute::TEvScanData AFL_ENSURE(state->ActorId == ev->Sender)("expected", state->ActorId)("got", ev->Sender); state->LastKey = std::move(msg.LastKey); + state->LastCursorProto = std::move(msg.LastCursorProto); const ui64 rowsCount = msg.GetRowsCount(); AFL_ENSURE(!LockTxId || !msg.LocksInfo.Locks.empty() || !msg.LocksInfo.BrokenLocks.empty()); AFL_ENSURE(LockTxId || (msg.LocksInfo.Locks.empty() && msg.LocksInfo.BrokenLocks.empty())); - AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("action","got EvScanData")("rows", rowsCount)("finished", msg.Finished)("exceeded", msg.RequestedBytesLimitReached) + AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("action", "got EvScanData")("rows", rowsCount)("finished", msg.Finished)("exceeded", msg.RequestedBytesLimitReached) ("scan", ScanId)("packs_to_send", InFlightComputes.GetPacksToSendCount()) ("from", ev->Sender)("shards remain", PendingShards.size()) ("in flight scans", InFlightShards.GetScansCount()) diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h index 64d7041d7218..d513939b4e18 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h @@ -108,7 +108,8 @@ class TKqpScanFetcherActor: public 
NActors::TActorBootstrapped BuildEvKqpScan(const ui32 scanId, const ui32 gen, const TSmallVec& ranges) const override; + virtual std::unique_ptr BuildEvKqpScan(const ui32 scanId, const ui32 gen, + const TSmallVec& ranges, const std::optional& cursor) const override; virtual const TVector& GetKeyColumnTypes() const override { return KeyColumnTypes; } diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index b8cf586a9578..acb3fd29114f 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1700,6 +1700,7 @@ message TColumnShardConfig { optional bool ColumnChunksV0Usage = 25 [default = true]; optional bool ColumnChunksV1Usage = 26 [default = true]; optional uint64 MemoryLimitScanPortion = 27 [default = 100000000]; + optional string ReaderClassName = 28; } message TSchemeShardConfig { diff --git a/ydb/core/protos/kqp.proto b/ydb/core/protos/kqp.proto index b487df72e037..2351a539ba2f 100644 --- a/ydb/core/protos/kqp.proto +++ b/ydb/core/protos/kqp.proto @@ -642,6 +642,19 @@ message TEvScanError { optional uint64 TabletId = 4; } +message TEvKqpScanCursor { + message TColumnShardScanPlain { + } + message TColumnShardScanSimple { + optional uint64 SourceId = 1; + optional uint32 StartRecordIndex = 2; + } + oneof Implementation { + TColumnShardScanPlain ColumnShardPlain = 10; + TColumnShardScanSimple ColumnShardSimple = 11; + } +} + message TEvRemoteScanData { optional uint32 ScanId = 1; optional uint64 CpuTimeUs = 2; @@ -665,6 +678,7 @@ message TEvRemoteScanData { optional bool RequestedBytesLimitReached = 11 [default = false]; optional uint32 AvailablePacks = 12; + optional TEvKqpScanCursor LastCursor = 13; } message TEvRemoteScanDataAck { diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index bf13ccf9caab..4a6b7de3884c 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -1700,6 +1700,8 @@ message TEvKqpScan { optional TComputeShardingPolicy 
ComputeShardingPolicy = 23; optional uint64 LockTxId = 24; optional uint32 LockNodeId = 25; + optional string CSScanPolicy = 26; + optional NKikimrKqp.TEvKqpScanCursor ScanCursor = 27; } message TEvCompactTable { diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp index 8359c9ce0676..39e53b35c8ac 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -94,7 +94,9 @@ void TColumnShard::Handle(NPrivateEvents::NWrite::TEvWritePortionResult::TPtr& e AFL_VERIFY(ev->Get()->GetWriteStatus() == NKikimrProto::OK); std::vector writtenPacks = ev->Get()->DetachInsertedPacks(); std::vector fails = ev->Get()->DetachFails(); + const TMonotonic now = TMonotonic::Now(); for (auto&& i : writtenPacks) { + Counters.OnWritePutBlobsSuccess(now - i.GetWriteMeta().GetWriteStartInstant(), i.GetRecordsCount()); Counters.GetWritesMonitor()->OnFinishWrite(i.GetDataSize(), 1); } for (auto&& i : fails) { @@ -554,12 +556,15 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor return; } + Counters.GetColumnTablesCounters()->GetPathIdCounter(pathId)->OnWriteEvent(); + auto arrowData = std::make_shared(schema); if (!arrowData->Parse(operation, NEvWrite::TPayloadReader(*ev->Get()))) { Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); ctx.Send(source, result.release(), 0, cookie); + return; } auto overloadStatus = CheckOverloaded(pathId); diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index 9b7f928fdc3d..711bf5f0cacb 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -59,6 +59,9 @@ class TTxInternalScan; namespace NPlain { class TIndexScannerConstructor; } +namespace NSimple { 
+class TIndexScannerConstructor; +} } // namespace NReader namespace NDataSharing { @@ -109,7 +112,7 @@ class TSharingSessionsInitializer; class TInFlightReadsInitializer; class TSpecialValuesInitializer; class TTablesManagerInitializer; -} +} // namespace NLoading extern bool gAllowLogBatchingDefaultValue; @@ -198,6 +201,7 @@ class TColumnShard: public TActor, public NTabletFlatExecutor::TTa friend class NOlap::NReader::TTxScan; friend class NOlap::NReader::TTxInternalScan; friend class NOlap::NReader::NPlain::TIndexScannerConstructor; + friend class NOlap::NReader::NSimple::TIndexScannerConstructor; class TStoragesManager; friend class TTxController; @@ -246,7 +250,7 @@ class TColumnShard: public TActor, public NTabletFlatExecutor::TTa void Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvStartCompaction::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvMetadataAccessorsInfo::TPtr& ev, const TActorContext& ctx); - + void Handle(NPrivateEvents::NWrite::TEvWritePortionResult::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvScanStats::TPtr& ev, const TActorContext& ctx); diff --git a/ydb/core/tx/columnshard/counters/scan.h b/ydb/core/tx/columnshard/counters/scan.h index d9a02e1fef49..428d74baddab 100644 --- a/ydb/core/tx/columnshard/counters/scan.h +++ b/ydb/core/tx/columnshard/counters/scan.h @@ -295,6 +295,8 @@ class TConcreteScanCounters: public TScanCounters { std::shared_ptr AssembleTasksCount; std::shared_ptr ReadTasksCount; std::shared_ptr ResourcesAllocationTasksCount; + std::shared_ptr ResultsForSourceCount; + public: TScanAggregations Aggregations; @@ -302,6 +304,10 @@ class TConcreteScanCounters: public TScanCounters { return TCounterGuard(FetchAccessorsCount); } + TCounterGuard GetResultsForSourceGuard() const { + return TCounterGuard(ResultsForSourceCount); + } + TCounterGuard GetMergeTasksGuard() const { return TCounterGuard(MergeTasksCount); } @@ -320,7 +326,7 @@ 
class TConcreteScanCounters: public TScanCounters { bool InWaiting() const { return MergeTasksCount->Val() || AssembleTasksCount->Val() || ReadTasksCount->Val() || ResourcesAllocationTasksCount->Val() || - FetchAccessorsCount->Val(); + FetchAccessorsCount->Val() || ResultsForSourceCount->Val(); } void OnBlobsWaitDuration(const TDuration d, const TDuration fullScanDuration) const { @@ -335,6 +341,7 @@ class TConcreteScanCounters: public TScanCounters { , AssembleTasksCount(std::make_shared()) , ReadTasksCount(std::make_shared()) , ResourcesAllocationTasksCount(std::make_shared()) + , ResultsForSourceCount(std::make_shared()) , Aggregations(TBase::BuildAggregations()) { diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp index 5369ee82c52f..29ada345be25 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp @@ -350,6 +350,109 @@ std::vector TPortionDataAccessor::GetColumnChunksPointers( return result; } +std::vector TPortionDataAccessor::BuildReadPages(const ui64 memoryLimit, const std::set& entityIds) const { + class TEntityDelimiter { + private: + YDB_READONLY(ui32, IndexStart, 0); + YDB_READONLY(ui32, EntityId, 0); + YDB_READONLY(ui32, ChunkIdx, 0); + YDB_READONLY(ui64, MemoryStartChunk, 0); + YDB_READONLY(ui64, MemoryFinishChunk, 0); + + public: + TEntityDelimiter(const ui32 indexStart, const ui32 entityId, const ui32 chunkIdx, const ui64 memStartChunk, const ui64 memFinishChunk) + : IndexStart(indexStart) + , EntityId(entityId) + , ChunkIdx(chunkIdx) + , MemoryStartChunk(memStartChunk) + , MemoryFinishChunk(memFinishChunk) { + } + + bool operator<(const TEntityDelimiter& item) const { + return std::tie(IndexStart, EntityId, ChunkIdx) < std::tie(item.IndexStart, item.EntityId, item.ChunkIdx); + } + }; + + class TGlobalDelimiter { + private: + YDB_READONLY(ui32, IndexStart, 0); + YDB_ACCESSOR(ui64, 
UsedMemory, 0); + YDB_ACCESSOR(ui64, WholeChunksMemory, 0); + + public: + TGlobalDelimiter(const ui32 indexStart) + : IndexStart(indexStart) { + } + }; + + std::vector delimiters; + + ui32 lastAppliedId = 0; + ui32 currentRecordIdx = 0; + bool needOne = false; + const TColumnRecord* lastRecord = nullptr; + for (auto&& i : GetRecordsVerified()) { + if (lastAppliedId != i.GetEntityId()) { + if (delimiters.size()) { + AFL_VERIFY(delimiters.back().GetIndexStart() == PortionInfo->GetRecordsCount()); + } + needOne = entityIds.contains(i.GetEntityId()); + currentRecordIdx = 0; + lastAppliedId = i.GetEntityId(); + lastRecord = nullptr; + } + if (!needOne) { + continue; + } + delimiters.emplace_back( + currentRecordIdx, i.GetEntityId(), i.GetChunkIdx(), i.GetMeta().GetRawBytes(), lastRecord ? lastRecord->GetMeta().GetRawBytes() : 0); + currentRecordIdx += i.GetMeta().GetRecordsCount(); + if (currentRecordIdx == PortionInfo->GetRecordsCount()) { + delimiters.emplace_back(currentRecordIdx, i.GetEntityId(), i.GetChunkIdx() + 1, 0, i.GetMeta().GetRawBytes()); + } + lastRecord = &i; + } + if (delimiters.empty()) { + return { TPortionDataAccessor::TReadPage(0, PortionInfo->GetRecordsCount(), 0) }; + } + std::sort(delimiters.begin(), delimiters.end()); + std::vector sumDelimiters; + for (auto&& i : delimiters) { + if (sumDelimiters.empty()) { + sumDelimiters.emplace_back(i.GetIndexStart()); + } else if (sumDelimiters.back().GetIndexStart() != i.GetIndexStart()) { + AFL_VERIFY(sumDelimiters.back().GetIndexStart() < i.GetIndexStart()); + TGlobalDelimiter backDelimiter(i.GetIndexStart()); + backDelimiter.MutableWholeChunksMemory() = sumDelimiters.back().GetWholeChunksMemory(); + backDelimiter.MutableUsedMemory() = sumDelimiters.back().GetUsedMemory(); + sumDelimiters.emplace_back(std::move(backDelimiter)); + } + sumDelimiters.back().MutableWholeChunksMemory() += i.GetMemoryFinishChunk(); + sumDelimiters.back().MutableUsedMemory() += i.GetMemoryStartChunk(); + } + std::vector 
recordIdx = { 0 }; + std::vector packMemorySize; + const TGlobalDelimiter* lastBorder = &sumDelimiters.front(); + for (auto&& i : sumDelimiters) { + const i64 sumMemory = (i64)i.GetUsedMemory() - (i64)lastBorder->GetWholeChunksMemory(); + AFL_VERIFY(sumMemory > 0); + if (((ui64)sumMemory >= memoryLimit || i.GetIndexStart() == PortionInfo->GetRecordsCount()) && i.GetIndexStart()) { + AFL_VERIFY(lastBorder->GetIndexStart() < i.GetIndexStart()); + recordIdx.emplace_back(i.GetIndexStart()); + packMemorySize.emplace_back(sumMemory); + lastBorder = &i; + } + } + AFL_VERIFY(recordIdx.front() == 0); + AFL_VERIFY(recordIdx.back() == PortionInfo->GetRecordsCount())("real", JoinSeq(",", recordIdx))("expected", PortionInfo->GetRecordsCount()); + AFL_VERIFY(recordIdx.size() == packMemorySize.size() + 1); + std::vector pages; + for (ui32 i = 0; i < packMemorySize.size(); ++i) { + pages.emplace_back(recordIdx[i], recordIdx[i + 1] - recordIdx[i], packMemorySize[i]); + } + return pages; +} + std::vector TPortionDataAccessor::BuildPages() const { std::vector pages; struct TPart { diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h index 46fbb43bc900..9e8f608385b7 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.h +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -436,6 +436,23 @@ class TPortionDataAccessor { std::vector BuildPages() const; ui64 GetMinMemoryForReadColumns(const std::optional>& columnIds) const; + + class TReadPage { + private: + YDB_READONLY(ui32, IndexStart, 0); + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY(ui64, MemoryUsage, 0); + + public: + TReadPage(const ui32 indexStart, const ui32 recordsCount, const ui64 memoryUsage) + : IndexStart(indexStart) + , RecordsCount(recordsCount) + , MemoryUsage(memoryUsage) { + AFL_VERIFY(RecordsCount); + } + }; + + std::vector BuildReadPages(const ui64 memoryLimit, const std::set& entityIds) const; }; } // 
namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.h b/ydb/core/tx/columnshard/engines/predicate/filter.h index bbc70b5ff584..af9b339728b5 100644 --- a/ydb/core/tx/columnshard/engines/predicate/filter.h +++ b/ydb/core/tx/columnshard/engines/predicate/filter.h @@ -1,5 +1,9 @@ #pragma once #include "range.h" + +#include +#include + #include namespace NKikimr::NOlap { @@ -88,4 +92,158 @@ class TPKRangesFilter { } }; -} +class ICursorEntity { +private: + virtual ui64 DoGetEntityId() const = 0; + virtual ui64 DoGetEntityRecordsCount() const = 0; + +public: + ui64 GetEntityId() const { + return DoGetEntityId(); + } + ui64 GetEntityRecordsCount() const { + return DoGetEntityRecordsCount(); + } +}; + +class IScanCursor { +private: + virtual const std::shared_ptr& DoGetPKCursor() const = 0; + virtual bool DoCheckEntityIsBorder(const std::shared_ptr& entity, bool& usage) const = 0; + virtual bool DoCheckSourceIntervalUsage(const ui64 sourceId, const ui32 indexStart, const ui32 recordsCount) const = 0; + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrKqp::TEvKqpScanCursor& proto) = 0; + virtual void DoSerializeToProto(NKikimrKqp::TEvKqpScanCursor& proto) const = 0; + +public: + virtual bool IsInitialized() const = 0; + + virtual ~IScanCursor() = default; + + const std::shared_ptr& GetPKCursor() const { + return DoGetPKCursor(); + } + + bool CheckSourceIntervalUsage(const ui64 sourceId, const ui32 indexStart, const ui32 recordsCount) const { + AFL_VERIFY(IsInitialized()); + return DoCheckSourceIntervalUsage(sourceId, indexStart, recordsCount); + } + + bool CheckEntityIsBorder(const std::shared_ptr& entity, bool& usage) const { + AFL_VERIFY(IsInitialized()); + return DoCheckEntityIsBorder(entity, usage); + } + + TConclusionStatus DeserializeFromProto(const NKikimrKqp::TEvKqpScanCursor& proto) { + return DoDeserializeFromProto(proto); + } + + NKikimrKqp::TEvKqpScanCursor SerializeToProto() const { + NKikimrKqp::TEvKqpScanCursor 
result; + DoSerializeToProto(result); + return result; + } +}; + +class TSimpleScanCursor: public IScanCursor { +private: + YDB_READONLY_DEF(std::shared_ptr, PrimaryKey); + YDB_READONLY(ui64, SourceId, 0); + YDB_READONLY(ui32, RecordIndex, 0); + + virtual void DoSerializeToProto(NKikimrKqp::TEvKqpScanCursor& proto) const override { + proto.MutableColumnShardSimple()->SetSourceId(SourceId); + proto.MutableColumnShardSimple()->SetStartRecordIndex(RecordIndex); + } + + virtual const std::shared_ptr& DoGetPKCursor() const override { + AFL_VERIFY(!!PrimaryKey); + return PrimaryKey; + } + + virtual bool IsInitialized() const override { + return !!SourceId; + } + + virtual bool DoCheckEntityIsBorder(const std::shared_ptr& entity, bool& usage) const override { + if (SourceId != entity->GetEntityId()) { + return false; + } + AFL_VERIFY(RecordIndex <= entity->GetEntityRecordsCount()); + usage = RecordIndex < entity->GetEntityRecordsCount(); + return true; + } + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrKqp::TEvKqpScanCursor& proto) override { + if (!proto.HasColumnShardSimple()) { + return TConclusionStatus::Success(); + } + if (!proto.GetColumnShardSimple().HasSourceId()) { + return TConclusionStatus::Fail("incorrect source id for cursor initialization"); + } + SourceId = proto.GetColumnShardSimple().GetSourceId(); + if (!proto.GetColumnShardSimple().HasStartRecordIndex()) { + return TConclusionStatus::Fail("incorrect record index for cursor initialization"); + } + RecordIndex = proto.GetColumnShardSimple().GetStartRecordIndex(); + return TConclusionStatus::Success(); + } + + virtual bool DoCheckSourceIntervalUsage(const ui64 sourceId, const ui32 indexStart, const ui32 recordsCount) const override { + AFL_VERIFY(sourceId == SourceId); + if (indexStart >= RecordIndex) { + return true; + } + AFL_VERIFY(indexStart + recordsCount <= RecordIndex); + return false; + } + +public: + TSimpleScanCursor() = default; + + TSimpleScanCursor(const std::shared_ptr& 
pk, const ui64 portionId, const ui32 recordIndex) + : PrimaryKey(pk) + , SourceId(portionId) + , RecordIndex(recordIndex) { + } +}; + +class TPlainScanCursor: public IScanCursor { +private: + YDB_READONLY_DEF(std::shared_ptr, PrimaryKey); + + virtual void DoSerializeToProto(NKikimrKqp::TEvKqpScanCursor& proto) const override { + *proto.MutableColumnShardPlain() = {}; + } + + virtual bool IsInitialized() const override { + return !!PrimaryKey; + } + + virtual const std::shared_ptr& DoGetPKCursor() const override { + AFL_VERIFY(!!PrimaryKey); + return PrimaryKey; + } + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrKqp::TEvKqpScanCursor& /*proto*/) override { + return TConclusionStatus::Success(); + } + + virtual bool DoCheckEntityIsBorder(const std::shared_ptr& /*entity*/, bool& usage) const override { + usage = true; + return true; + } + + virtual bool DoCheckSourceIntervalUsage(const ui64 /*sourceId*/, const ui32 /*indexStart*/, const ui32 /*recordsCount*/) const override { + return true; + } + +public: + TPlainScanCursor() = default; + + TPlainScanCursor(const std::shared_ptr& pk) + : PrimaryKey(pk) { + AFL_VERIFY(PrimaryKey); + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/constructor.cpp index 95a756f2f43d..884bfa01bf8b 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/abstract/constructor.cpp @@ -1,11 +1,13 @@ #include "constructor.h" + +#include #include #include namespace NKikimr::NOlap::NReader { -NKikimr::TConclusionStatus IScannerConstructor::ParseProgram(const TVersionedIndex* vIndex, - const NKikimrSchemeOp::EOlapProgramType programType, const TString& serializedProgram, TReadDescription& read, const IColumnResolver& columnResolver) const { +NKikimr::TConclusionStatus IScannerConstructor::ParseProgram(const TVersionedIndex* vIndex, const 
NKikimrSchemeOp::EOlapProgramType programType, + const TString& serializedProgram, TReadDescription& read, const IColumnResolver& columnResolver) const { AFL_VERIFY(!read.ColumnIds.size() || !read.ColumnNames.size()); std::vector names; std::set namesChecker; @@ -47,7 +49,8 @@ NKikimr::TConclusionStatus IScannerConstructor::ParseProgram(const TVersionedInd } const auto getDiffColumnsMessage = [&]() { - return TStringBuilder() << "ssa program has different columns with kqp request: kqp_columns=" << JoinSeq(",", namesChecker) << " vs program_columns=" << JoinSeq(",", programColumns); + return TStringBuilder() << "ssa program has different columns with kqp request: kqp_columns=" << JoinSeq(",", namesChecker) + << " vs program_columns=" << JoinSeq(",", programColumns); }; if (namesChecker.size() != programColumns.size()) { @@ -66,7 +69,8 @@ NKikimr::TConclusionStatus IScannerConstructor::ParseProgram(const TVersionedInd } } -NKikimr::TConclusion> IScannerConstructor::BuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { +NKikimr::TConclusion> IScannerConstructor::BuildReadMetadata( + const NColumnShard::TColumnShard* self, const TReadDescription& read) const { TConclusion> result = DoBuildReadMetadata(self, read); if (result.IsFail()) { return result; @@ -78,4 +82,17 @@ NKikimr::TConclusion> IScannerConstructor::Bu } } -} \ No newline at end of file +NKikimr::TConclusion> IScannerConstructor::BuildCursorFromProto( + const NKikimrKqp::TEvKqpScanCursor& proto) const { + auto result = DoBuildCursor(); + if (!result) { + return result; + } + auto status = result->DeserializeFromProto(proto); + if (status.IsFail()) { + return status; + } + return result; +} + +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/constructor.h b/ydb/core/tx/columnshard/engines/reader/abstract/constructor.h index 1eb95f2b224f..21fbe1f0acea 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/abstract/constructor.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/constructor.h @@ -8,6 +8,22 @@ namespace NKikimr::NOlap::NReader { +class TScannerConstructorContext { +private: + YDB_READONLY(TSnapshot, Snapshot, TSnapshot::Zero()); + YDB_READONLY(ui32, ItemsLimit, 0); + YDB_READONLY(bool, Reverse, false); + +public: + TScannerConstructorContext(const TSnapshot& snapshot, const ui32 itemsLimit, const bool reverse) + : Snapshot(snapshot) + , ItemsLimit(itemsLimit) + , Reverse(reverse) + { + + } +}; + class IScannerConstructor { protected: const TSnapshot Snapshot; @@ -17,17 +33,21 @@ class IScannerConstructor { const TString& serializedProgram, TReadDescription& read, const IColumnResolver& columnResolver) const; private: virtual TConclusion> DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const = 0; + virtual std::shared_ptr DoBuildCursor() const = 0; + public: + using TFactory = NObjectFactory::TParametrizedObjectFactory; virtual ~IScannerConstructor() = default; - IScannerConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) - : Snapshot(snapshot) - , ItemsLimit(itemsLimit) - , IsReverse(reverse) + IScannerConstructor(const TScannerConstructorContext& context) + : Snapshot(context.GetSnapshot()) + , ItemsLimit(context.GetItemsLimit()) + , IsReverse(context.GetReverse()) { } + TConclusion> BuildCursorFromProto(const NKikimrKqp::TEvKqpScanCursor& proto) const; virtual TConclusionStatus ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const = 0; virtual std::vector GetPrimaryKeyScheme(const NColumnShard::TColumnShard* self) const = 0; TConclusion> BuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const; diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h 
index e885d4461dc8..e55b80f30771 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h @@ -53,7 +53,6 @@ class TReadContext { const TActorId ReadCoordinatorActorId; const TComputeShardingPolicy ComputeShardingPolicy; TAtomic AbortFlag = 0; - public: template std::shared_ptr GetReadMetadataPtrVerifiedAs() const { @@ -62,6 +61,10 @@ class TReadContext { return result; } + const std::shared_ptr& GetScanCursor() const { + return ReadMetadata->GetScanCursor(); + } + void AbortWithError(const TString& errorMessage) { if (AtomicCas(&AbortFlag, 1, 0)) { NActors::TActivationContext::Send( diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h index f144bf05f95a..5d1a684e0217 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h @@ -45,6 +45,7 @@ struct TReadMetadataBase { std::shared_ptr IndexVersionsPointer; TSnapshot RequestSnapshot; std::optional RequestShardingInfo; + std::shared_ptr ScanCursor; virtual void DoOnReadFinished(NColumnShard::TColumnShard& /*owner*/) const { } virtual void DoOnBeforeStartReading(NColumnShard::TColumnShard& /*owner*/) const { @@ -68,6 +69,10 @@ struct TReadMetadataBase { return TxId; } + const std::shared_ptr& GetScanCursor() const { + return ScanCursor; + } + std::optional GetLockId() const { return LockId; } @@ -135,12 +140,14 @@ struct TReadMetadataBase { } TReadMetadataBase(const std::shared_ptr index, const ESorting sorting, const TProgramContainer& ssaProgram, - const std::shared_ptr& schema, const TSnapshot& requestSnapshot) + const std::shared_ptr& schema, const TSnapshot& requestSnapshot, const std::shared_ptr& scanCursor) : Sorting(sorting) , Program(ssaProgram) , IndexVersionsPointer(index) , RequestSnapshot(requestSnapshot) - , ResultIndexSchema(schema) { + , 
ScanCursor(scanCursor) + , ResultIndexSchema(schema) + { } virtual ~TReadMetadataBase() = default; diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp index 8e1f0a643cc1..6e2af3e7f772 100644 --- a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp @@ -255,15 +255,16 @@ bool TColumnShardScan::ProduceResults() noexcept { "batch_columns", JoinSeq(",", batch->schema()->field_names())); } if (CurrentLastReadKey) { - NArrow::NMerger::TSortableBatchPosition pNew( - result.GetLastReadKey(), 0, result.GetLastReadKey()->schema()->field_names(), {}, ReadMetadataRange->IsDescSorted()); - NArrow::NMerger::TSortableBatchPosition pOld( - CurrentLastReadKey, 0, CurrentLastReadKey->schema()->field_names(), {}, ReadMetadataRange->IsDescSorted()); - AFL_VERIFY(pOld < pNew)("old", pOld.DebugJson().GetStringRobust())("new", pNew.DebugJson().GetStringRobust()); + NArrow::NMerger::TSortableBatchPosition pNew(result.GetScanCursor()->GetPKCursor(), 0, + result.GetScanCursor()->GetPKCursor()->schema()->field_names(), {}, ReadMetadataRange->IsDescSorted()); + NArrow::NMerger::TSortableBatchPosition pOld(CurrentLastReadKey->GetPKCursor(), 0, + CurrentLastReadKey->GetPKCursor()->schema()->field_names(), {}, ReadMetadataRange->IsDescSorted()); + AFL_VERIFY(!(pNew < pOld))("old", pOld.DebugJson().GetStringRobust())("new", pNew.DebugJson().GetStringRobust()); } - CurrentLastReadKey = result.GetLastReadKey(); + CurrentLastReadKey = result.GetScanCursor(); - Result->LastKey = ConvertLastKey(result.GetLastReadKey()); + Result->LastKey = ConvertLastKey(result.GetScanCursor()->GetPKCursor()); + Result->LastCursorProto = result.GetScanCursor()->SerializeToProto(); SendResult(false, false); ScanIterator->OnSentDataFromInterval(result.GetNotFinishedIntervalIdx()); ACFL_DEBUG("stage", "finished")("iterator", ScanIterator->DebugString()); @@ -303,8 +304,8 @@ void 
TColumnShardScan::ContinueProcessing() { } } } - AFL_VERIFY(!ScanIterator || !ChunksLimiter.HasMore() || ScanCountersPool.InWaiting())("scan_actor_id", ScanActorId)("tx_id", TxId)("scan_id", ScanId)( - "gen", ScanGen)("tablet", TabletId)("debug", ScanIterator->DebugString()); +// AFL_VERIFY(!ScanIterator || !ChunksLimiter.HasMore() || ScanCountersPool.InWaiting())("scan_actor_id", ScanActorId)("tx_id", TxId)("scan_id", ScanId)( +// "gen", ScanGen)("tablet", TabletId)("debug", ScanIterator->DebugString()); } void TColumnShardScan::MakeResult(size_t reserveRows /*= 0*/) { diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.h b/ydb/core/tx/columnshard/engines/reader/actor/actor.h index 454f0d0d795f..6ed07d077af4 100644 --- a/ydb/core/tx/columnshard/engines/reader/actor/actor.h +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.h @@ -136,7 +136,7 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo TChunksLimiter ChunksLimiter; THolder Result; - std::shared_ptr CurrentLastReadKey; + std::shared_ptr CurrentLastReadKey; bool Finished = false; std::optional LastResultInstant; diff --git a/ydb/core/tx/columnshard/engines/reader/common/description.h b/ydb/core/tx/columnshard/engines/reader/common/description.h index c180dcc8d067..58872a627b5d 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/description.h +++ b/ydb/core/tx/columnshard/engines/reader/common/description.h @@ -11,6 +11,8 @@ struct TReadDescription { private: TSnapshot Snapshot; TProgramContainer Program; + std::shared_ptr ScanCursor; + public: // Table ui64 TxId = 0; @@ -27,7 +29,17 @@ struct TReadDescription { // List of columns std::vector ColumnIds; std::vector ColumnNames; - + + const std::shared_ptr& GetScanCursor() const { + AFL_VERIFY(ScanCursor); + return ScanCursor; + } + + void SetScanCursor(const std::shared_ptr& cursor) { + AFL_VERIFY(!ScanCursor); + ScanCursor = cursor; + } + TReadDescription(const TSnapshot& snapshot, const bool isReverse) : 
Snapshot(snapshot) , PKRangesFilter(std::make_shared(isReverse)) { diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.h b/ydb/core/tx/columnshard/engines/reader/common/result.h index e3028b01b5ad..6173d3147e87 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/result.h +++ b/ydb/core/tx/columnshard/engines/reader/common/result.h @@ -18,7 +18,7 @@ class TPartialReadResult: public TNonCopyable { // This 1-row batch contains the last key that was read while producing the ResultBatch. // NOTE: it might be different from the Key of last row in ResulBatch in case of filtering/aggregation/limit - std::shared_ptr LastReadKey; + std::shared_ptr ScanCursor; YDB_READONLY_DEF(std::optional, NotFinishedIntervalIdx); public: @@ -50,26 +50,25 @@ class TPartialReadResult: public TNonCopyable { return ResultBatch; } - const std::shared_ptr& GetLastReadKey() const { - return LastReadKey; + const std::shared_ptr& GetScanCursor() const { + return ScanCursor; } explicit TPartialReadResult(std::shared_ptr&& resourcesGuard, std::shared_ptr&& gGuard, const NArrow::TShardedRecordBatch& batch, - std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) + const std::shared_ptr& scanCursor, const std::optional notFinishedIntervalIdx) : ResourcesGuard(std::move(resourcesGuard)) , GroupGuard(std::move(gGuard)) , ResultBatch(batch) - , LastReadKey(lastKey) + , ScanCursor(scanCursor) , NotFinishedIntervalIdx(notFinishedIntervalIdx) { Y_ABORT_UNLESS(ResultBatch.GetRecordsCount()); - Y_ABORT_UNLESS(LastReadKey); - Y_ABORT_UNLESS(LastReadKey->num_rows() == 1); + Y_ABORT_UNLESS(ScanCursor); } - explicit TPartialReadResult( - const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : TPartialReadResult(nullptr, nullptr, batch, lastKey, notFinishedIntervalIdx) { + explicit TPartialReadResult(const NArrow::TShardedRecordBatch& batch, const std::shared_ptr& scanCursor, + const std::optional notFinishedIntervalIdx) + 
: TPartialReadResult(nullptr, nullptr, batch, scanCursor, notFinishedIntervalIdx) { } }; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp index 78926d99dcea..e343b4674d8d 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp @@ -3,6 +3,7 @@ #include "resolver.h" #include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -35,7 +36,7 @@ NKikimr::TConclusion> TIndexScannerConstructo TDataStorageAccessor dataAccessor(insertTable, index); AFL_VERIFY(read.PathId); auto readMetadata = std::make_shared(read.PathId, index->CopyVersionedIndexPtr(), read.GetSnapshot(), - IsReverse ? TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, read.GetProgram()); + IsReverse ? TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, read.GetProgram(), nullptr); auto initResult = readMetadata->Init(self, read, dataAccessor); if (!initResult) { @@ -44,4 +45,8 @@ NKikimr::TConclusion> TIndexScannerConstructo return static_pointer_cast(readMetadata); } +std::shared_ptr TIndexScannerConstructor::DoBuildCursor() const { + return std::make_shared(); +} + } // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.h index bb576fdbdc70..3a534cd0d936 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.h @@ -4,14 +4,23 @@ namespace NKikimr::NOlap::NReader::NPlain { class TIndexScannerConstructor: public IScannerConstructor { +public: + static TString GetClassNameStatic() { + return "PLAIN"; + } private: using TBase = IScannerConstructor; + 
static const inline TFactory::TRegistrator Registrator = + TFactory::TRegistrator(GetClassNameStatic()); + + virtual std::shared_ptr DoBuildCursor() const override; + protected: virtual TConclusion> DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const override; public: - using TBase::TBase; virtual TConclusionStatus ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const override; virtual std::vector GetPrimaryKeyScheme(const NColumnShard::TColumnShard* self) const override; + using TBase::TBase; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h index 317ee0f03da9..34ef6496fd69 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h @@ -115,11 +115,11 @@ struct TReadMetadata : public TReadMetadataBase { std::vector CommittedBlobs; std::shared_ptr ReadStats; - TReadMetadata(const ui64 pathId, const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) - : TBase(info, sorting, ssaProgram, info->GetSchemaVerified(snapshot), snapshot) + TReadMetadata(const ui64 pathId, const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, + const TProgramContainer& ssaProgram, const std::shared_ptr& scanCursor) + : TBase(info, sorting, ssaProgram, info->GetSchemaVerified(snapshot), snapshot, scanCursor) , PathId(pathId) - , ReadStats(std::make_shared()) - { + , ReadStats(std::make_shared()) { } virtual std::vector GetKeyYqlSchema() const override { diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make index 
1ab826414813..883f2b6b8e33 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make @@ -1,7 +1,7 @@ LIBRARY() SRCS( - constructor.cpp + GLOBAL constructor.cpp resolver.cpp read_metadata.cpp ) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp index 59f55446cdd3..6298efcdd13d 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp @@ -27,7 +27,8 @@ void TScanHead::OnIntervalResult(std::shared_ptrsecond->GetGroupGuard(); } - AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(std::move(allocationGuard), std::move(gGuard), *newBatch, lastPK, callbackIdxSubscriver)).second); + AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(std::move(allocationGuard), std::move(gGuard), *newBatch, + std::make_shared(lastPK), callbackIdxSubscriver)).second); } else { AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, nullptr).second); } diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/constructor.cpp new file mode 100644 index 000000000000..4a3946192f13 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/constructor.cpp @@ -0,0 +1,47 @@ +#include "constructor.h" +#include "read_metadata.h" +#include "resolver.h" + +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +NKikimr::TConclusionStatus TIndexScannerConstructor::ParseProgram( + const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const { + AFL_VERIFY(vIndex); + auto& indexInfo = vIndex->GetSchemaVerified(Snapshot)->GetIndexInfo(); + TIndexColumnResolver columnResolver(indexInfo); + return 
TBase::ParseProgram(vIndex, proto.GetOlapProgramType(), proto.GetOlapProgram(), read, columnResolver); +} + +std::vector TIndexScannerConstructor::GetPrimaryKeyScheme(const NColumnShard::TColumnShard* self) const { + auto& indexInfo = self->TablesManager.GetIndexInfo(Snapshot); + return indexInfo.GetPrimaryKeyColumns(); +} + +NKikimr::TConclusion> TIndexScannerConstructor::DoBuildReadMetadata( + const NColumnShard::TColumnShard* self, const TReadDescription& read) const { + auto& insertTable = self->InsertTable; + auto& index = self->TablesManager.GetPrimaryIndex(); + if (!insertTable || !index) { + return std::shared_ptr(); + } + + if (read.GetSnapshot().GetPlanInstant() < self->GetMinReadSnapshot().GetPlanInstant()) { + return TConclusionStatus::Fail(TStringBuilder() << "Snapshot too old: " << read.GetSnapshot() << ". CS min read snapshot: " + << self->GetMinReadSnapshot() << ". now: " << TInstant::Now()); + } + + TDataStorageAccessor dataAccessor(insertTable, index); + AFL_VERIFY(read.PathId); + auto readMetadata = std::make_shared(read.PathId, index->CopyVersionedIndexPtr(), read.GetSnapshot(), + IsReverse ? 
TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, read.GetProgram(), read.GetScanCursor()); + + auto initResult = readMetadata->Init(self, read, dataAccessor); + if (!initResult) { + return initResult; + } + return static_pointer_cast(readMetadata); +} + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/constructor.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/constructor.h new file mode 100644 index 000000000000..76596f8dd94e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/constructor.h @@ -0,0 +1,28 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +class TIndexScannerConstructor: public IScannerConstructor { +public: + static TString GetClassNameStatic() { + return "SIMPLE"; + } + +private: + using TBase = IScannerConstructor; + static const inline TFactory::TRegistrator Registrator = + TFactory::TRegistrator(GetClassNameStatic()); + virtual std::shared_ptr DoBuildCursor() const override { + return std::make_shared(); + } + +protected: + virtual TConclusion> DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const override; +public: + using TBase::TBase; + virtual TConclusionStatus ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const override; + virtual std::vector GetPrimaryKeyScheme(const NColumnShard::TColumnShard* self) const override; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/read_metadata.cpp new file mode 100644 index 000000000000..d57492b742c8 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/read_metadata.cpp @@ -0,0 +1,124 @@ +#include "read_metadata.h" + +#include +#include 
+#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +std::unique_ptr TReadMetadata::StartScan(const std::shared_ptr& readContext) const { + return std::make_unique(readContext, readContext->GetReadMetadataPtrVerifiedAs()); +} + +TConclusionStatus TReadMetadata::Init( + const NColumnShard::TColumnShard* owner, const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor) { + SetPKRangesFilter(readDescription.PKRangesFilter); + InitShardingInfo(readDescription.PathId); + TxId = readDescription.TxId; + LockId = readDescription.LockId; + if (LockId) { + owner->GetOperationsManager().RegisterLock(*LockId, owner->Generation()); + LockSharingInfo = owner->GetOperationsManager().GetLockVerified(*LockId).GetSharingInfo(); + } + + SelectInfo = dataAccessor.Select(readDescription, !!LockId); + if (LockId) { + for (auto&& i : SelectInfo->PortionsOrderedPK) { + if (i->HasInsertWriteId() && !i->HasCommitSnapshot()) { + if (owner->HasLongTxWrites(i->GetInsertWriteIdVerified())) { + } else { + auto op = owner->GetOperationsManager().GetOperationByInsertWriteIdVerified(i->GetInsertWriteIdVerified()); + AddWriteIdToCheck(i->GetInsertWriteIdVerified(), op->GetLockId()); + } + } + } + } + + StatsMode = readDescription.StatsMode; + return TConclusionStatus::Success(); +} + +std::set TReadMetadata::GetEarlyFilterColumnIds() const { + auto& indexInfo = ResultIndexSchema->GetIndexInfo(); + std::set result; + for (auto&& i : GetProgram().GetEarlyFilterColumns()) { + auto id = indexInfo.GetColumnIdOptional(i); + if (id) { + result.emplace(*id); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("early_filter_column", i); + } + } + return result; +} + +std::set TReadMetadata::GetPKColumnIds() const { + std::set result; + auto& indexInfo = ResultIndexSchema->GetIndexInfo(); + for (auto&& i : indexInfo.GetPrimaryKeyColumns()) { + Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnIdVerified(i.first)).second); + } + return result; +} + 
+std::shared_ptr TReadMetadata::BuildReader(const std::shared_ptr& context) const { + return std::make_shared(context); +} + +NArrow::NMerger::TSortableBatchPosition TReadMetadata::BuildSortedPosition(const NArrow::TReplaceKey& key) const { + return NArrow::NMerger::TSortableBatchPosition(key.ToBatch(GetReplaceKey()), 0, GetReplaceKey()->field_names(), {}, IsDescSorted()); +} + +void TReadMetadata::DoOnReadFinished(NColumnShard::TColumnShard& owner) const { + if (!GetLockId()) { + return; + } + const ui64 lock = *GetLockId(); + if (GetBrokenWithCommitted()) { + owner.GetOperationsManager().GetLockVerified(lock).SetBroken(); + } else { + NOlap::NTxInteractions::TTxConflicts conflicts; + for (auto&& i : GetConflictableLockIds()) { + conflicts.Add(i, lock); + } + auto writer = std::make_shared(PathId, conflicts); + owner.GetOperationsManager().AddEventForLock(owner, lock, writer); + } +} + +void TReadMetadata::DoOnBeforeStartReading(NColumnShard::TColumnShard& owner) const { + if (!LockId) { + return; + } + auto evWriter = std::make_shared( + PathId, GetResultSchema()->GetIndexInfo().GetPrimaryKey(), GetPKRangesFilterPtr(), GetConflictableLockIds()); + owner.GetOperationsManager().AddEventForLock(owner, *LockId, evWriter); +} + +void TReadMetadata::DoOnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const { + if (LockSharingInfo) { + NKikimrDataEvents::TLock lockInfo; + lockInfo.SetLockId(LockSharingInfo->GetLockId()); + lockInfo.SetGeneration(LockSharingInfo->GetGeneration()); + lockInfo.SetDataShard(tabletId); + lockInfo.SetCounter(LockSharingInfo->GetCounter()); + lockInfo.SetPathId(PathId); + lockInfo.SetHasWrites(LockSharingInfo->HasWrites()); + if (LockSharingInfo->IsBroken()) { + scanData.LocksInfo.BrokenLocks.emplace_back(std::move(lockInfo)); + } else { + scanData.LocksInfo.Locks.emplace_back(std::move(lockInfo)); + } + } +} + +bool TReadMetadata::IsMyUncommitted(const TInsertWriteId writeId) const { + 
AFL_VERIFY(LockSharingInfo); + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end())("write_id", writeId)("write_ids_count", ConflictedWriteIds.size()); + return it->second.GetLockId() == LockSharingInfo->GetLockId(); +} + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/read_metadata.h new file mode 100644 index 000000000000..f894284dfd94 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/read_metadata.h @@ -0,0 +1,172 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { +class TLockSharingInfo; +} + +namespace NKikimr::NOlap::NReader::NSimple { + +// Holds all metadata that is needed to perform read/scan +struct TReadMetadata : public TReadMetadataBase { + using TBase = TReadMetadataBase; + +private: + const ui64 PathId; + std::shared_ptr BrokenWithCommitted = std::make_shared(); + std::shared_ptr LockSharingInfo; + + class TWriteIdInfo { + private: + const ui64 LockId; + std::shared_ptr Conflicts; + + public: + TWriteIdInfo(const ui64 lockId, const std::shared_ptr& counter) + : LockId(lockId) + , Conflicts(counter) { + } + + ui64 GetLockId() const { + return LockId; + } + + void MarkAsConflictable() const { + Conflicts->Inc(); + } + + bool IsConflictable() const { + return Conflicts->Val(); + } + }; + + THashMap> LockConflictCounters; + THashMap ConflictedWriteIds; + + virtual void DoOnReadFinished(NColumnShard::TColumnShard& owner) const override; + virtual void DoOnBeforeStartReading(NColumnShard::TColumnShard& owner) const override; + virtual void DoOnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const override; + +public: + using TConstPtr = std::shared_ptr; + + bool GetBrokenWithCommitted() const { + return BrokenWithCommitted->Val(); 
+ } + THashSet GetConflictableLockIds() const { + THashSet result; + for (auto&& i : ConflictedWriteIds) { + if (i.second.IsConflictable()) { + result.emplace(i.second.GetLockId()); + } + } + return result; + } + + bool IsLockConflictable(const ui64 lockId) const { + auto it = LockConflictCounters.find(lockId); + AFL_VERIFY(it != LockConflictCounters.end()); + return it->second->Val(); + } + + bool IsWriteConflictable(const TInsertWriteId writeId) const { + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + return it->second.IsConflictable(); + } + + void AddWriteIdToCheck(const TInsertWriteId writeId, const ui64 lockId) { + auto it = LockConflictCounters.find(lockId); + if (it == LockConflictCounters.end()) { + it = LockConflictCounters.emplace(lockId, std::make_shared()).first; + } + AFL_VERIFY(ConflictedWriteIds.emplace(writeId, TWriteIdInfo(lockId, it->second)).second); + } + + [[nodiscard]] bool IsMyUncommitted(const TInsertWriteId writeId) const; + + void SetConflictedWriteId(const TInsertWriteId writeId) const { + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + it->second.MarkAsConflictable(); + } + + void SetBrokenWithCommitted() const { + BrokenWithCommitted->Inc(); + } + + NArrow::NMerger::TSortableBatchPosition BuildSortedPosition(const NArrow::TReplaceKey& key) const; + std::shared_ptr BuildReader(const std::shared_ptr& context) const; + + bool HasProcessingColumnIds() const { + return GetProgram().HasProcessingColumnIds(); + } + + ui64 GetPathId() const { + return PathId; + } + + std::shared_ptr SelectInfo; + NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; + std::shared_ptr ReadStats; + + TReadMetadata(const ui64 pathId, const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, + const TProgramContainer& ssaProgram, const std::shared_ptr& scanCursor) + : TBase(info, sorting, ssaProgram, 
info->GetSchemaVerified(snapshot), snapshot, scanCursor) + , PathId(pathId) + , ReadStats(std::make_shared()) + { + } + + virtual std::vector GetKeyYqlSchema() const override { + return GetResultSchema()->GetIndexInfo().GetPrimaryKeyColumns(); + } + + TConclusionStatus Init(const NColumnShard::TColumnShard* owner, const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor); + + std::vector GetColumnsOrder() const { + auto schema = GetResultSchema(); + std::vector result; + for (auto&& i : schema->GetSchema()->fields()) { + result.emplace_back(i->name()); + } + return result; + } + + std::set GetEarlyFilterColumnIds() const; + std::set GetPKColumnIds() const; + + bool Empty() const { + Y_ABORT_UNLESS(SelectInfo); + return SelectInfo->PortionsOrderedPK.empty(); + } + + size_t NumIndexedBlobs() const { + Y_ABORT_UNLESS(SelectInfo); + return SelectInfo->Stats().Blobs; + } + + std::unique_ptr StartScan(const std::shared_ptr& readContext) const override; + + void Dump(IOutputStream& out) const override { + out << " index blobs: " << NumIndexedBlobs() + // << " with program steps: " << (Program ? 
Program->Steps.size() : 0) + << " at snapshot: " << GetRequestSnapshot().DebugString(); + TBase::Dump(out); + if (SelectInfo) { + out << ", "; + SelectInfo->DebugStream(out); + } + } + + friend IOutputStream& operator << (IOutputStream& out, const TReadMetadata& meta) { + meta.Dump(out); + return out; + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/resolver.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/resolver.cpp new file mode 100644 index 000000000000..5f0452250202 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/resolver.cpp @@ -0,0 +1,5 @@ +#include "resolver.h" + +namespace NKikimr::NOlap::NReader::NSimple { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/resolver.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/resolver.h new file mode 100644 index 000000000000..6267658734e5 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/resolver.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +class TIndexColumnResolver: public IColumnResolver { + const NOlap::TIndexInfo& IndexInfo; + +public: + explicit TIndexColumnResolver(const NOlap::TIndexInfo& indexInfo) + : IndexInfo(indexInfo) { + } + + virtual std::optional GetColumnIdOptional(const TString& name) const override { + return IndexInfo.GetColumnIdOptional(name); + } + + TString GetColumnName(ui32 id, bool required) const override { + return IndexInfo.GetColumnName(id, required); + } + + NSsa::TColumnInfo GetDefaultColumn() const override { + return NSsa::TColumnInfo::Original((ui32)NOlap::TIndexInfo::ESpecialColumn::PLAN_STEP, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/ya.make 
b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/ya.make new file mode 100644 index 000000000000..883f2b6b8e33 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/constructor/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + GLOBAL constructor.cpp + resolver.cpp + read_metadata.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/abstract + ydb/core/kqp/compute_actor +) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/columns_set.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/columns_set.cpp new file mode 100644 index 000000000000..d053b9affd4e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/columns_set.cpp @@ -0,0 +1,79 @@ +#include "columns_set.h" +#include +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +TString TColumnsSet::DebugString() const { + return TStringBuilder() << "(" + << "column_ids=" << JoinSeq(",", ColumnIds) << ";" + << "column_names=" << JoinSeq(",", ColumnNames) << ";" + << ");"; +} + +TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { + if (external.IsEmpty() || IsEmpty()) { + return *this; + } + TColumnsSet result = *this; + for (auto&& i : external.ColumnIds) { + result.ColumnIds.erase(i); + } + arrow::FieldVector fields; + for (auto&& i : Schema->fields()) { + if (!external.Schema->GetFieldByName(i->name())) { + fields.emplace_back(i); + } + } + result.Schema = std::make_shared(fields); + result.Rebuild(); + return result; +} + +TColumnsSet TColumnsSet::operator+(const TColumnsSet& external) const { + if (external.IsEmpty()) { + return *this; + } + if (IsEmpty()) { + return external; + } + TColumnsSet result = *this; + result.ColumnIds.insert(external.ColumnIds.begin(), external.ColumnIds.end()); + auto fields = result.Schema->fields(); + for (auto&& i : external.Schema->fields()) { + if (!result.Schema->GetFieldByName(i->name())) { + fields.emplace_back(i); + } + } + 
result.Schema = std::make_shared(fields); + result.Rebuild(); + return result; +} + +bool TColumnsSet::ColumnsOnly(const std::vector& fieldNames) const { + if (fieldNames.size() != GetColumnsCount()) { + return false; + } + std::set fieldNamesSet; + for (auto&& i : fieldNames) { + if (!fieldNamesSet.emplace(i).second) { + return false; + } + if (!ColumnNames.contains(TString(i.data(), i.size()))) { + return false; + } + } + return true; +} + +void TColumnsSet::Rebuild() { + ColumnNamesVector.clear(); + ColumnNames.clear(); + for (auto&& i : Schema->field_names()) { + ColumnNamesVector.emplace_back(i); + ColumnNames.emplace(i); + } + FilteredSchema = std::make_shared(FullReadSchema, ColumnIds); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/columns_set.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/columns_set.h new file mode 100644 index 000000000000..dca3e42df6ea --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/columns_set.h @@ -0,0 +1,214 @@ +#pragma once +#include +#include + +#include + +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +enum class EMemType { + Blob, + Raw, + RawSequential +}; + +enum class EStageFeaturesIndexes { + Accessors = 0, + Filter = 1, + Fetching = 2, + Merge = 3 +}; + +class TIndexesSet { +private: + YDB_READONLY_DEF(std::vector, IndexIds); + YDB_READONLY_DEF(std::set, IndexIdsSet); + +public: + TIndexesSet(const std::set& indexIds) + : IndexIds(indexIds.begin(), indexIds.end()) + , IndexIdsSet(indexIds) { + AFL_VERIFY(IndexIds.size() == IndexIdsSet.size())("indexes", JoinSeq(",", IndexIds)); + } + + TIndexesSet(const ui32& indexId) + : IndexIds({ indexId }) + , IndexIdsSet({ indexId }) { + } + + ui32 GetIndexesCount() const { + return IndexIds.size(); + } + + TString DebugString() const { + return TStringBuilder() << JoinSeq(",", IndexIds); + } +}; + +class TColumnsSetIds { +protected: + std::set ColumnIds; + +public: + const std::set& 
GetColumnIds() const { + return ColumnIds; + } + + TString DebugString() const { + return JoinSeq(",", ColumnIds); + } + + TColumnsSetIds(const std::set& ids) + : ColumnIds(ids) { + } + TColumnsSetIds() = default; + TColumnsSetIds(std::set&& ids) + : ColumnIds(std::move(ids)) { + } + + TColumnsSetIds(const std::vector& ids) + : ColumnIds(ids.begin(), ids.end()) { + } + + TColumnsSetIds operator+(const TColumnsSetIds& external) const { + TColumnsSetIds result = *this; + result.ColumnIds.insert(external.ColumnIds.begin(), external.ColumnIds.end()); + return result; + } + + TColumnsSetIds operator-(const TColumnsSetIds& external) const { + TColumnsSetIds result = *this; + for (auto&& i : external.ColumnIds) { + result.ColumnIds.erase(i); + } + return result; + } + bool IsEmpty() const { + return ColumnIds.empty(); + } + + bool operator!() const { + return IsEmpty(); + } + ui32 GetColumnsCount() const { + return ColumnIds.size(); + } + + bool Contains(const std::shared_ptr& columnsSet) const { + if (!columnsSet) { + return true; + } + return Contains(*columnsSet); + } + + bool IsEqual(const std::shared_ptr& columnsSet) const { + if (!columnsSet) { + return false; + } + return IsEqual(*columnsSet); + } + + bool Contains(const TColumnsSetIds& columnsSet) const { + for (auto&& i : columnsSet.ColumnIds) { + if (!ColumnIds.contains(i)) { + return false; + } + } + return true; + } + + bool Cross(const TColumnsSetIds& columnsSet) const { + for (auto&& i : columnsSet.ColumnIds) { + if (ColumnIds.contains(i)) { + return true; + } + } + return false; + } + + std::set Intersect(const TColumnsSetIds& columnsSet) const { + std::set result; + for (auto&& i : columnsSet.ColumnIds) { + if (ColumnIds.contains(i)) { + result.emplace(i); + } + } + return result; + } + + bool IsEqual(const TColumnsSetIds& columnsSet) const { + if (columnsSet.GetColumnIds().size() != ColumnIds.size()) { + return false; + } + auto itA = ColumnIds.begin(); + auto itB = columnsSet.ColumnIds.begin(); + while 
(itA != ColumnIds.end()) { + if (*itA != *itB) { + return false; + } + ++itA; + ++itB; + } + return true; + } +}; + +class TColumnsSet: public TColumnsSetIds { +private: + using TBase = TColumnsSetIds; + YDB_READONLY_DEF(std::set, ColumnNames); + std::vector ColumnNamesVector; + YDB_READONLY_DEF(std::shared_ptr, Schema); + ISnapshotSchema::TPtr FullReadSchema; + YDB_READONLY_DEF(ISnapshotSchema::TPtr, FilteredSchema); + + void Rebuild(); + +public: + TColumnsSet() = default; + const std::vector& GetColumnNamesVector() const { + return ColumnNamesVector; + } + + bool ColumnsOnly(const std::vector& fieldNames) const; + + std::shared_ptr BuildSamePtr(const std::set& columnIds) const { + return std::make_shared(columnIds, FullReadSchema); + } + + TColumnsSet(const std::set& columnIds, const ISnapshotSchema::TPtr& fullReadSchema) + : TBase(columnIds) + , FullReadSchema(fullReadSchema) { + AFL_VERIFY(!!FullReadSchema); + Schema = FullReadSchema->GetIndexInfo().GetColumnsSchema(ColumnIds); + Rebuild(); + } + + TColumnsSet(const std::vector& columnIds, const ISnapshotSchema::TPtr& fullReadSchema) + : TBase(columnIds) + , FullReadSchema(fullReadSchema) { + AFL_VERIFY(!!FullReadSchema); + Schema = FullReadSchema->GetIndexInfo().GetColumnsSchema(ColumnIds); + Rebuild(); + } + + const ISnapshotSchema& GetFilteredSchemaVerified() const { + AFL_VERIFY(FilteredSchema); + return *FilteredSchema; + } + + const std::shared_ptr& GetFilteredSchemaPtrVerified() const { + AFL_VERIFY(FilteredSchema); + return FilteredSchema; + } + + TString DebugString() const; + + TColumnsSet operator+(const TColumnsSet& external) const; + + TColumnsSet operator-(const TColumnsSet& external) const; +}; + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/constructor.cpp new file mode 100644 index 000000000000..bd1f1c5a1ac6 --- /dev/null +++ 
b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/constructor.cpp @@ -0,0 +1,22 @@ +#include "constructor.h" +#include +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +void TBlobsFetcherTask::DoOnDataReady(const std::shared_ptr& /*resourcesGuard*/) { + Source->MutableStageData().AddBlobs(Source->DecodeBlobAddresses(ExtractBlobsData())); + AFL_VERIFY(Step.Next()); + auto task = std::make_shared(Source, std::move(Step), Context->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); +} + +bool TBlobsFetcherTask::DoOnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("error_on_blob_reading", range.ToString())("scan_actor_id", Context->GetCommonContext()->GetScanActorId()) + ("status", status.GetErrorMessage())("status_code", status.GetStatus())("storage_id", storageId); + NActors::TActorContext::AsActorContext().Send(Context->GetCommonContext()->GetScanActorId(), + std::make_unique(TConclusionStatus::Fail("cannot read blob range " + range.ToString()))); + return false; +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/constructor.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/constructor.h new file mode 100644 index 000000000000..237923ed5882 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/constructor.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include +#include +#include +#include "source.h" + +namespace NKikimr::NOlap::NReader::NSimple { + +class TBlobsFetcherTask: public NBlobOperations::NRead::ITask, public NColumnShard::TMonitoringObjectsCounter { +private: + using TBase = NBlobOperations::NRead::ITask; + const std::shared_ptr Source; + TFetchingScriptCursor Step; + const std::shared_ptr Context; + + virtual void DoOnDataReady(const std::shared_ptr& resourcesGuard) override; + virtual bool 
DoOnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) override; +public: + TBlobsFetcherTask(const std::vector>& readActions, const std::shared_ptr& sourcePtr, + const TFetchingScriptCursor& step, const std::shared_ptr& context, const TString& taskCustomer, const TString& externalTaskId) + : TBase(readActions, taskCustomer, externalTaskId) + , Source(sourcePtr) + , Step(step) + , Context(context) + { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/context.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/context.cpp new file mode 100644 index 000000000000..caa7f0330506 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/context.cpp @@ -0,0 +1,309 @@ +#include "context.h" +#include "source.h" + +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +std::unique_ptr TSpecialReadContext::BuildMerger() const { + return std::make_unique( + ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse(), IIndexInfo::GetSnapshotColumnNames()); +} + +std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source) { + if (!source->GetStageData().HasPortionAccessor()) { + if (!AskAccumulatorsScript) { + AskAccumulatorsScript = std::make_shared(*this); + AskAccumulatorsScript->AddStep(source->PredictAccessorsSize(), EStageFeaturesIndexes::Accessors); + AskAccumulatorsScript->AddStep(); + AskAccumulatorsScript->AddStep(*FFColumns); + } + return AskAccumulatorsScript; + } + const bool partialUsageByPK = [&]() { + switch (source->GetUsageClass()) { + case TPKRangeFilter::EUsageClass::PartialUsage: + return true; + case TPKRangeFilter::EUsageClass::DontUsage: + return true; + case TPKRangeFilter::EUsageClass::FullUsage: + return false; + } + }(); + const bool useIndexes = (IndexChecker ? 
source->HasIndexes(IndexChecker->GetIndexIds()) : false); + const bool needSnapshots = ReadMetadata->GetRequestSnapshot() < source->GetRecordSnapshotMax(); + const bool hasDeletions = source->GetHasDeletions(); + bool needShardingFilter = false; + if (!!ReadMetadata->GetRequestShardingInfo()) { + auto ver = source->GetShardingVersionOptional(); + if (!ver || *ver < ReadMetadata->GetRequestShardingInfo()->GetSnapshotVersion()) { + needShardingFilter = true; + } + } + { + auto result = CacheFetchingScripts[needSnapshots ? 1 : 0][partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0][needShardingFilter ? 1 : 0] + [hasDeletions ? 1 : 0]; + if (!result) { + TGuard wg(Mutex); + result = CacheFetchingScripts[needSnapshots ? 1 : 0][partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] + [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0]; + if (!result) { + result = BuildColumnsFetchingPlan(needSnapshots, partialUsageByPK, useIndexes, needShardingFilter, hasDeletions); + CacheFetchingScripts[needSnapshots ? 1 : 0][partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] + [needShardingFilter ? 1 : 0][hasDeletions ? 
1 : 0] = result; + } + } + AFL_VERIFY(result); + if (*result) { + return *result; + } else { + std::shared_ptr result = std::make_shared(*this); + result->SetBranchName("FAKE"); + result->AddStep(std::make_shared(source->GetRecordsCount())); + return result; + } + } +} + +class TColumnsAccumulator { +private: + TColumnsSetIds FetchingReadyColumns; + TColumnsSetIds AssemblerReadyColumns; + ISnapshotSchema::TPtr FullSchema; + std::shared_ptr GuaranteeNotOptional; + +public: + TColumnsAccumulator(const std::shared_ptr& guaranteeNotOptional, const ISnapshotSchema::TPtr& fullSchema) + : FullSchema(fullSchema) + , GuaranteeNotOptional(guaranteeNotOptional) { + } + + TColumnsSetIds GetNotFetchedAlready(const TColumnsSetIds& columns) const { + return columns - FetchingReadyColumns; + } + + bool AddFetchingStep(TFetchingScript& script, const TColumnsSetIds& columns, const EStageFeaturesIndexes stage) { + auto actualColumns = GetNotFetchedAlready(columns); + FetchingReadyColumns = FetchingReadyColumns + (TColumnsSetIds)columns; + if (!actualColumns.IsEmpty()) { + script.Allocation(columns.GetColumnIds(), stage, EMemType::Blob); + script.AddStep(std::make_shared(actualColumns)); + return true; + } + return false; + } + bool AddAssembleStep(TFetchingScript& script, const TColumnsSetIds& columns, const TString& purposeId, const EStageFeaturesIndexes stage, + const bool sequential) { + auto actualColumns = columns - AssemblerReadyColumns; + AssemblerReadyColumns = AssemblerReadyColumns + columns; + if (!actualColumns.IsEmpty()) { + auto actualSet = std::make_shared(actualColumns.GetColumnIds(), FullSchema); + if (sequential) { + const auto notSequentialColumnIds = GuaranteeNotOptional->Intersect(*actualSet); + if (notSequentialColumnIds.size()) { + script.Allocation(notSequentialColumnIds, stage, EMemType::Raw); + std::shared_ptr cross = actualSet->BuildSamePtr(notSequentialColumnIds); + script.AddStep(cross, purposeId); + *actualSet = *actualSet - *cross; + } + if 
(!actualSet->IsEmpty()) { + script.Allocation(actualSet->GetColumnIds(), stage, EMemType::RawSequential); // reserve for the columns still left in actualSet (the sequential ones), not the ones already allocated as Raw + script.AddStep(actualSet, purposeId); + } + } else { + script.Allocation(actualColumns.GetColumnIds(), stage, EMemType::Raw); + script.AddStep(actualSet, purposeId); + } + return true; + } + return false; + } +}; + +std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, + const bool partialUsageByPredicateExt, const bool useIndexes, const bool needFilterSharding, const bool needFilterDeletion) const { + std::shared_ptr result = std::make_shared(*this); + const bool partialUsageByPredicate = partialUsageByPredicateExt && PredicateColumns->GetColumnsCount(); + + TColumnsAccumulator acc(MergeColumns, ReadMetadata->GetResultSchema()); + if (!!IndexChecker && useIndexes) { + result->AddStep(std::make_shared(std::make_shared(IndexChecker->GetIndexIds()))); + result->AddStep(std::make_shared(IndexChecker)); + } + if (needFilterSharding && !ShardingColumns->IsEmpty()) { + const TColumnsSetIds columnsFetch = *ShardingColumns; + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); + acc.AddAssembleStep(*result, columnsFetch, "SPEC_SHARDING", EStageFeaturesIndexes::Filter, false); + result->AddStep(std::make_shared()); + } + { + result->SetBranchName("exclusive"); + TColumnsSet columnsFetch = *EFColumns; + if (needFilterDeletion) { + columnsFetch = columnsFetch + *DeletionColumns; + } + if (needSnapshots || FFColumns->Cross(*SpecColumns)) { + columnsFetch = columnsFetch + *SpecColumns; + } + if (partialUsageByPredicate) { + columnsFetch = columnsFetch + *PredicateColumns; + } + + if (columnsFetch.GetColumnsCount()) { + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); + } + + if (needFilterDeletion) { + acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", EStageFeaturesIndexes::Filter, false); + result->AddStep(std::make_shared()); + } + if (partialUsageByPredicate) { + 
acc.AddAssembleStep(*result, *PredicateColumns, "PREDICATE", EStageFeaturesIndexes::Filter, false); + result->AddStep(std::make_shared()); + } + if (needSnapshots || FFColumns->Cross(*SpecColumns)) { + acc.AddAssembleStep(*result, *SpecColumns, "SPEC", EStageFeaturesIndexes::Filter, false); + result->AddStep(std::make_shared()); + } + for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { + if (i->GetFilterOriginalColumnIds().empty()) { + break; + } + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetResultSchema()); + acc.AddAssembleStep(*result, stepColumnIds, "EF", EStageFeaturesIndexes::Filter, false); + result->AddStep(std::make_shared(i)); + if (!i->IsFilterOnly()) { + break; + } + } + if (GetReadMetadata()->Limit) { + result->AddStep(std::make_shared(GetReadMetadata()->Limit, GetReadMetadata()->IsDescSorted())); + } + acc.AddFetchingStep(*result, *FFColumns, EStageFeaturesIndexes::Fetching); + acc.AddAssembleStep(*result, *FFColumns, "LAST", EStageFeaturesIndexes::Fetching, false); + } + result->AddStep(); + return result; +} + +TSpecialReadContext::TSpecialReadContext(const std::shared_ptr& commonContext) + : CommonContext(commonContext) { + + ReadMetadata = dynamic_pointer_cast(CommonContext->GetReadMetadata()); + Y_ABORT_UNLESS(ReadMetadata); + Y_ABORT_UNLESS(ReadMetadata->SelectInfo); + + double kffAccessors = 0.01; + double kffFilter = 0.45; + double kffFetching = 0.45; + double kffMerge = 0.10; + TString stagePrefix; + if (ReadMetadata->GetEarlyFilterColumnIds().size()) { + stagePrefix = "EF"; + kffFilter = 0.7; + kffFetching = 0.15; + kffMerge = 0.14; + kffAccessors = 0.01; + } else { + stagePrefix = "FO"; + kffFilter = 0.1; + kffFetching = 0.75; + kffMerge = 0.14; + kffAccessors = 0.01; + } + + std::vector> stages = { + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::ACCESSORS", kffAccessors * TGlobalLimits::ScanMemoryLimit), + 
NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::FILTER", kffFilter * TGlobalLimits::ScanMemoryLimit), + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::FETCHING", kffFetching * TGlobalLimits::ScanMemoryLimit), + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures(stagePrefix + "::MERGE", kffMerge * TGlobalLimits::ScanMemoryLimit) + }; + ProcessMemoryGuard = + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildProcessGuard(CommonContext->GetReadMetadata()->GetTxId(), stages); + ProcessScopeGuard = + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildScopeGuard(CommonContext->GetReadMetadata()->GetTxId(), GetCommonContext()->GetScanId()); + + auto readSchema = ReadMetadata->GetResultSchema(); + SpecColumns = std::make_shared(TIndexInfo::GetSnapshotColumnIdsSet(), readSchema); + IndexChecker = ReadMetadata->GetProgram().GetIndexChecker(); + { + auto predicateColumns = ReadMetadata->GetPKRangesFilter().GetColumnIds(ReadMetadata->GetIndexInfo()); + if (predicateColumns.size()) { + PredicateColumns = std::make_shared(predicateColumns, readSchema); + } else { + PredicateColumns = std::make_shared(); + } + } + { + std::set columnIds = { NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX }; + DeletionColumns = std::make_shared(columnIds, ReadMetadata->GetResultSchema()); + } + + if (!!ReadMetadata->GetRequestShardingInfo()) { + auto shardingColumnIds = + ReadMetadata->GetIndexInfo().GetColumnIdsVerified(ReadMetadata->GetRequestShardingInfo()->GetShardingInfo()->GetColumnNames()); + ShardingColumns = std::make_shared(shardingColumnIds, ReadMetadata->GetResultSchema()); + } else { + ShardingColumns = std::make_shared(); + } + { + auto efColumns = ReadMetadata->GetEarlyFilterColumnIds(); + if (efColumns.size()) { + EFColumns = std::make_shared(efColumns, readSchema); + } else { + EFColumns = std::make_shared(); + } + } + if (ReadMetadata->HasProcessingColumnIds()) 
{ + FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), readSchema); + if (SpecColumns->Contains(*FFColumns) && !EFColumns->IsEmpty()) { + FFColumns = std::make_shared(*EFColumns + *SpecColumns); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_modified", FFColumns->DebugString()); + } else { + AFL_VERIFY(!FFColumns->Contains(*SpecColumns))("info", FFColumns->DebugString()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_first", FFColumns->DebugString()); + } + } else { + FFColumns = EFColumns; + } + if (FFColumns->IsEmpty()) { + ProgramInputColumns = SpecColumns; + } else { + ProgramInputColumns = FFColumns; + } + + PKColumns = std::make_shared(ReadMetadata->GetPKColumnIds(), readSchema); + MergeColumns = std::make_shared(*PKColumns + *SpecColumns); + + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); +} + +TString TSpecialReadContext::DebugString() const { + TStringBuilder sb; + sb << "ef=" << EFColumns->DebugString() << ";" + << "sharding=" << ShardingColumns->DebugString() << ";" + << "pk=" << PKColumns->DebugString() << ";" + << "ff=" << FFColumns->DebugString() << ";" + << "program_input=" << ProgramInputColumns->DebugString() << ";"; + return sb; +} + +TString TSpecialReadContext::ProfileDebugString() const { + TStringBuilder sb; + const auto GetBit = [](const ui32 val, const ui32 pos) -> ui32 { + return (val & (1 << pos)) ? 
1 : 0; + }; + + for (ui32 i = 0; i < (1 << 5); ++i) { + auto script = CacheFetchingScripts[GetBit(i, 0)][GetBit(i, 1)][GetBit(i, 2)][GetBit(i, 3)][GetBit(i, 4)]; + if (script && *script) { + sb << (*script)->DebugString() << ";"; + } + } + return sb; +} + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/context.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/context.h new file mode 100644 index 000000000000..f64d0923d6bf --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/context.h @@ -0,0 +1,85 @@ +#pragma once +#include "columns_set.h" +#include "fetching.h" +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +class IDataSource; + +class TSpecialReadContext { +private: + YDB_READONLY_DEF(std::shared_ptr, CommonContext); + YDB_READONLY_DEF(std::shared_ptr, ProcessMemoryGuard); + YDB_READONLY_DEF(std::shared_ptr, ProcessScopeGuard); + + YDB_READONLY_DEF(std::shared_ptr, SpecColumns); + YDB_READONLY_DEF(std::shared_ptr, MergeColumns); + YDB_READONLY_DEF(std::shared_ptr, ShardingColumns); + YDB_READONLY_DEF(std::shared_ptr, DeletionColumns); + YDB_READONLY_DEF(std::shared_ptr, EFColumns); + YDB_READONLY_DEF(std::shared_ptr, PredicateColumns); + YDB_READONLY_DEF(std::shared_ptr, PKColumns); + YDB_READONLY_DEF(std::shared_ptr, FFColumns); + YDB_READONLY_DEF(std::shared_ptr, ProgramInputColumns); + + YDB_READONLY_DEF(std::shared_ptr, MergeStageMemory); + YDB_READONLY_DEF(std::shared_ptr, FilterStageMemory); + YDB_READONLY_DEF(std::shared_ptr, FetchingStageMemory); + + TAtomic AbortFlag = 0; + NIndexes::TIndexCheckerContainer IndexChecker; + TReadMetadata::TConstPtr ReadMetadata; + std::shared_ptr EmptyColumns = std::make_shared(); + std::shared_ptr BuildColumnsFetchingPlan(const bool needSnapshots, const bool partialUsageByPredicateExt, + const bool useIndexes, const bool needFilterSharding, const bool 
needFilterDeletion) const; + TMutex Mutex; + std::array>, 2>, 2>, 2>, 2>, 2> + CacheFetchingScripts; + std::shared_ptr AskAccumulatorsScript; + +public: + const ui64 ReduceMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetReduceMemoryIntervalLimit(); + const ui64 RejectMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetRejectMemoryIntervalLimit(); + const ui64 ReadSequentiallyBufferSize = TGlobalLimits::DefaultReadSequentiallyBufferSize; + + ui64 GetProcessMemoryControlId() const { + AFL_VERIFY(ProcessMemoryGuard); + return ProcessMemoryGuard->GetProcessId(); + } + ui64 GetRequestedMemoryBytes() const { + return MergeStageMemory->GetFullMemory() + FilterStageMemory->GetFullMemory() + FetchingStageMemory->GetFullMemory(); + } + + const TReadMetadata::TConstPtr& GetReadMetadata() const { + return ReadMetadata; + } + + bool IsAborted() const { + return AtomicGet(AbortFlag); + } + + void Abort() { + AtomicSet(AbortFlag, 1); + } + + ~TSpecialReadContext() { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("profile", ProfileDebugString()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("fetching", DebugString()); + } + + std::unique_ptr BuildMerger() const; + + TString DebugString() const; + TString ProfileDebugString() const; + + TSpecialReadContext(const std::shared_ptr& commonContext); + + std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source); +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetched_data.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetched_data.cpp new file mode 100644 index 000000000000..bf38c466b75b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetched_data.cpp @@ -0,0 +1,21 @@ +#include "fetched_data.h" + +#include +#include +#include + +namespace NKikimr::NOlap { + +void TFetchedData::SyncTableColumns(const std::vector>& fields, const ISnapshotSchema& schema) { + for (auto&& i : 
fields) { + if (Table->GetSchema()->GetFieldByName(i->name())) { + continue; + } + Table + ->AddField(i, std::make_shared(NArrow::TThreadSimpleArraysCache::Get( + i->type(), schema.GetExternalDefaultValueVerified(i->name()), Table->num_rows()))) + .Validate(); + } +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetched_data.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetched_data.h new file mode 100644 index 000000000000..001f24553338 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetched_data.h @@ -0,0 +1,239 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace NKikimr::NOlap { + +class TFetchedData { +protected: + using TBlobs = THashMap; + YDB_ACCESSOR_DEF(TBlobs, Blobs); + YDB_READONLY_DEF(std::shared_ptr, Table); + YDB_READONLY_DEF(std::shared_ptr, Filter); + YDB_READONLY(bool, UseFilter, false); + + std::optional PortionAccessor; + bool DataAdded = false; + +public: + TFetchedData(const bool useFilter) + : UseFilter(useFilter) { + } + + // Changes the UseFilter mode. No-op when the value is unchanged; otherwise it is verified + // that no filter/batch has been added yet (DataAdded is false) before the new value is stored. + void SetUseFilter(const bool value) { + if (UseFilter == value) { + return; + } + AFL_VERIFY(!DataAdded); + UseFilter = value; + } + + bool HasPortionAccessor() const { + return !!PortionAccessor; + } + + void SetPortionAccessor(TPortionDataAccessor&& accessor) { + AFL_VERIFY(!PortionAccessor); + PortionAccessor = std::move(accessor); + } + + const TPortionDataAccessor& GetPortionAccessor() const { + AFL_VERIFY(!!PortionAccessor); + return *PortionAccessor; + } + + ui32 GetFilteredCount(const ui32 recordsCount, const ui32 defLimit) const { + if (!Filter) { + return std::min(defLimit, recordsCount); + } + return Filter->GetFilteredCount().value_or(recordsCount); + } + + void SyncTableColumns(const std::vector>& fields, const ISnapshotSchema& schema); + + std::shared_ptr GetAppliedFilter() const { + return UseFilter ? 
Filter : nullptr; + } + + std::shared_ptr GetNotAppliedFilter() const { + return UseFilter ? nullptr : Filter; + } + + TString ExtractBlob(const TChunkAddress& address) { + auto it = Blobs.find(address); + AFL_VERIFY(it != Blobs.end()); + AFL_VERIFY(it->second.IsBlob()); + auto result = it->second.GetData(); + Blobs.erase(it); + return result; + } + + void AddBlobs(THashMap&& blobData) { + for (auto&& i : blobData) { + AFL_VERIFY(Blobs.emplace(i.first, std::move(i.second)).second); + } + } + + void AddDefaults(THashMap&& blobs) { + for (auto&& i : blobs) { + AFL_VERIFY(Blobs.emplace(i.first, std::move(i.second)).second); + } + } + + bool IsEmpty() const { + return (Filter && Filter->IsTotalDenyFilter()) || (Table && !Table->num_rows()); + } + + void Clear() { + Filter = std::make_shared(NArrow::TColumnFilter::BuildDenyFilter()); + Table = nullptr; + } + + void AddFilter(const std::shared_ptr& filter) { + DataAdded = true; + if (!filter) { + return; + } + return AddFilter(*filter); + } + + void CutFilter(const ui32 recordsCount, const ui32 limit, const bool reverse) { + auto filter = std::make_shared(NArrow::TColumnFilter::BuildAllowFilter()); + ui32 recordsCountImpl = Filter ? 
Filter->GetFilteredCount().value_or(recordsCount) : recordsCount; + if (recordsCountImpl < limit) { + return; + } + if (reverse) { + filter->Add(false, recordsCountImpl - limit); + filter->Add(true, limit); + } else { + filter->Add(true, limit); + filter->Add(false, recordsCountImpl - limit); + } + if (Filter) { + if (UseFilter) { + AddFilter(*filter); + } else { + AddFilter(Filter->CombineSequentialAnd(*filter)); + } + } else { + AddFilter(*filter); + } + } + + void AddFilter(const NArrow::TColumnFilter& filter) { + if (UseFilter && Table) { + AFL_VERIFY(filter.Apply(Table)); + } + if (!Filter) { + Filter = std::make_shared(filter); + } else if (UseFilter) { + *Filter = Filter->CombineSequentialAnd(filter); + } else { + *Filter = Filter->And(filter); + } + } + + void AddBatch(const std::shared_ptr& table) { + DataAdded = true; + AFL_VERIFY(table); + if (UseFilter) { + AddBatch(table->BuildTableVerified()); + } else { + if (!Table) { + Table = table; + } else { + auto mergeResult = Table->MergeColumnsStrictly(*table); + AFL_VERIFY(mergeResult.IsSuccess())("error", mergeResult.GetErrorMessage()); + } + } + } + + void AddBatch(const std::shared_ptr& table) { + DataAdded = true; + auto tableLocal = table; + if (Filter && UseFilter) { + AFL_VERIFY(Filter->Apply(tableLocal)); + } + if (!Table) { + Table = std::make_shared(tableLocal); + } else { + auto mergeResult = Table->MergeColumnsStrictly(NArrow::TGeneralContainer(tableLocal)); + AFL_VERIFY(mergeResult.IsSuccess())("error", mergeResult.GetErrorMessage()); + } + } +}; + +class TFetchedResult { +private: + YDB_READONLY_DEF(std::shared_ptr, Batch); + YDB_READONLY_DEF(std::shared_ptr, NotAppliedFilter); + std::optional> PagesToResult; + std::optional> ChunkToReply; + +public: + TFetchedResult(std::unique_ptr&& data) + : Batch(data->GetTable()) + , NotAppliedFilter(data->GetNotAppliedFilter()) { + } + + TPortionDataAccessor::TReadPage ExtractPageForResult() { + AFL_VERIFY(PagesToResult); + 
AFL_VERIFY(PagesToResult->size()); + auto result = PagesToResult->front(); + PagesToResult->pop_front(); + return result; + } + + const std::deque& GetPagesToResultVerified() const { + AFL_VERIFY(PagesToResult); + return *PagesToResult; + } + + void SetPages(std::vector&& pages) { + AFL_VERIFY(!PagesToResult); + PagesToResult = std::deque(pages.begin(), pages.end()); + } + + void SetResultChunk(std::shared_ptr&& table, const ui32 indexStart, const ui32 recordsCount) { + auto page = ExtractPageForResult(); + AFL_VERIFY(page.GetIndexStart() == indexStart)("real", page.GetIndexStart())("expected", indexStart); + AFL_VERIFY(page.GetRecordsCount() == recordsCount)("real", page.GetRecordsCount())("expected", recordsCount); + AFL_VERIFY(!ChunkToReply); + ChunkToReply = std::move(table); + } + + bool IsFinished() const { + return GetPagesToResultVerified().empty(); + } + + bool HasResultChunk() const { + return !!ChunkToReply; + } + + std::shared_ptr ExtractResultChunk() { + AFL_VERIFY(!!ChunkToReply); + auto result = std::move(*ChunkToReply); + ChunkToReply.reset(); + return result; + } + + bool IsEmpty() const { + return !Batch || Batch->num_rows() == 0 || (NotAppliedFilter && NotAppliedFilter->IsTotalDenyFilter()); + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetching.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetching.cpp new file mode 100644 index 000000000000..97b410654d0f --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetching.cpp @@ -0,0 +1,403 @@ +#include "fetching.h" +#include "plain_read_data.h" +#include "source.h" + +#include +#include +#include + +#include + +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +bool TStepAction::DoApply(IDataReader& owner) const { + if (FinishedFlag) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "apply"); + auto* plainReader = static_cast(&owner); + 
plainReader->MutableScanner().OnSourceReady(Source, nullptr, 0, Source->GetRecordsCount(), *plainReader); + } + return true; +} + +TConclusionStatus TStepAction::DoExecuteImpl() { + if (Source->GetContext()->IsAborted()) { + return TConclusionStatus::Success(); + } + auto executeResult = Cursor.Execute(Source); + if (!executeResult) { + return executeResult; + } + if (*executeResult) { + FinishedFlag = true; + } + return TConclusionStatus::Success(); +} + +TStepAction::TStepAction(const std::shared_ptr& source, TFetchingScriptCursor&& cursor, const NActors::TActorId& ownerActorId) + : TBase(ownerActorId) + , Source(source) + , Cursor(std::move(cursor)) + , CountersGuard(Source->GetContext()->GetCommonContext()->GetCounters().GetAssembleTasksGuard()) { +} + +TConclusion TColumnBlobsFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + return !source->StartFetchingColumns(source, step, Columns); +} + +ui64 TColumnBlobsFetchingStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return source->GetColumnBlobBytes(Columns.GetColumnIds()); +} + +TConclusion TIndexBlobsFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + return !source->StartFetchingIndexes(source, step, Indexes); +} + +TConclusion TAssemblerStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->AssembleColumns(Columns); + return true; +} + +ui64 TAssemblerStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return source->GetColumnRawBytes(Columns->GetColumnIds()); +} + +TConclusion TOptionalAssemblerStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->AssembleColumns(Columns, !source->IsSourceInMemory()); + return true; +} + +ui64 TOptionalAssemblerStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return 
source->GetColumnsVolume(Columns->GetColumnIds(), EMemType::RawSequential); +} + +TConclusion TFilterProgramStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + AFL_VERIFY(source); + AFL_VERIFY(Step); + auto filter = Step->BuildFilter(source->GetStageData().GetTable()); + if (!filter.ok()) { + return TConclusionStatus::Fail(filter.status().message()); + } + source->MutableStageData().AddFilter(*filter); + return true; +} + +TConclusion TPredicateFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + auto filter = + source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()->BuildTableVerified()); + source->MutableStageData().AddFilter(filter); + return true; +} + +TConclusion TSnapshotFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + auto filter = MakeSnapshotFilter( + source->GetStageData().GetTable()->BuildTableVerified(), source->GetContext()->GetReadMetadata()->GetRequestSnapshot()); + if (filter.GetFilteredCount().value_or(source->GetRecordsCount()) != source->GetRecordsCount()) { + if (source->AddTxConflict()) { + return true; + } + } + source->MutableStageData().AddFilter(filter); + return true; +} + +TConclusion TDeletionFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + auto filterTable = source->GetStageData().GetTable()->BuildTableOptional(std::set({ TIndexInfo::SPEC_COL_DELETE_FLAG })); + if (!filterTable) { + return true; + } + AFL_VERIFY(filterTable->column(0)->type()->id() == arrow::boolean()->id()); + NArrow::TColumnFilter filter = NArrow::TColumnFilter::BuildAllowFilter(); + for (auto&& i : filterTable->column(0)->chunks()) { + auto filterFlags = static_pointer_cast(i); + for (ui32 i = 0; i < filterFlags->length(); ++i) { + filter.Add(!filterFlags->GetView(i)); + } + } + 
source->MutableStageData().AddFilter(filter); + return true; +} + +TConclusion TShardingFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + NYDBTest::TControllers::GetColumnShardController()->OnSelectShardingFilter(); + const auto& shardingInfo = source->GetContext()->GetReadMetadata()->GetRequestShardingInfo()->GetShardingInfo(); + auto filter = shardingInfo->GetFilter(source->GetStageData().GetTable()->BuildTableVerified()); + source->MutableStageData().AddFilter(filter); + return true; +} + +TConclusion TBuildFakeSpec::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + std::vector> columns; + for (auto&& f : IIndexInfo::ArrowSchemaSnapshot()->fields()) { + columns.emplace_back(NArrow::TThreadSimpleArraysCache::GetConst(f->type(), NArrow::DefaultScalar(f->type()), Count)); + } + source->MutableStageData().AddBatch( + std::make_shared(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns))); + return true; +} + +TConclusion TApplyIndexStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->ApplyIndex(IndexChecker); + return true; +} + +TConclusion TFetchingScriptCursor::Execute(const std::shared_ptr& source) { + AFL_VERIFY(source); + NMiniKQL::TThrowingBindTerminator bind; + Script->OnExecute(); + while (!Script->IsFinished(CurrentStepIdx)) { + if (source->GetStageData().IsEmpty()) { + source->OnEmptyStageData(); + break; + } + auto step = Script->GetStep(CurrentStepIdx); + TMemoryProfileGuard mGuard("SCAN_PROFILE::FETCHING::" + step->GetName() + "::" + Script->GetBranchName(), + IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("scan_step", step->DebugString())("scan_step_idx", CurrentStepIdx); + AFL_VERIFY(!CurrentStartInstant); + CurrentStartInstant = TMonotonic::Now(); + AFL_VERIFY(!CurrentStartDataSize); + 
CurrentStartDataSize = step->GetProcessingDataSize(source); + const TConclusion resultStep = step->ExecuteInplace(source, *this); + if (!resultStep) { + return resultStep; + } + if (!*resultStep) { + return false; + } + FlushDuration(); + ++CurrentStepIdx; + } + return true; +} + +bool TAllocateMemoryStep::TFetchingStepAllocation::DoOnAllocated(std::shared_ptr&& guard, + const std::shared_ptr& /*allocation*/) { + auto data = Source.lock(); + if (!data || data->GetContext()->IsAborted()) { + guard->Release(); + return false; + } + data->RegisterAllocationGuard(std::move(guard)); + Step.Next(); + auto task = std::make_shared(data, std::move(Step), data->GetContext()->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + return true; +} + +TAllocateMemoryStep::TFetchingStepAllocation::TFetchingStepAllocation( + const std::shared_ptr& source, const ui64 mem, const TFetchingScriptCursor& step) + : TBase(mem) + , Source(source) + , Step(step) + , TasksGuard(source->GetContext()->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) { +} + +void TAllocateMemoryStep::TFetchingStepAllocation::DoOnAllocationImpossible(const TString& errorMessage) { + auto sourcePtr = Source.lock(); + if (sourcePtr) { + sourcePtr->GetContext()->GetCommonContext()->AbortWithError( + "cannot allocate memory for step " + Step.GetName() + ": '" + errorMessage + "'"); + } +} + +TConclusion TAllocateMemoryStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + ui64 size = PredefinedSize.value_or(0); + for (auto&& i : Packs) { + ui32 sizeLocal = source->GetColumnsVolume(i.GetColumns().GetColumnIds(), i.GetMemType()); + if (source->GetStageData().GetUseFilter() && source->GetContext()->GetReadMetadata()->Limit && i.GetMemType() != EMemType::Blob) { + const ui32 filtered = + source->GetStageData().GetFilteredCount(source->GetRecordsCount(), source->GetContext()->GetReadMetadata()->Limit); + if 
(filtered < source->GetRecordsCount()) { + sizeLocal = sizeLocal * 1.0 * filtered / source->GetRecordsCount(); + } + } + size += sizeLocal; + } + + auto allocation = std::make_shared(source, size, step); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(source->GetContext()->GetProcessMemoryControlId(), + source->GetContext()->GetCommonContext()->GetScanId(), source->GetMemoryGroupId(), { allocation }, (ui32)StageIndex); + return false; +} + +ui64 TAllocateMemoryStep::GetProcessingDataSize(const std::shared_ptr& /*source*/) const { + return 0; +} + +TString TFetchingScript::DebugString() const { + TStringBuilder sb; + TStringBuilder sbBranch; + for (auto&& i : Steps) { + if (i->GetSumDuration() > TDuration::MilliSeconds(10)) { + sbBranch << "{" << i->DebugString() << "};"; + } + } + if (!sbBranch) { + return ""; + } + sb << "{branch:" << BranchName << ";limit:" << Limit << ";"; + if (FinishInstant && StartInstant) { + sb << "duration:" << *FinishInstant - *StartInstant << ";"; + } + + sb << "steps_10Ms:[" << sbBranch << "]}"; + return sb; +} + +TFetchingScript::TFetchingScript(const TSpecialReadContext& context) + : Limit(context.GetReadMetadata()->Limit) { +} + +void TFetchingScript::Allocation(const std::set& entityIds, const EStageFeaturesIndexes stage, const EMemType mType) { + if (Steps.size() == 0) { + AddStep(entityIds, mType, stage); + } else { + std::optional addIndex; + for (i32 i = Steps.size() - 1; i >= 0; --i) { + if (auto allocation = std::dynamic_pointer_cast(Steps[i])) { + if (allocation->GetStage() == stage) { + allocation->AddAllocation(entityIds, mType); + return; + } else { + addIndex = i + 1; + } + break; + } else if (std::dynamic_pointer_cast(Steps[i])) { + continue; + } else if (std::dynamic_pointer_cast(Steps[i])) { + continue; + } else { + addIndex = i + 1; + break; + } + } + AFL_VERIFY(addIndex); + InsertStep(*addIndex, entityIds, mType, stage); + } +} + +NKikimr::TConclusion TFilterCutLimit::DoExecuteInplace( + const 
std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->MutableStageData().CutFilter(source->GetRecordsCount(), Limit, Reverse); + return true; +} + +TConclusion TPortionAccessorFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + return !source->StartFetchingAccessor(source, step); +} + +TConclusion TDetectInMem::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + if (Columns.GetColumnsCount()) { + source->SetSourceInMemory( + source->GetColumnRawBytes(Columns.GetColumnIds()) < NYDBTest::TControllers::GetColumnShardController()->GetMemoryLimitScanPortion()); + } else { + source->SetSourceInMemory(true); + } + AFL_VERIFY(source->GetStageData().HasPortionAccessor()); + auto plan = source->GetContext()->GetColumnsFetchingPlan(source); + source->InitFetchingPlan(plan); + TFetchingScriptCursor cursor(plan, 0); + auto task = std::make_shared(source, std::move(cursor), source->GetContext()->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + return false; +} + +namespace { +class TApplySourceResult: public IDataTasksProcessor::ITask { +private: + using TBase = IDataTasksProcessor::ITask; + YDB_READONLY_DEF(std::shared_ptr, Result); + YDB_READONLY_DEF(std::shared_ptr, Source); + YDB_READONLY(ui32, StartIndex, 0); + YDB_READONLY(ui32, OriginalRecordsCount, 0); + NColumnShard::TCounterGuard Guard; + TFetchingScriptCursor Step; + +public: + TString GetTaskClassIdentifier() const override { + return "TApplySourceResult"; + } + + TApplySourceResult(const std::shared_ptr& source, std::shared_ptr&& result, const ui32 startIndex, + const ui32 originalRecordsCount, const TFetchingScriptCursor& step) + : TBase(NActors::TActorId()) + , Result(result) + , Source(source) + , StartIndex(startIndex) + , OriginalRecordsCount(originalRecordsCount) + , 
Guard(source->GetContext()->GetCommonContext()->GetCounters().GetResultsForSourceGuard()) + , Step(step) + { + } + + virtual TConclusionStatus DoExecuteImpl() override { + AFL_VERIFY(false)("event", "not applicable"); + return TConclusionStatus::Success(); + } + virtual bool DoApply(IDataReader& indexedDataRead) const override { + auto* plainReader = static_cast(&indexedDataRead); + auto resultCopy = Result; + Source->SetCursor(Step); + plainReader->MutableScanner().OnSourceReady(Source, std::move(resultCopy), StartIndex, OriginalRecordsCount, *plainReader); + return true; + } +}; + +} // namespace + +TConclusion TBuildResultStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + auto context = source->GetContext(); + NArrow::TGeneralContainer::TTableConstructionContext contextTableConstruct; + contextTableConstruct.SetColumnNames(context->GetProgramInputColumns()->GetColumnNamesVector()); + if (!source->IsSourceInMemory()) { + contextTableConstruct.SetStartIndex(StartIndex).SetRecordsCount(RecordsCount); + } else { + AFL_VERIFY(StartIndex == 0); + AFL_VERIFY(RecordsCount == source->GetRecordsCount())("records_count", RecordsCount)("source", source->GetRecordsCount()); + } + std::shared_ptr resultBatch; + if (!source->GetStageResult().IsEmpty()) { + resultBatch = source->GetStageResult().GetBatch()->BuildTableVerified(contextTableConstruct); + AFL_VERIFY((ui32)resultBatch->num_columns() == context->GetProgramInputColumns()->GetColumnNamesVector().size()); + if (auto filter = source->GetStageResult().GetNotAppliedFilter()) { + filter->Apply(resultBatch, StartIndex, RecordsCount); + } + if (resultBatch && resultBatch->num_rows()) { + NArrow::TStatusValidator::Validate(context->GetReadMetadata()->GetProgram().ApplyProgram(resultBatch)); + } + } + NActors::TActivationContext::AsActorContext().Send(context->GetCommonContext()->GetScanActorId(), + new NColumnShard::TEvPrivate::TEvTaskProcessedResult( + std::make_shared(source, 
std::move(resultBatch), StartIndex, RecordsCount, step))); + return false; +} + +TConclusion TPrepareResultStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->Finalize(NYDBTest::TControllers::GetColumnShardController()->GetMemoryLimitScanPortion()); + std::shared_ptr plan = std::make_shared(*source->GetContext()); + if (source->IsSourceInMemory()) { + AFL_VERIFY(source->GetStageResult().GetPagesToResultVerified().size() == 1); + } + for (auto&& i : source->GetStageResult().GetPagesToResultVerified()) { + if (source->GetIsStartedByCursor() && !source->GetContext()->GetCommonContext()->GetScanCursor()->CheckSourceIntervalUsage( + source->GetSourceId(), i.GetIndexStart(), i.GetRecordsCount())) { + continue; + } + plan->AddStep(i.GetIndexStart(), i.GetRecordsCount()); + } + AFL_VERIFY(!plan->IsFinished(0)); + source->InitFetchingPlan(plan); + + TFetchingScriptCursor cursor(plan, 0); + auto task = std::make_shared(source, std::move(cursor), source->GetContext()->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + return false; +} + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetching.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetching.h new file mode 100644 index 000000000000..08cf3e88788c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/fetching.h @@ -0,0 +1,520 @@ +#pragma once +#include "columns_set.h" + +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NReader::NSimple { +class IDataSource; +class TFetchingScriptCursor; +class TSpecialReadContext; +class IFetchingStep { +private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY(TDuration, SumDuration, TDuration::Zero()); + YDB_READONLY(ui64, SumSize, 0); + +protected: + virtual TConclusion DoExecuteInplace(const 
std::shared_ptr& source, const TFetchingScriptCursor& step) const = 0; + virtual TString DoDebugString() const { + return ""; + } + +public: + void AddDuration(const TDuration d) { + SumDuration += d; + } + void AddDataSize(const ui64 size) { + SumSize += size; + } + + virtual ~IFetchingStep() = default; + + [[nodiscard]] TConclusion ExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + return DoExecuteInplace(source, step); + } + + virtual ui64 GetProcessingDataSize(const std::shared_ptr& /*source*/) const { + return 0; + } + + IFetchingStep(const TString& name) + : Name(name) { + } + + TString DebugString() const { + TStringBuilder sb; + sb << "name=" << Name << ";duration=" << SumDuration << ";" + << "size=" << 1e-9 * SumSize << ";details={" << DoDebugString() << "};"; + return sb; + } +}; + +class TFetchingScript { +private: + YDB_ACCESSOR(TString, BranchName, "UNDEFINED"); + std::vector> Steps; + std::optional StartInstant; + std::optional FinishInstant; + const ui32 Limit; + +public: + TFetchingScript(const TSpecialReadContext& context); + + void Allocation(const std::set& entityIds, const EStageFeaturesIndexes stage, const EMemType mType); + + void AddStepDataSize(const ui32 index, const ui64 size) { + GetStep(index)->AddDataSize(size); + } + + void AddStepDuration(const ui32 index, const TDuration d) { + FinishInstant = TMonotonic::Now(); + GetStep(index)->AddDuration(d); + } + + void OnExecute() { + if (!StartInstant) { + StartInstant = TMonotonic::Now(); + } + } + + TString DebugString() const; + + const std::shared_ptr& GetStep(const ui32 index) const { + AFL_VERIFY(index < Steps.size()); + return Steps[index]; + } + + template + std::shared_ptr AddStep(Args... args) { + auto result = std::make_shared(args...); + Steps.emplace_back(result); + return result; + } + + template + std::shared_ptr InsertStep(const ui32 index, Args... 
args) { + AFL_VERIFY(index <= Steps.size())("index", index)("size", Steps.size()); + auto result = std::make_shared(args...); + Steps.insert(Steps.begin() + index, result); + return result; + } + + void AddStep(const std::shared_ptr& step) { + AFL_VERIFY(step); + Steps.emplace_back(step); + } + + bool IsFinished(const ui32 currentStepIdx) const { + AFL_VERIFY(currentStepIdx <= Steps.size()); + return currentStepIdx == Steps.size(); + } + + ui32 Execute(const ui32 startStepIdx, const std::shared_ptr& source) const; +}; + +class TFetchingScriptCursor { +private: + std::optional CurrentStartInstant; + std::optional CurrentStartDataSize; + ui32 CurrentStepIdx = 0; + std::shared_ptr Script; + void FlushDuration() { + AFL_VERIFY(CurrentStartInstant); + AFL_VERIFY(CurrentStartDataSize); + Script->AddStepDuration(CurrentStepIdx, TMonotonic::Now() - *CurrentStartInstant); + Script->AddStepDataSize(CurrentStepIdx, *CurrentStartDataSize); + CurrentStartInstant.reset(); + CurrentStartDataSize.reset(); + } + +public: + TFetchingScriptCursor(const std::shared_ptr& script, const ui32 index) + : CurrentStepIdx(index) + , Script(script) { + AFL_VERIFY(!Script->IsFinished(CurrentStepIdx)); + } + + const TString& GetName() const { + return Script->GetStep(CurrentStepIdx)->GetName(); + } + + TString DebugString() const { + return Script->GetStep(CurrentStepIdx)->DebugString(); + } + + bool Next() { + FlushDuration(); + return !Script->IsFinished(++CurrentStepIdx); + } + + TConclusion Execute(const std::shared_ptr& source); +}; + +class TStepAction: public IDataTasksProcessor::ITask { +private: + using TBase = IDataTasksProcessor::ITask; + std::shared_ptr Source; + TFetchingScriptCursor Cursor; + bool FinishedFlag = false; + NColumnShard::TCounterGuard CountersGuard; + +protected: + virtual bool DoApply(IDataReader& owner) const override; + virtual TConclusionStatus DoExecuteImpl() override; + +public: + virtual TString GetTaskClassIdentifier() const override { + return "STEP_ACTION"; 
+ } + + TStepAction(const std::shared_ptr& source, TFetchingScriptCursor&& cursor, const NActors::TActorId& ownerActorId); +}; + +class TBuildFakeSpec: public IFetchingStep { +private: + using TBase = IFetchingStep; + const ui32 Count = 0; + +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + +public: + TBuildFakeSpec(const ui32 count) + : TBase("FAKE_SPEC") + , Count(count) { + AFL_VERIFY(Count); + } +}; + +class TApplyIndexStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + const NIndexes::TIndexCheckerContainer IndexChecker; + +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + +public: + TApplyIndexStep(const NIndexes::TIndexCheckerContainer& indexChecker) + : TBase("APPLY_INDEX") + , IndexChecker(indexChecker) { + } +}; + +class TAllocateMemoryStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + class TColumnsPack { + private: + YDB_READONLY_DEF(TColumnsSetIds, Columns); + YDB_READONLY(EMemType, MemType, EMemType::Blob); + + public: + TColumnsPack(const TColumnsSetIds& columns, const EMemType memType) + : Columns(columns) + , MemType(memType) { + } + }; + std::vector Packs; + THashMap> Control; + const EStageFeaturesIndexes StageIndex; + const std::optional PredefinedSize; + +protected: + class TFetchingStepAllocation: public NGroupedMemoryManager::IAllocation { + private: + using TBase = NGroupedMemoryManager::IAllocation; + std::weak_ptr Source; + TFetchingScriptCursor Step; + NColumnShard::TCounterGuard TasksGuard; + virtual bool DoOnAllocated(std::shared_ptr&& guard, + const std::shared_ptr& allocation) override; + virtual void DoOnAllocationImpossible(const TString& errorMessage) override; + + public: + TFetchingStepAllocation(const std::shared_ptr& source, const ui64 mem, const TFetchingScriptCursor& step); + }; + virtual TConclusion 
DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + virtual TString DoDebugString() const override { + return TStringBuilder() << "stage=" << StageIndex << ";"; + } + +public: + void AddAllocation(const TColumnsSetIds& ids, const EMemType memType) { + if (!ids.GetColumnsCount()) { + return; + } + for (auto&& i : ids.GetColumnIds()) { + AFL_VERIFY(Control[i].emplace(memType).second); + } + Packs.emplace_back(ids, memType); + } + EStageFeaturesIndexes GetStage() const { + return StageIndex; + } + + TAllocateMemoryStep(const ui64 memSize, const EStageFeaturesIndexes stageIndex) + : TBase("ALLOCATE_MEMORY::" + ::ToString(stageIndex)) + , StageIndex(stageIndex) + , PredefinedSize(memSize) { + } + + TAllocateMemoryStep(const TColumnsSetIds& columns, const EMemType memType, const EStageFeaturesIndexes stageIndex) + : TBase("ALLOCATE_MEMORY::" + ::ToString(stageIndex)) + , StageIndex(stageIndex) { + AddAllocation(columns, memType); + } +}; + +class TDetectInMemStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + const TColumnsSetIds Columns; + +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns.DebugString() << ";"; + } + +public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + TDetectInMemStep(const TColumnsSetIds& columns) + : TBase("FETCHING_COLUMNS") + , Columns(columns) { + AFL_VERIFY(Columns.GetColumnsCount()); + } +}; + +class TPrepareResultStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual TString DoDebugString() const override { + return 
TStringBuilder(); + } + +public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& /*source*/) const override { + return 0; + } + TPrepareResultStep() + : TBase("PREPARE_RESULT") { + } +}; + +class TBuildResultStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + const ui32 StartIndex; + const ui32 RecordsCount; + +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual TString DoDebugString() const override { + return TStringBuilder(); + } + +public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& /*source*/) const override { + return 0; + } + TBuildResultStep(const ui32 startIndex, const ui32 recordsCount) + : TBase("BUILD_RESULT") + , StartIndex(startIndex) + , RecordsCount(recordsCount) + { + } +}; + +class TColumnBlobsFetchingStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + TColumnsSetIds Columns; + +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns.DebugString() << ";"; + } + +public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + TColumnBlobsFetchingStep(const TColumnsSetIds& columns) + : TBase("FETCHING_COLUMNS") + , Columns(columns) { + AFL_VERIFY(Columns.GetColumnsCount()); + } +}; + +class TPortionAccessorFetchingStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual TString DoDebugString() const override { + return TStringBuilder(); + } + +public: + TPortionAccessorFetchingStep() + : TBase("FETCHING_ACCESSOR") { + } +}; + +class TIndexBlobsFetchingStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + 
std::shared_ptr Indexes; + +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual TString DoDebugString() const override { + return TStringBuilder() << "indexes=" << Indexes->DebugString() << ";"; + } + +public: + TIndexBlobsFetchingStep(const std::shared_ptr& indexes) + : TBase("FETCHING_INDEXES") + , Indexes(indexes) { + AFL_VERIFY(Indexes); + AFL_VERIFY(Indexes->GetIndexesCount()); + } +}; + +class TAssemblerStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + YDB_READONLY_DEF(std::shared_ptr, Columns); + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; + } + +public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) + : TBase("ASSEMBLER" + (specName ? "::" + specName : "")) + , Columns(columns) { + AFL_VERIFY(Columns); + AFL_VERIFY(Columns->GetColumnsCount()); + } +}; + +class TOptionalAssemblerStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + YDB_READONLY_DEF(std::shared_ptr, Columns); + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; + } + +public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TOptionalAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) + : TBase("OPTIONAL_ASSEMBLER" + (specName ? 
"::" + specName : "")) + , Columns(columns) { + AFL_VERIFY(Columns); + AFL_VERIFY(Columns->GetColumnsCount()); + } +}; + +class TFilterProgramStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + std::shared_ptr Step; + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TFilterProgramStep(const std::shared_ptr& step) + : TBase("PROGRAM") + , Step(step) { + } +}; + +class TFilterCutLimit: public IFetchingStep { +private: + using TBase = IFetchingStep; + const ui32 Limit; + const bool Reverse; + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TFilterCutLimit(const ui32 limit, const bool reverse) + : TBase("LIMIT") + , Limit(limit) + , Reverse(reverse) { + } +}; + +class TPredicateFilter: public IFetchingStep { +private: + using TBase = IFetchingStep; + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TPredicateFilter() + : TBase("PREDICATE") { + } +}; + +class TSnapshotFilter: public IFetchingStep { +private: + using TBase = IFetchingStep; + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TSnapshotFilter() + : TBase("SNAPSHOT") { + } +}; + +class TDetectInMem: public IFetchingStep { +private: + using TBase = IFetchingStep; + TColumnsSetIds Columns; + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TDetectInMem(const TColumnsSetIds& columns) + : TBase("DETECT_IN_MEM") + , Columns(columns) { + } +}; + +class TDeletionFilter: public IFetchingStep { +private: + using TBase = IFetchingStep; + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TDeletionFilter() + : 
TBase("DELETION") { + } +}; + +class TShardingFilter: public IFetchingStep { +private: + using TBase = IFetchingStep; + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TShardingFilter() + : TBase("SHARDING") { + } +}; + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/iterator.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/iterator.cpp new file mode 100644 index 000000000000..39c548800d27 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/iterator.cpp @@ -0,0 +1,59 @@ +#include "iterator.h" + +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +TColumnShardScanIterator::TColumnShardScanIterator(const std::shared_ptr& context, const TReadMetadata::TConstPtr& readMetadata) + : Context(context) + , ReadMetadata(readMetadata) + , ReadyResults(context->GetCounters()) { + IndexedData = readMetadata->BuildReader(Context); + Y_ABORT_UNLESS(Context->GetReadMetadata()->IsSorted()); +} + +TConclusion> TColumnShardScanIterator::GetBatch() { + FillReadyResults(); + return ReadyResults.pop_front(); +} + +void TColumnShardScanIterator::PrepareResults() { + FillReadyResults(); +} + +TConclusion TColumnShardScanIterator::ReadNextInterval() { + return IndexedData->ReadNextInterval(); +} + +void TColumnShardScanIterator::DoOnSentDataFromInterval(const ui32 intervalIdx) const { + return IndexedData->OnSentDataFromInterval(intervalIdx); +} + +void TColumnShardScanIterator::FillReadyResults() { + auto ready = IndexedData->ExtractReadyResults(MaxRowsInBatch); + const i64 limitLeft = Context->GetReadMetadata()->Limit == 0 ? 
INT64_MAX : Context->GetReadMetadata()->Limit; + for (size_t i = 0; i < ready.size(); ++i) { + auto& batch = ReadyResults.emplace_back(std::move(ready[i])); + AFL_VERIFY(batch->GetResultBatch().num_rows() <= limitLeft); + ItemsRead += batch->GetResultBatch().num_rows(); + } +} + +TColumnShardScanIterator::~TColumnShardScanIterator() { + if (!IndexedData->IsFinished()) { + IndexedData->Abort("iterator destructor"); + } + ReadMetadata->ReadStats->PrintToLog(); +} + +void TColumnShardScanIterator::Apply(const std::shared_ptr& task) { + if (!IndexedData->IsFinished()) { + Y_ABORT_UNLESS(task->Apply(*IndexedData)); + } +} + +const TReadStats& TColumnShardScanIterator::GetStats() const { + return *ReadMetadata->ReadStats; +} + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/iterator.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/iterator.h new file mode 100644 index 000000000000..46d34944f20d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/iterator.h @@ -0,0 +1,103 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +struct TReadMetadata; + +class TReadyResults { +private: + const NColumnShard::TConcreteScanCounters Counters; + std::deque> Data; + i64 RecordsCount = 0; +public: + TString DebugString() const { + TStringBuilder sb; + sb + << "count:" << Data.size() << ";" + << "records_count:" << RecordsCount << ";" + ; + if (Data.size()) { + sb << "schema=" << Data.front()->GetResultBatch().schema()->ToString() << ";"; + } + return sb; + } + TReadyResults(const NColumnShard::TConcreteScanCounters& counters) + : Counters(counters) + { + + } + const std::shared_ptr& emplace_back(std::shared_ptr&& v) { + AFL_VERIFY(!!v); + RecordsCount += v->GetResultBatch().num_rows(); + Data.emplace_back(std::move(v)); + return Data.back(); + } + std::shared_ptr pop_front() { + if (Data.empty()) { + return {}; + } + 
auto result = std::move(Data.front()); + AFL_VERIFY(RecordsCount >= result->GetResultBatch().num_rows()); + RecordsCount -= result->GetResultBatch().num_rows(); + Data.pop_front(); + return result; + } + bool empty() const { + return Data.empty(); + } + size_t size() const { + return Data.size(); + } +}; + +class TColumnShardScanIterator: public TScanIteratorBase { +private: + std::shared_ptr Context; + std::shared_ptr ReadMetadata; + TReadyResults ReadyResults; + std::shared_ptr IndexedData; + ui64 ItemsRead = 0; + const i64 MaxRowsInBatch = 5000; + virtual void DoOnSentDataFromInterval(const ui32 intervalIdx) const override; + +public: + TColumnShardScanIterator(const std::shared_ptr& context, const std::shared_ptr& readMetadata); + ~TColumnShardScanIterator(); + + virtual TConclusionStatus Start() override { + AFL_VERIFY(IndexedData); + return IndexedData->Start(); + } + + virtual std::optional GetAvailableResultsCount() const override { + return ReadyResults.size(); + } + + virtual const TReadStats& GetStats() const override; + + virtual TString DebugString(const bool verbose) const override { + return TStringBuilder() + << "ready_results:(" << ReadyResults.DebugString() << ");" + << "indexed_data:(" << IndexedData->DebugString(verbose) << ")" + ; + } + + virtual void Apply(const std::shared_ptr& task) override; + + bool Finished() const override { + return IndexedData->IsFinished() && ReadyResults.empty(); + } + + virtual TConclusion> GetBatch() override; + virtual void PrepareResults() override; + + virtual TConclusion ReadNextInterval() override; + +private: + void FillReadyResults(); +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/plain_read_data.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/plain_read_data.cpp new file mode 100644 index 000000000000..b98e3d7dba9e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/plain_read_data.cpp @@ -0,0 +1,58 @@ +#include 
"plain_read_data.h" + +namespace NKikimr::NOlap::NReader::NSimple { + +TPlainReadData::TPlainReadData(const std::shared_ptr& context) + : TBase(context) + , SpecialReadContext(std::make_shared(context)) +{ + ui32 sourceIdx = 0; + std::deque> sources; + const auto& portions = GetReadMetadata()->SelectInfo->PortionsOrderedPK; + ui64 compactedPortionsBytes = 0; + ui64 insertedPortionsBytes = 0; + for (auto&& i : portions) { + if (i->GetMeta().GetProduced() == NPortion::EProduced::COMPACTED || i->GetMeta().GetProduced() == NPortion::EProduced::SPLIT_COMPACTED) { + compactedPortionsBytes += i->GetTotalBlobBytes(); + } else { + insertedPortionsBytes += i->GetTotalBlobBytes(); + } + sources.emplace_back(std::make_shared(sourceIdx++, i, SpecialReadContext)); + } + Scanner = std::make_shared(std::move(sources), SpecialReadContext); + + auto& stats = GetReadMetadata()->ReadStats; + stats->IndexPortions = GetReadMetadata()->SelectInfo->PortionsOrderedPK.size(); + stats->IndexBatches = GetReadMetadata()->NumIndexedBlobs(); + stats->SchemaColumns = (*SpecialReadContext->GetProgramInputColumns() - *SpecialReadContext->GetSpecColumns()).GetColumnsCount(); + stats->InsertedPortionsBytes = insertedPortionsBytes; + stats->CompactedPortionsBytes = compactedPortionsBytes; + +} + +std::vector> TPlainReadData::DoExtractReadyResults(const int64_t /*maxRowsInBatch*/) { + auto result = std::move(PartialResults); + PartialResults.clear(); +// auto result = TPartialReadResult::SplitResults(std::move(PartialResults), maxRowsInBatch); + ui32 count = 0; + for (auto&& r: result) { + count += r->GetRecordsCount(); + } + AFL_VERIFY(count == ReadyResultsCount); + ReadyResultsCount = 0; + + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoExtractReadyResults")("result", result.size())("count", count)("finished", Scanner->IsFinished()); + return result; +} + +TConclusion TPlainReadData::DoReadNextInterval() { + return Scanner->BuildNextInterval(); +} + +void 
TPlainReadData::OnIntervalResult(const std::shared_ptr& result) { +// result->GetResourcesGuardOnly()->Update(result->GetMemorySize()); + ReadyResultsCount += result->GetRecordsCount(); + PartialResults.emplace_back(result); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/plain_read_data.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/plain_read_data.h new file mode 100644 index 000000000000..5e64761ac5f1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/plain_read_data.h @@ -0,0 +1,78 @@ +#pragma once +#include "columns_set.h" +#include "source.h" +#include "scanner.h" + +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitoringObjectsCounter { +private: + using TBase = IDataReader; + std::shared_ptr Scanner; + std::shared_ptr SpecialReadContext; + std::vector> PartialResults; + ui32 ReadyResultsCount = 0; +protected: + virtual TConclusionStatus DoStart() override { + return Scanner->Start(); + } + + virtual TString DoDebugString(const bool verbose) const override { + TStringBuilder sb; + sb << SpecialReadContext->DebugString() << ";"; + if (verbose) { + sb << "intervals_schema=" << Scanner->DebugString(); + } + return sb; + } + + virtual std::vector> DoExtractReadyResults(const int64_t maxRowsInBatch) override; + virtual TConclusion DoReadNextInterval() override; + + virtual void DoAbort() override { + SpecialReadContext->Abort(); + Scanner->Abort(); + PartialResults.clear(); + Y_ABORT_UNLESS(IsFinished()); + } + virtual bool DoIsFinished() const override { + return (Scanner->IsFinished() && PartialResults.empty()); + } +public: + const TReadMetadata::TConstPtr& GetReadMetadata() const { + return SpecialReadContext->GetReadMetadata(); + } + + const std::shared_ptr& GetSpecialReadContext() const { + return SpecialReadContext; + } + + const TScanHead& GetScanner() const { + 
return *Scanner; + } + + TScanHead& MutableScanner() { + return *Scanner; + } + virtual void OnSentDataFromInterval(const ui32 sourceIdx) const override { + if (SpecialReadContext->IsAborted()) { + return; + } + Scanner->ContinueSource(sourceIdx); + } + + void OnIntervalResult(const std::shared_ptr& result); + + TPlainReadData(const std::shared_ptr& context); + ~TPlainReadData() { + if (!SpecialReadContext->IsAborted()) { + Abort("unexpected on destructor"); + } + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/scanner.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/scanner.cpp new file mode 100644 index 000000000000..00a0ae70a15e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/scanner.cpp @@ -0,0 +1,133 @@ +#include "plain_read_data.h" +#include "scanner.h" + +#include + +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +void TScanHead::OnSourceReady(const std::shared_ptr& source, std::shared_ptr&& table, const ui32 startIndex, + const ui32 recordsCount, TPlainReadData& reader) { + source->MutableStageResult().SetResultChunk(std::move(table), startIndex, recordsCount); + if ((!table || !table->num_rows()) && Context->GetCommonContext()->GetReadMetadata()->Limit && InFlightLimit < MaxInFlight) { + InFlightLimit = 2 * InFlightLimit; + } + while (FetchingSources.size()) { + auto frontSource = *FetchingSources.begin(); + if (!frontSource->HasStageResult()) { + break; + } + if (!frontSource->GetStageResult().HasResultChunk()) { + break; + } + auto table = frontSource->MutableStageResult().ExtractResultChunk(); + const bool isFinished = frontSource->GetStageResult().IsFinished(); + std::optional sourceIdxToContinue; + if (!isFinished) { + sourceIdxToContinue = frontSource->GetSourceIdx(); + } + if (table && table->num_rows()) { + auto cursor = + std::make_shared(frontSource->GetStartPKRecordBatch(), frontSource->GetSourceId(), startIndex + recordsCount); + 
reader.OnIntervalResult(std::make_shared(nullptr, nullptr, table, cursor, sourceIdxToContinue)); + } else if (sourceIdxToContinue) { + ContinueSource(*sourceIdxToContinue); + break; + } + if (!isFinished) { + break; + } + AFL_VERIFY(FetchingSourcesByIdx.erase(frontSource->GetSourceIdx())); + if (Context->GetCommonContext()->GetReadMetadata()->Limit) { + FinishedSources.emplace(*FetchingSources.begin()); + } + FetchingSources.erase(FetchingSources.begin()); + while (FetchingSources.size() && FinishedSources.size()) { + auto finishedSource = *FinishedSources.begin(); + auto fetchingSource = *FetchingSources.begin(); + if (finishedSource->GetFinish() < fetchingSource->GetStart()) { + FetchedCount += finishedSource->GetRecordsCount(); + } + FinishedSources.erase(FinishedSources.begin()); + if (FetchedCount > Context->GetCommonContext()->GetReadMetadata()->Limit) { + Context->Abort(); + Abort(); + } + } + } +} + +TConclusionStatus TScanHead::Start() { + for (auto&& i : SortedSources) { + i->InitFetchingPlan(Context->GetColumnsFetchingPlan(i)); + } + return TConclusionStatus::Success(); +} + +TScanHead::TScanHead(std::deque>&& sources, const std::shared_ptr& context) + : Context(context) { + if (HasAppData()) { + if (AppDataVerified().ColumnShardConfig.HasMaxInFlightIntervalsOnRequest()) { + MaxInFlight = AppDataVerified().ColumnShardConfig.GetMaxInFlightIntervalsOnRequest(); + } + } + if (Context->GetReadMetadata()->Limit) { + InFlightLimit = 1; + } else { + InFlightLimit = MaxInFlight; + } + bool started = !context->GetCommonContext()->GetScanCursor()->IsInitialized(); + for (auto&& i : sources) { + if (!started) { + bool usage = false; + if (!context->GetCommonContext()->GetScanCursor()->CheckEntityIsBorder(i, usage)) { + continue; + } + started = true; + if (!usage) { + continue; + } + i->SetIsStartedByCursor(); + } + SortedSources.emplace(i); + } +} + +TConclusion TScanHead::BuildNextInterval() { + if (Context->IsAborted()) { + return false; + } + bool changed = 
false; + while (SortedSources.size() && FetchingSources.size() < InFlightLimit) { + (*SortedSources.begin())->StartProcessing(*SortedSources.begin()); + FetchingSources.emplace(*SortedSources.begin()); + FetchingSourcesByIdx.emplace((*SortedSources.begin())->GetSourceIdx(), *SortedSources.begin()); + SortedSources.erase(SortedSources.begin()); + changed = true; + } + return changed; +} + +const TReadContext& TScanHead::GetContext() const { + return *Context->GetCommonContext(); +} + +bool TScanHead::IsReverse() const { + return GetContext().GetReadMetadata()->IsDescSorted(); +} + +void TScanHead::Abort() { + AFL_VERIFY(Context->IsAborted()); + for (auto&& i : FetchingSources) { + i->Abort(); + } + for (auto&& i : SortedSources) { + i->Abort(); + } + FetchingSources.clear(); + SortedSources.clear(); + Y_ABORT_UNLESS(IsFinished()); +} + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/scanner.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/scanner.h new file mode 100644 index 000000000000..bc94b997b4bc --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/scanner.h @@ -0,0 +1,76 @@ +#pragma once +#include "source.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +class TPlainReadData; + +class TDataSourceEndpoint { +private: + YDB_READONLY_DEF(std::vector>, StartSources); + YDB_READONLY_DEF(std::vector>, FinishSources); +public: + void AddStart(std::shared_ptr source) { + StartSources.emplace_back(source); + } + void AddFinish(std::shared_ptr source) { + FinishSources.emplace_back(source); + } +}; + +class TScanHead { +private: + std::shared_ptr Context; + THashMap> FetchingSourcesByIdx; + std::set, IDataSource::TCompareStartForScanSequence> SortedSources; + std::set, IDataSource::TCompareStartForScanSequence> FetchingSources; + std::set, IDataSource::TCompareFinishForScanSequence> FinishedSources; + ui64 
FetchedCount = 0; + ui64 InFlightLimit = 1; + ui64 MaxInFlight = 256; +public: + + void ContinueSource(const ui32 sourceIdx) const { + auto it = FetchingSourcesByIdx.find(sourceIdx); + AFL_VERIFY(it != FetchingSourcesByIdx.end())("source_idx", sourceIdx)("count", FetchingSourcesByIdx.size()); + it->second->ContinueCursor(it->second); + } + + bool IsReverse() const; + void Abort(); + + bool IsFinished() const { + return FetchingSources.empty() && SortedSources.empty(); + } + + const TReadContext& GetContext() const; + + TString DebugString() const { + TStringBuilder sb; + sb << "S:"; + for (auto&& i : SortedSources) { + sb << i->GetSourceId() << ";"; + } + sb << "F:"; + for (auto&& i : FetchingSources) { + sb << i->GetSourceId() << ";"; + } + return sb; + } + + void OnSourceReady(const std::shared_ptr& source, std::shared_ptr&& table, const ui32 startIndex, + const ui32 recordsCount, TPlainReadData& reader); + + TConclusionStatus Start(); + + TScanHead(std::deque>&& sources, const std::shared_ptr& context); + + [[nodiscard]] TConclusion BuildNextInterval(); + +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/source.cpp new file mode 100644 index 000000000000..9c578661f064 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/source.cpp @@ -0,0 +1,245 @@ +#include "constructor.h" +#include "fetched_data.h" +#include "plain_read_data.h" +#include "source.h" + +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NReader::NSimple { + +void IDataSource::InitFetchingPlan(const std::shared_ptr& fetching) { + AFL_VERIFY(fetching); +// AFL_VERIFY(!FetchingPlan); + FetchingPlan = fetching; +} + +void IDataSource::StartProcessing(const std::shared_ptr& sourcePtr) { + AFL_VERIFY(!ProcessingStarted); + AFL_VERIFY(FetchingPlan); + AFL_VERIFY(!Context->IsAborted()); + ProcessingStarted = true; + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("InitFetchingPlan", FetchingPlan->DebugString())("source_idx", SourceIdx); + NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("source", SourceIdx)("method", "InitFetchingPlan")); + TFetchingScriptCursor cursor(FetchingPlan, 0); + auto task = std::make_shared(sourcePtr, std::move(cursor), Context->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); +} + +void IDataSource::ContinueCursor(const std::shared_ptr& sourcePtr) { + AFL_VERIFY(!!ScriptCursor); + if (ScriptCursor->Next()) { + auto task = std::make_shared(sourcePtr, std::move(*ScriptCursor), Context->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + ScriptCursor.reset(); + } +} + +void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, + THashMap& defaultBlocks, const std::shared_ptr& filter) { + const NArrow::TColumnFilter& cFilter = filter ? 
*filter : NArrow::TColumnFilter::BuildAllowFilter(); + ui32 fetchedChunks = 0; + ui32 nullChunks = 0; + for (auto&& i : columnIds) { + auto columnChunks = GetStageData().GetPortionAccessor().GetColumnChunksPointers(i); + if (columnChunks.empty()) { + continue; + } + auto itFilter = cFilter.GetIterator(false, Portion->GetRecordsCount()); + bool itFinished = false; + for (auto&& c : columnChunks) { + AFL_VERIFY(!itFinished); + if (!itFilter.IsBatchForSkip(c->GetMeta().GetRecordsCount())) { + auto reading = blobsAction.GetReading(Portion->GetColumnStorageId(c->GetColumnId(), Schema->GetIndexInfo())); + reading->SetIsBackgroundProcess(false); + reading->AddRange(Portion->RestoreBlobRange(c->BlobRange)); + ++fetchedChunks; + } else { + defaultBlocks.emplace(c->GetAddress(), TPortionDataAccessor::TAssembleBlobInfo(c->GetMeta().GetRecordsCount(), + Schema->GetExternalDefaultValueVerified(c->GetColumnId()))); + ++nullChunks; + } + itFinished = !itFilter.Next(c->GetMeta().GetRecordsCount()); + } + AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->GetRecordsCount()); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks)( + "reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); +} + +bool TPortionDataSource::DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); + AFL_VERIFY(columns.GetColumnsCount()); + AFL_VERIFY(!StageData->GetAppliedFilter() || !StageData->GetAppliedFilter()->IsTotalDenyFilter()); + auto& columnIds = columns.GetColumnIds(); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); + + TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); + { + THashMap nullBlocks; + 
NeedFetchColumns(columnIds, action, nullBlocks, StageData->GetAppliedFilter()); + StageData->AddDefaults(std::move(nullBlocks)); + } + + auto readActions = action.GetReadingActions(); + if (!readActions.size()) { + return false; + } + + auto constructor = std::make_shared(readActions, sourcePtr, step, GetContext(), "CS::READ::" + step.GetName(), ""); + NActors::TActivationContext::AsActorContext().Register(new NOlap::NBlobOperations::NRead::TActor(constructor)); + return true; +} + +bool TPortionDataSource::DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); + AFL_VERIFY(indexes->GetIndexesCount()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); + + TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); + { + std::set indexIds; + for (auto&& i : GetStageData().GetPortionAccessor().GetIndexesVerified()) { + if (!indexes->GetIndexIdsSet().contains(i.GetIndexId())) { + continue; + } + indexIds.emplace(i.GetIndexId()); + if (auto bRange = i.GetBlobRangeOptional()) { + auto readAction = action.GetReading(Portion->GetIndexStorageId(i.GetIndexId(), Schema->GetIndexInfo())); + readAction->SetIsBackgroundProcess(false); + readAction->AddRange(Portion->RestoreBlobRange(*bRange)); + } + } + if (indexes->GetIndexIdsSet().size() != indexIds.size()) { + return false; + } + } + auto readingActions = action.GetReadingActions(); + if (!readingActions.size()) { + NYDBTest::TControllers::GetColumnShardController()->OnIndexSelectProcessed({}); + return false; + } + + auto constructor = std::make_shared(readingActions, sourcePtr, step, GetContext(), "CS::READ::" + step.GetName(), ""); + NActors::TActivationContext::AsActorContext().Register(new NOlap::NBlobOperations::NRead::TActor(constructor)); + return true; +} + 
+void TPortionDataSource::DoAbort() {
+}
+
+// Runs the index checker over the portion page by page and stores the resulting row filter
+// in the stage data. Leaves the stage data untouched when blobs for a required index are
+// missing or when every page is accepted.
+void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexChecker) {
+    THashMap> blobsByIndex;
+    std::set requiredIndexIds = indexChecker->GetIndexIds();
+    // NActors::TLogContextGuard gLog = NActors::TLogContextBuilder::Build()("records_count", GetRecordsCount())("portion_id", Portion->GetAddress().DebugString());
+    std::vector pages = GetStageData().GetPortionAccessor().BuildPages();
+    NArrow::TColumnFilter pageFilter = NArrow::TColumnFilter::BuildAllowFilter();
+    for (auto&& page : pages) {
+        // Collect blobs of the indexes the checker is interested in; blobs accumulate across pages.
+        for (auto&& index : page.GetIndexes()) {
+            if (requiredIndexIds.contains(index->GetIndexId())) {
+                auto& blobs = blobsByIndex[index->GetIndexId()];
+                if (index->HasBlobData()) {
+                    blobs.emplace_back(index->GetBlobDataVerified());
+                } else {
+                    blobs.emplace_back(StageData->ExtractBlob(index->GetAddress()));
+                }
+            }
+        }
+        // The checker needs every required index; give up without adding a filter otherwise.
+        for (auto&& indexId : requiredIndexIds) {
+            if (!blobsByIndex.contains(indexId)) {
+                return;
+            }
+        }
+        const bool accepted = indexChecker->Check(blobsByIndex);
+        NYDBTest::TControllers::GetColumnShardController()->OnIndexSelectProcessed(accepted);
+        pageFilter.Add(accepted, page.GetRecordsCount());
+    }
+    AFL_VERIFY(pageFilter.Size() == Portion->GetRecordsCount());
+    if (pageFilter.IsTotalDenyFilter()) {
+        StageData->AddFilter(NArrow::TColumnFilter::BuildDenyFilter());
+        return;
+    }
+    if (!pageFilter.IsTotalAllowFilter()) {
+        StageData->AddFilter(pageFilter);
+    }
+}
+
+void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& columns, const bool sequential) {
+    auto blobSchema = GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion);
+
+    std::optional ss;
+    if (Portion->HasInsertWriteId()) {
+        if (Portion->HasCommitSnapshot()) {
+            ss = Portion->GetCommitSnapshotVerified();
+        } else if (GetContext()->GetReadMetadata()->IsMyUncommitted(Portion->GetInsertWriteIdVerified())) {
+            ss =
GetContext()->GetReadMetadata()->GetRequestSnapshot(); + } + } + + auto batch = GetStageData() + .GetPortionAccessor() + .PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs(), ss) + .AssembleToGeneralContainer(sequential ? columns->GetColumnIds() : std::set()) + .DetachResult(); + + MutableStageData().AddBatch(batch); +} + +namespace { +class TPortionAccessorFetchingSubscriber: public IDataAccessorRequestsSubscriber { +private: + TFetchingScriptCursor Step; + std::shared_ptr Source; + virtual void DoOnRequestsFinished(TDataAccessorsResult&& result) override { + AFL_VERIFY(!result.HasErrors()); + AFL_VERIFY(result.GetPortions().size() == 1)("count", result.GetPortions().size()); + Source->MutableStageData().SetPortionAccessor(std::move(result.ExtractPortionsVector().front())); + AFL_VERIFY(Step.Next()); + auto task = std::make_shared(Source, std::move(Step), Source->GetContext()->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + } + +public: + TPortionAccessorFetchingSubscriber(const TFetchingScriptCursor& step, const std::shared_ptr& source) + : Step(step) + , Source(source) { + } +}; + +} // namespace + +bool TPortionDataSource::DoStartFetchingAccessor(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step) { + AFL_VERIFY(!StageData->HasPortionAccessor()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); + + std::shared_ptr request = std::make_shared(); + request->AddPortion(Portion); + request->RegisterSubscriber(std::make_shared(step, sourcePtr)); + GetContext()->GetCommonContext()->GetDataAccessorsManager()->AskData(request); + return true; +} + +TPortionDataSource::TPortionDataSource( + const ui32 sourceIdx, const std::shared_ptr& portion, const std::shared_ptr& context) + : TBase(portion->GetPortionId(), sourceIdx, context, portion->IndexKeyStart(), portion->IndexKeyEnd(), + 
portion->RecordSnapshotMin(TSnapshot::Zero()), portion->RecordSnapshotMax(TSnapshot::Zero()), portion->GetRecordsCount(), + portion->GetShardingVersionOptional(), portion->GetMeta().GetDeletionsCount()) + , Portion(portion) + , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*portion)) + , SourceGroupGuard(NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildGroupGuard( + GetContext()->GetProcessMemoryControlId(), GetContext()->GetCommonContext()->GetScanId())) { +} + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/source.h new file mode 100644 index 000000000000..91c4533cb773 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/source.h @@ -0,0 +1,436 @@ +#pragma once +#include "columns_set.h" +#include "context.h" +#include "fetched_data.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NOlap { +class IDataReader; +} + +namespace NKikimr::NOlap::NReader::NSimple { + +class TFetchingInterval; +class TPlainReadData; +class IFetchTaskConstructor; +class IFetchingStep; + +class TPortionPage { +private: + YDB_READONLY(ui32, StartIndex, 0); + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY(ui64, MemoryBytes, 0); + YDB_ACCESSOR_DEF(std::shared_ptr, Result); + +public: + TPortionPage(const ui32 startIndex, const ui32 recordsCount, const ui64 memoryBytes) + : StartIndex(startIndex) + , RecordsCount(recordsCount) + , MemoryBytes(memoryBytes) + { + + } +}; + +class IDataSource: public ICursorEntity { +private: + YDB_READONLY(ui32, SourceId, 0); + YDB_READONLY(ui32, SourceIdx, 0); + YDB_READONLY_DEF(NArrow::NMerger::TSortableBatchPosition, Start); + YDB_READONLY_DEF(NArrow::NMerger::TSortableBatchPosition, Finish); + NArrow::TReplaceKey StartReplaceKey; + NArrow::TReplaceKey FinishReplaceKey; 
+ YDB_READONLY_DEF(std::shared_ptr, Context); + YDB_READONLY(TSnapshot, RecordSnapshotMin, TSnapshot::Zero()); + YDB_READONLY(TSnapshot, RecordSnapshotMax, TSnapshot::Zero()); + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY_DEF(std::optional, ShardingVersionOptional); + YDB_READONLY(bool, HasDeletions, false); + virtual NJson::TJsonValue DoDebugJson() const = 0; + std::shared_ptr FetchingPlan; + std::vector> ResourceGuards; + YDB_READONLY(TPKRangeFilter::EUsageClass, UsageClass, TPKRangeFilter::EUsageClass::PartialUsage); + bool ProcessingStarted = false; + bool IsStartedByCursor = false; + + virtual ui64 DoGetEntityId() const override { + return SourceId; + } + + virtual ui64 DoGetEntityRecordsCount() const override { + return RecordsCount; + } + + std::optional ScriptCursor; + +protected: + std::optional IsSourceInMemoryFlag; + + std::unique_ptr StageData; + std::unique_ptr StageResult; + + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) = 0; + virtual bool DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) = 0; + virtual void DoAssembleColumns(const std::shared_ptr& columns, const bool sequential) = 0; + virtual void DoAbort() = 0; + virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexMeta) = 0; + virtual NJson::TJsonValue DoDebugJsonForMemory() const { + return NJson::JSON_MAP; + } + virtual bool DoAddTxConflict() = 0; + virtual bool DoStartFetchingAccessor(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step) = 0; + +public: + virtual ui64 GetMemoryGroupId() const = 0; + bool GetIsStartedByCursor() const { + return IsStartedByCursor; + } + + void SetIsStartedByCursor() { + IsStartedByCursor = true; + } + + void SetCursor(const TFetchingScriptCursor& scriptCursor) { + AFL_VERIFY(!ScriptCursor); + ScriptCursor = scriptCursor; + } + + void ContinueCursor(const 
std::shared_ptr& sourcePtr); + + class TCompareStartForScanSequence { + public: + bool operator()(const std::shared_ptr& l, const std::shared_ptr& r) const { + const std::partial_ordering compareResult = l->GetStart().Compare(r->GetStart()); + if (compareResult == std::partial_ordering::equivalent) { + return l->GetSourceId() < r->GetSourceId(); + } else { + return compareResult == std::partial_ordering::less; + } + }; + }; + + class TCompareFinishForScanSequence { + public: + bool operator()(const std::shared_ptr& l, const std::shared_ptr& r) const { + const std::partial_ordering compareResult = l->GetFinish().Compare(r->GetFinish()); + if (compareResult == std::partial_ordering::equivalent) { + return l->GetSourceId() < r->GetSourceId(); + } else { + return compareResult == std::partial_ordering::less; + } + }; + }; + + virtual std::shared_ptr GetStartPKRecordBatch() const = 0; + + void StartProcessing(const std::shared_ptr& sourcePtr); + virtual ui64 PredictAccessorsSize() const = 0; + + bool StartFetchingAccessor(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step) { + return DoStartFetchingAccessor(sourcePtr, step); + } + + bool AddTxConflict() { + if (!Context->GetCommonContext()->HasLock()) { + return false; + } + if (DoAddTxConflict()) { + StageData->Clear(); + return true; + } + return false; + } + + ui64 GetResourceGuardsMemory() const { + ui64 result = 0; + for (auto&& i : ResourceGuards) { + result += i->GetMemory(); + } + return result; + } + void RegisterAllocationGuard(const std::shared_ptr& guard) { + ResourceGuards.emplace_back(guard); + } + bool IsSourceInMemory() const { + AFL_VERIFY(IsSourceInMemoryFlag); + return *IsSourceInMemoryFlag; + } + void SetSourceInMemory(const bool value) { + AFL_VERIFY(!IsSourceInMemoryFlag); + IsSourceInMemoryFlag = value; + AFL_VERIFY(StageData); + if (!value) { + StageData->SetUseFilter(value); + } + } + virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& 
blobsOriginal) const = 0; + + virtual ui64 GetPathId() const = 0; + virtual bool HasIndexes(const std::set& indexIds) const = 0; + + const NArrow::TReplaceKey& GetStartReplaceKey() const { + return StartReplaceKey; + } + const NArrow::TReplaceKey& GetFinishReplaceKey() const { + return FinishReplaceKey; + } + + bool HasStageResult() const { + return !!StageResult; + } + + const TFetchedResult& GetStageResult() const { + AFL_VERIFY(!!StageResult); + return *StageResult; + } + + TFetchedResult& MutableStageResult() { + AFL_VERIFY(!!StageResult); + return *StageResult; + } + + void Finalize(const std::optional memoryLimit) { + AFL_VERIFY(!StageResult); + AFL_VERIFY(StageData); + TMemoryProfileGuard mpg("SCAN_PROFILE::STAGE_RESULT", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + + const auto accessor = StageData->GetPortionAccessor(); + StageResult = std::make_unique(std::move(StageData)); + if (memoryLimit) { + StageResult->SetPages(accessor.BuildReadPages(*memoryLimit, GetContext()->GetProgramInputColumns()->GetColumnIds())); + } else { + StageResult->SetPages({ TPortionDataAccessor::TReadPage(0, GetRecordsCount(), 0) }); + } + } + + void ApplyIndex(const NIndexes::TIndexCheckerContainer& indexMeta) { + return DoApplyIndex(indexMeta); + } + + void AssembleColumns(const std::shared_ptr& columns, const bool sequential = false) { + if (columns->IsEmpty()) { + return; + } + DoAssembleColumns(columns, sequential); + } + + bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) { + return DoStartFetchingColumns(sourcePtr, step, columns); + } + + bool StartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { + AFL_VERIFY(indexes); + return DoStartFetchingIndexes(sourcePtr, step, indexes); + } + void InitFetchingPlan(const std::shared_ptr& fetching); + + std::shared_ptr GetLastPK() const { + return 
Finish.BuildSortingCursor().ExtractSortingPosition(Finish.GetSortFields()); + } + + virtual ui64 GetColumnsVolume(const std::set& columnIds, const EMemType type) const = 0; + + virtual ui64 GetColumnRawBytes(const std::set& columnIds) const = 0; + virtual ui64 GetIndexRawBytes(const std::set& indexIds) const = 0; + virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const = 0; + + void Abort() { + DoAbort(); + } + + NJson::TJsonValue DebugJsonForMemory() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("details", DoDebugJsonForMemory()); + result.InsertValue("count", RecordsCount); + return result; + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("source_idx", SourceIdx); + result.InsertValue("start", Start.DebugJson()); + result.InsertValue("finish", Finish.DebugJson()); + result.InsertValue("specific", DoDebugJson()); + return result; + } + + bool OnIntervalFinished(const ui32 intervalIdx); + + void OnEmptyStageData() { + if (!ResourceGuards.size()) { + return; + } + ResourceGuards.back()->Update(0); + Finalize(std::nullopt); + } + + const TFetchedData& GetStageData() const { + AFL_VERIFY(StageData); + return *StageData; + } + + TFetchedData& MutableStageData() { + AFL_VERIFY(StageData); + return *StageData; + } + + IDataSource(const ui32 sourceId, const ui32 sourceIdx, const std::shared_ptr& context, + const NArrow::TReplaceKey& start, + const NArrow::TReplaceKey& finish, const TSnapshot& recordSnapshotMin, const TSnapshot& recordSnapshotMax, const ui32 recordsCount, + const std::optional shardingVersion, const bool hasDeletions) + : SourceId(sourceId) + , SourceIdx(sourceIdx) + , Start(context->GetReadMetadata()->BuildSortedPosition(start)) + , Finish(context->GetReadMetadata()->BuildSortedPosition(finish)) + , StartReplaceKey(start) + , FinishReplaceKey(finish) + , Context(context) + , RecordSnapshotMin(recordSnapshotMin) + , RecordSnapshotMax(recordSnapshotMax) + 
, RecordsCount(recordsCount) + , ShardingVersionOptional(shardingVersion) + , HasDeletions(hasDeletions) { + StageData = std::make_unique(true); + UsageClass = Context->GetReadMetadata()->GetPKRangesFilter().IsPortionInPartialUsage(GetStartReplaceKey(), GetFinishReplaceKey()); + AFL_VERIFY(UsageClass != TPKRangeFilter::EUsageClass::DontUsage); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "portions_for_merge")("start", Start.DebugJson())("finish", Finish.DebugJson()); + if (Start.IsReverseSort()) { + std::swap(Start, Finish); + } + Y_ABORT_UNLESS(Start.Compare(Finish) != std::partial_ordering::greater); + } + + virtual ~IDataSource() = default; +}; + +class TPortionDataSource: public IDataSource { +private: + using TBase = IDataSource; + const TPortionInfo::TConstPtr Portion; + std::shared_ptr Schema; + const std::shared_ptr SourceGroupGuard; + + void NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, + THashMap& nullBlocks, const std::shared_ptr& filter); + + virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexChecker) override; + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) override; + virtual bool DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) override; + virtual void DoAssembleColumns(const std::shared_ptr& columns, const bool sequential) override; + virtual NJson::TJsonValue DoDebugJson() const override { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("type", "portion"); + result.InsertValue("info", Portion->DebugString()); + result.InsertValue("commit", Portion->GetCommitSnapshotOptional().value_or(TSnapshot::Zero()).DebugString()); + result.InsertValue("insert", (ui64)Portion->GetInsertWriteIdOptional().value_or(TInsertWriteId(0))); + return result; + } + + virtual NJson::TJsonValue DoDebugJsonForMemory() const 
override { + NJson::TJsonValue result = TBase::DoDebugJsonForMemory(); + if (GetStageData().HasPortionAccessor()) { + auto columns = GetStageData().GetPortionAccessor().GetColumnIds(); + // result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); + result.InsertValue("in_mem", GetStageData().GetPortionAccessor().GetColumnRawBytes(columns, false)); + result.InsertValue("columns_in_mem", JoinSeq(",", columns)); + } + result.InsertValue("portion_id", Portion->GetPortionId()); + result.InsertValue("raw", Portion->GetTotalRawBytes()); + result.InsertValue("blob", Portion->GetTotalBlobBytes()); + result.InsertValue("read_memory", GetColumnRawBytes(GetStageData().GetPortionAccessor().GetColumnIds())); + return result; + } + virtual void DoAbort() override; + virtual ui64 GetPathId() const override { + return Portion->GetPathId(); + } + + virtual bool DoStartFetchingAccessor(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step) override; + +public: + virtual ui64 GetMemoryGroupId() const override { + return SourceGroupGuard->GetGroupId(); + } + + virtual ui64 PredictAccessorsSize() const override { + return Portion->GetApproxChunksCount(GetContext()->GetCommonContext()->GetReadMetadata()->GetResultSchema()->GetColumnsCount()) * sizeof(TColumnRecord); + } + + virtual std::shared_ptr GetStartPKRecordBatch() const override { + return Portion->GetMeta().GetFirstLastPK().GetBatch()->Slice(0, 1); + } + + virtual bool DoAddTxConflict() override { + if (Portion->HasCommitSnapshot() || !Portion->HasInsertWriteId()) { + GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + return true; + } else if (!GetContext()->GetReadMetadata()->IsMyUncommitted(Portion->GetInsertWriteIdVerified())) { + GetContext()->GetReadMetadata()->SetConflictedWriteId(Portion->GetInsertWriteIdVerified()); + return true; + } + return false; + } + + virtual bool HasIndexes(const std::set& indexIds) const override { + return Schema->GetIndexInfo().HasIndexes(indexIds); + 
} + + virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobsOriginal) const override { + return GetStageData().GetPortionAccessor().DecodeBlobAddresses(std::move(blobsOriginal), Schema->GetIndexInfo()); + } + + virtual ui64 GetColumnsVolume(const std::set& columnIds, const EMemType type) const override { + AFL_VERIFY(columnIds.size()); + switch (type) { + case EMemType::Raw: + return GetStageData().GetPortionAccessor().GetColumnRawBytes(columnIds, false); + case EMemType::Blob: + return GetStageData().GetPortionAccessor().GetColumnBlobBytes(columnIds, false); + case EMemType::RawSequential: + return GetStageData().GetPortionAccessor().GetMinMemoryForReadColumns(columnIds); + } + } + + virtual ui64 GetColumnRawBytes(const std::set& columnsIds) const override { + AFL_VERIFY(columnsIds.size()); + return GetStageData().GetPortionAccessor().GetColumnRawBytes(columnsIds, false); + } + + virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const override { + return GetStageData().GetPortionAccessor().GetColumnBlobBytes(columnsIds, false); + } + + virtual ui64 GetIndexRawBytes(const std::set& indexIds) const override { + return Portion->GetTotalRawBytes(); + return GetStageData().GetPortionAccessor().GetIndexRawBytes(indexIds, false); + } + + const TPortionInfo& GetPortionInfo() const { + return *Portion; + } + + const TPortionInfo::TConstPtr& GetPortionInfoPtr() const { + return Portion; + } + + TPortionDataSource(const ui32 sourceIdx, const std::shared_ptr& portion, const std::shared_ptr& context); +}; + +} // namespace NKikimr::NOlap::NReader::NSimple diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/ya.make b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/ya.make new file mode 100644 index 000000000000..e6fb9d623a7d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/ya.make @@ -0,0 +1,24 @@ +LIBRARY() + +SRCS( + scanner.cpp + constructor.cpp + source.cpp + 
fetched_data.cpp + plain_read_data.cpp + columns_set.cpp + context.cpp + fetching.cpp + iterator.cpp +) + +PEERDIR( + ydb/core/formats/arrow + ydb/core/tx/columnshard/blobs_action + ydb/core/tx/conveyor/usage + ydb/core/tx/limiter/grouped_memory/usage +) + +GENERATE_ENUM_SERIALIZATION(columns_set.h) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/simple_reader/ya.make b/ydb/core/tx/columnshard/engines/reader/simple_reader/ya.make new file mode 100644 index 000000000000..6926bde3581e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/simple_reader/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( +) + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/simple_reader/constructor + ydb/core/tx/columnshard/engines/reader/simple_reader/iterator +) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h index 72ffbdda7754..a44b43beec7b 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h @@ -68,7 +68,7 @@ class TStatsIteratorBase: public TScanIteratorBase { continue; } auto table = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({resultBatch})); - return std::make_shared(table, lastKey, std::nullopt); + return std::make_shared(table, std::make_shared(lastKey), std::nullopt); } return std::shared_ptr(); } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.h b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.h index c5068be3c82f..7a9ee6bd36b5 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.h @@ -18,7 +18,7 @@ struct TReadStatsMetadata: public TReadMetadataBase { explicit TReadStatsMetadata(const std::shared_ptr& info, ui64 tabletId, const ESorting sorting, const TProgramContainer& 
ssaProgram, const std::shared_ptr& schema, const TSnapshot& requestSnapshot) - : TBase(info, sorting, ssaProgram, schema, requestSnapshot) + : TBase(info, sorting, ssaProgram, schema, requestSnapshot, nullptr) , TabletId(tabletId) { } }; diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/policy.h b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/policy.h index 72c528145579..dc277d498140 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/policy.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/policy.h @@ -10,7 +10,7 @@ namespace NKikimr::NOlap::NReader::NSysView::NAbstract { class ISysViewPolicy { private: - virtual std::unique_ptr DoCreateConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const = 0; + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const = 0; virtual std::shared_ptr DoCreateMetadataFiller() const = 0; public: virtual ~ISysViewPolicy() = default; @@ -24,8 +24,8 @@ class ISysViewPolicy { AFL_VERIFY(!!result); return result; } - std::unique_ptr CreateConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const { - auto result = DoCreateConstructor(snapshot, itemsLimit, reverse); + std::unique_ptr CreateConstructor(const TScannerConstructorContext& request) const { + auto result = DoCreateConstructor(request); AFL_VERIFY(!!result); return result; } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h index b932e86533f7..1808888ff6be 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h @@ -153,9 +153,8 @@ class TStatsIterator: public NAbstract::TStatsIterator DoCreateConstructor( - const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const override { - return std::make_unique(snapshot, itemsLimit, 
reverse); + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const override { + return std::make_unique(request); } virtual std::shared_ptr DoCreateMetadataFiller() const override { return std::make_shared(); @@ -168,9 +167,8 @@ class TStoreSysViewPolicy: public NAbstract::ISysViewPolicy { class TTableSysViewPolicy: public NAbstract::ISysViewPolicy { protected: - virtual std::unique_ptr DoCreateConstructor( - const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const override { - return std::make_unique(snapshot, itemsLimit, reverse); + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const override { + return std::make_unique(request); } virtual std::shared_ptr DoCreateMetadataFiller() const override { return std::make_shared(); diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.h b/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.h index 02b3220a9565..64ef291fc81a 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.h @@ -13,6 +13,10 @@ class TStatScannerConstructor: public IScannerConstructor { private: using TBase = IScannerConstructor; + virtual std::shared_ptr DoBuildCursor() const override { + return nullptr; + } + virtual std::shared_ptr BuildMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const = 0; virtual TConclusion> DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const override { diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.h b/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.h index 8effbf9b6618..ec0ed83714bd 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.h @@ -41,8 
+41,8 @@ class TStatsIterator : public NAbstract::TStatsIterator DoCreateConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const override { - return std::make_unique(snapshot, itemsLimit, reverse); + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const override { + return std::make_unique(request); } virtual std::shared_ptr DoCreateMetadataFiller() const override { return std::make_shared(); @@ -54,8 +54,8 @@ class TStoreSysViewPolicy: public NAbstract::ISysViewPolicy { class TTableSysViewPolicy: public NAbstract::ISysViewPolicy { protected: - virtual std::unique_ptr DoCreateConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const override { - return std::make_unique(snapshot, itemsLimit, reverse); + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const override { + return std::make_unique(request); } virtual std::shared_ptr DoCreateMetadataFiller() const override { return std::make_shared(); diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/optimizer/optimizer.h b/ydb/core/tx/columnshard/engines/reader/sys_view/optimizer/optimizer.h index c442c46242cb..6c69bece0fd0 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/optimizer/optimizer.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/optimizer/optimizer.h @@ -63,8 +63,8 @@ class TMetadataFromTable: public NAbstract::TMetadataFromTable { class TStoreSysViewPolicy: public NAbstract::ISysViewPolicy { protected: - virtual std::unique_ptr DoCreateConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const override { - return std::make_unique(snapshot, itemsLimit, reverse); + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const override { + return std::make_unique(request); } virtual std::shared_ptr DoCreateMetadataFiller() const override { return std::make_shared(); @@ -76,8 +76,8 @@ 
class TStoreSysViewPolicy: public NAbstract::ISysViewPolicy { class TTableSysViewPolicy: public NAbstract::ISysViewPolicy { protected: - virtual std::unique_ptr DoCreateConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const override { - return std::make_unique(snapshot, itemsLimit, reverse); + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const override { + return std::make_unique(request); } virtual std::shared_ptr DoCreateMetadataFiller() const override { return std::make_shared(); diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.h b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.h index 82cf42beff06..9f5fd67fb8c9 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.h @@ -38,8 +38,8 @@ class TStatsIterator : public NAbstract::TStatsIterator DoCreateConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const override { - return std::make_unique(snapshot, itemsLimit, reverse); + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const override { + return std::make_unique(request); } virtual std::shared_ptr DoCreateMetadataFiller() const override { return std::make_shared(); @@ -51,8 +51,8 @@ class TStoreSysViewPolicy: public NAbstract::ISysViewPolicy { class TTableSysViewPolicy: public NAbstract::ISysViewPolicy { protected: - virtual std::unique_ptr DoCreateConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) const override { - return std::make_unique(snapshot, itemsLimit, reverse); + virtual std::unique_ptr DoCreateConstructor(const TScannerConstructorContext& request) const override { + return std::make_unique(request); } virtual std::shared_ptr DoCreateMetadataFiller() const override { return std::make_shared(); diff --git 
a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp index 17b7c8497fbd..eab1651e5f4b 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp @@ -38,13 +38,13 @@ void TTxInternalScan::Complete(const TActorContext& ctx) { const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build()("tablet", Self->TabletID())("snapshot", snapshot.DebugString()); TReadMetadataPtr readMetadataRange; + TScannerConstructorContext context(snapshot, 0, request.GetReverse()); { TReadDescription read(snapshot, request.GetReverse()); read.PathId = request.GetPathId(); read.LockId = LockId; read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); - std::unique_ptr scannerConstructor( - new NPlain::TIndexScannerConstructor(snapshot, request.GetItemsLimit(), request.GetReverse())); + std::unique_ptr scannerConstructor(new NPlain::TIndexScannerConstructor(context)); read.ColumnIds = request.GetColumnIds(); read.ColumnNames = request.GetColumnNames(); if (request.RangesFilter) { diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp index d3c1aa0f47ff..1e682656024e 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp @@ -38,6 +38,7 @@ void TTxScan::Complete(const TActorContext& ctx) { if (snapshot.IsZero()) { snapshot = Self->GetLastTxSnapshot(); } + TScannerConstructorContext context(snapshot, request.HasItemsLimit() ? 
request.GetItemsLimit() : 0, request.GetReverse()); const auto scanId = request.GetScanId(); const ui64 txId = request.GetTxId(); const ui32 scanGen = request.GetGeneration(); @@ -62,17 +63,31 @@ void TTxScan::Complete(const TActorContext& ctx) { read.PathId = request.GetLocalPathId(); read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); read.TableName = table; - bool isIndex = false; + const TString defaultReader = + AppDataVerified().ColumnShardConfig.GetReaderClassName() ? AppDataVerified().ColumnShardConfig.GetReaderClassName() : "PLAIN"; std::unique_ptr scannerConstructor = [&]() { - const ui64 itemsLimit = request.HasItemsLimit() ? request.GetItemsLimit() : 0; auto sysViewPolicy = NSysView::NAbstract::ISysViewPolicy::BuildByPath(read.TableName); - isIndex = !sysViewPolicy; if (!sysViewPolicy) { - return std::unique_ptr(new NPlain::TIndexScannerConstructor(snapshot, itemsLimit, request.GetReverse())); + auto constructor = NReader::IScannerConstructor::TFactory::MakeHolder( + request.GetCSScanPolicy() ? 
request.GetCSScanPolicy() : defaultReader, context); + if (!constructor) { + return std::unique_ptr(); + } + return std::unique_ptr(constructor.Release()); } else { - return sysViewPolicy->CreateConstructor(snapshot, itemsLimit, request.GetReverse()); + return sysViewPolicy->CreateConstructor(context); } }(); + if (!scannerConstructor) { + return SendError("cannot build scanner", AppDataVerified().ColumnShardConfig.GetReaderClassName(), ctx); + } + { + auto cursorConclusion = scannerConstructor->BuildCursorFromProto(request.GetScanCursor()); + if (cursorConclusion.IsFail()) { + return SendError("cannot build scanner cursor", cursorConclusion.GetErrorMessage(), ctx); + } + read.SetScanCursor(cursorConclusion.DetachResult()); + } read.ColumnIds.assign(request.GetColumnTags().begin(), request.GetColumnTags().end()); read.StatsMode = request.GetStatsMode(); diff --git a/ydb/core/tx/columnshard/engines/reader/ya.make b/ydb/core/tx/columnshard/engines/reader/ya.make index c1a5dbd87327..c74241203529 100644 --- a/ydb/core/tx/columnshard/engines/reader/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/ya.make @@ -12,6 +12,7 @@ PEERDIR( ydb/core/tx/columnshard/resources ydb/core/tx/program ydb/core/tx/columnshard/engines/reader/plain_reader + ydb/core/tx/columnshard/engines/reader/simple_reader ydb/core/tx/columnshard/engines/reader/sys_view ydb/core/tx/columnshard/engines/reader/abstract ydb/core/tx/columnshard/engines/reader/common diff --git a/ydb/core/tx/columnshard/export/actor/export_actor.h b/ydb/core/tx/columnshard/export/actor/export_actor.h index ce4c4d517ecc..9089277daf27 100644 --- a/ydb/core/tx/columnshard/export/actor/export_actor.h +++ b/ydb/core/tx/columnshard/export/actor/export_actor.h @@ -82,6 +82,7 @@ class TActor: public NBackground::TSessionActor { virtual void OnBootstrap(const TActorContext& /*ctx*/) override { auto evStart = ExportSession->GetTask().GetSelector()->BuildRequestInitiator(ExportSession->GetCursor()); 
evStart->Record.SetGeneration((ui64)TabletId); + evStart->Record.SetCSScanPolicy("PLAIN"); Send(TabletActorId, evStart.release()); Become(&TActor::StateFunc); } diff --git a/ydb/core/tx/columnshard/operations/events.h b/ydb/core/tx/columnshard/operations/events.h index d5c9ff925ae9..affceeb82b3f 100644 --- a/ydb/core/tx/columnshard/operations/events.h +++ b/ydb/core/tx/columnshard/operations/events.h @@ -34,6 +34,14 @@ class TInsertedPortions { YDB_READONLY_DEF(std::vector, InsertWriteIds); public: + ui64 GetRecordsCount() const { + ui64 result = 0; + for (auto&& i : Portions) { + result += i.GetPKBatch()->num_rows(); + } + return result; + } + const NEvWrite::TWriteMeta& GetWriteMeta() const { return WriteMeta; } diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp index 540900f881d1..934dafd0f7d8 100644 --- a/ydb/core/tx/columnshard/operations/write.cpp +++ b/ydb/core/tx/columnshard/operations/write.cpp @@ -45,7 +45,7 @@ void TWriteOperation::Start( void TWriteOperation::CommitOnExecute( TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { - Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared || InsertWriteIds.empty()); TBlobGroupSelector dsGroupSelector(owner.Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); @@ -55,8 +55,10 @@ void TWriteOperation::CommitOnExecute( auto pathExists = [&](ui64 pathId) { return owner.TablesManager.HasTable(pathId); }; - const auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), insertWriteIds, pathExists); - owner.Counters.GetTabletCounters()->OnWriteCommitted(counters); + if (insertWriteIds.size()) { + const auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), insertWriteIds, pathExists); + owner.Counters.GetTabletCounters()->OnWriteCommitted(counters); + } } else { for 
(auto&& i : InsertWriteIds) { owner.MutableIndexAs().MutableGranuleVerified(PathId).CommitPortionOnExecute(txc, i, snapshot); @@ -65,7 +67,7 @@ void TWriteOperation::CommitOnExecute( } void TWriteOperation::CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& /*snapshot*/) const { - Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared || InsertWriteIds.empty()); if (!WritePortions) { owner.UpdateInsertTableCounters(); } else { diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp index 38ef52ec8c7b..8e08a0fd331f 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #include #include @@ -96,26 +99,29 @@ void PlanWriteTx(TTestBasicRuntime& runtime, const TActorId& sender, NOlap::TSna ui32 WaitWriteResult(TTestBasicRuntime& runtime, ui64 shardId, std::vector* writeIds) { TAutoPtr handle; - auto event = runtime.GrabEdgeEvent(handle); + auto event = runtime.GrabEdgeEvent(handle); UNIT_ASSERT(event); - auto& resWrite = Proto(event); + auto& resWrite = event->Record; UNIT_ASSERT_EQUAL(resWrite.GetOrigin(), shardId); - UNIT_ASSERT_EQUAL(resWrite.GetTxInitiator(), 0); - if (writeIds && resWrite.GetStatus() == NKikimrTxColumnShard::EResultStatus::SUCCESS) { - writeIds->push_back(resWrite.GetWriteId()); + if (writeIds && resWrite.GetStatus() == NKikimrDataEvents::TEvWriteResult::STATUS_PREPARED) { + writeIds->push_back(resWrite.GetTxId()); } return resWrite.GetStatus(); } -bool WriteDataImpl(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 tableId, - const NLongTxService::TLongTxId& longTxId, const ui64 writeId, - const TString& data, const std::shared_ptr& schema, std::vector* writeIds, const NEvWrite::EModificationType mType) { +bool 
WriteDataImpl(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 tableId, const ui64 writeId, + const TString& data, const std::shared_ptr& schema, std::vector* writeIds, const NEvWrite::EModificationType mType, const ui64 lockId) { const TString dedupId = ToString(writeId); - auto write = std::make_unique(sender, longTxId, tableId, dedupId, data, writeId, mType); - Y_ABORT_UNLESS(schema); - write->SetArrowSchema(NArrow::SerializeSchema(*schema)); + auto write = std::make_unique(writeId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + write->SetLockId(lockId, 1); + auto& operation = write->AddOperation(TEnumOperator::SerializeToWriteProto(mType), TTableId(0, tableId, 1), {}, + 0, NKikimrDataEvents::FORMAT_ARROW); + *operation.MutablePayloadSchema() = NArrow::SerializeSchema(*schema); + NEvWrite::TPayloadWriter writer(*write); + auto dataCopy = data; + writer.AddDataToPayload(std::move(dataCopy)); ForwardToTablet(runtime, shardId, sender, write.release()); if (writeIds) { @@ -125,25 +131,21 @@ bool WriteDataImpl(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shar } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType) { - NLongTxService::TLongTxId longTxId; - UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); - return WriteDataImpl( - runtime, sender, shardId, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); + const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType, + const ui64 lockId) { + return WriteDataImpl(runtime, sender, shardId, tableId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType, lockId); } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& 
data, const std::vector& ydbSchema, bool waitResult, std::vector* writeIds, - const NEvWrite::EModificationType mType) { - NLongTxService::TLongTxId longTxId; - UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); + const NEvWrite::EModificationType mType, const ui64 lockId) { if (writeIds) { - return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, - NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); + return WriteDataImpl( + runtime, sender, TTestTxConfig::TxTablet0, tableId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType, lockId); } std::vector ids; - return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, - NArrow::MakeArrowSchema(ydbSchema), waitResult ? &ids : nullptr, mType); + return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), + waitResult ? &ids : nullptr, mType, lockId); } std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, @@ -205,24 +207,24 @@ void ScanIndexStats(TTestBasicRuntime& runtime, TActorId& sender, const std::vec ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, scan.release()); } -void ProposeCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 shardId, ui64 txId, const std::vector& writeIds) { - NKikimrTxColumnShard::ETransactionKind txKind = NKikimrTxColumnShard::ETransactionKind::TX_KIND_COMMIT; - TString txBody = TTestSchema::CommitTxBody(0, writeIds); +void ProposeCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 shardId, ui64 txId, const std::vector& writeIds, const ui64 lockId) { + auto write = std::make_unique(txId, NKikimrDataEvents::TEvWrite::MODE_PREPARE); + auto* lock = write->Record.MutableLocks()->AddLocks(); + lock->SetLockId(lockId); + write->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - 
ForwardToTablet(runtime, shardId, sender, - new TEvColumnShard::TEvProposeTransaction(txKind, sender, txId, txBody)); + ForwardToTablet(runtime, shardId, sender, write.release()); TAutoPtr handle; - auto event = runtime.GrabEdgeEvent(handle); + auto event = runtime.GrabEdgeEvent(handle); UNIT_ASSERT(event); - auto& res = Proto(event); - UNIT_ASSERT_EQUAL(res.GetTxKind(), txKind); - UNIT_ASSERT_EQUAL(res.GetTxId(), txId); - UNIT_ASSERT_EQUAL(res.GetStatus(), NKikimrTxColumnShard::EResultStatus::PREPARED); + auto& res = event->Record; + AFL_VERIFY(res.GetTxId() == txId)("tx_id", txId)("res", res.GetTxId()); + UNIT_ASSERT_EQUAL(res.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_PREPARED); } -void ProposeCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 txId, const std::vector& writeIds) { - ProposeCommit(runtime, sender, TTestTxConfig::TxTablet0, txId, writeIds); +void ProposeCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 txId, const std::vector& writeIds, const ui64 lockId) { + ProposeCommit(runtime, sender, TTestTxConfig::TxTablet0, txId, writeIds, lockId); } void PlanCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 planStep, const TSet& txIds) { @@ -246,12 +248,12 @@ void PlanCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 shardId, ui64 TAutoPtr handle; for (ui32 i = 0; i < txIds.size(); ++i) { - auto event = runtime.GrabEdgeEvent(handle); + auto event = runtime.GrabEdgeEvent(handle); UNIT_ASSERT(event); - auto& res = Proto(event); + auto& res = event->Record; UNIT_ASSERT(txIds.contains(res.GetTxId())); - UNIT_ASSERT_EQUAL(res.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + UNIT_ASSERT_EQUAL(res.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); } Wakeup(runtime, sender, shardId); } diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index 8a8369252829..3dd60dcb8bb0 100644 --- 
a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -408,11 +408,11 @@ void PlanWriteTx(TTestBasicRuntime& runtime, const TActorId& sender, NOlap::TSna bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, const std::vector& ydbSchema, std::vector* writeIds, - const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); + const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert, const ui64 lockId = 1); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, const std::vector& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr, - const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); + const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert, const ui64 lockId = 1); std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, ui64 tableId, const ui64 writePartId, const TString& data, @@ -423,8 +423,8 @@ ui32 WaitWriteResult(TTestBasicRuntime& runtime, ui64 shardId, std::vector void ScanIndexStats(TTestBasicRuntime& runtime, TActorId& sender, const std::vector& pathIds, NOlap::TSnapshot snap, ui64 scanId = 0); -void ProposeCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 shardId, ui64 txId, const std::vector& writeIds); -void ProposeCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 txId, const std::vector& writeIds); +void ProposeCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 shardId, ui64 txId, const std::vector& writeIds, const ui64 lockId = 1); +void ProposeCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 txId, const std::vector& writeIds, const ui64 lockId = 1); void PlanCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 shardId, ui64 
planStep, const TSet& txIds); void PlanCommit(TTestBasicRuntime& runtime, TActorId& sender, ui64 planStep, const TSet& txIds); diff --git a/ydb/core/tx/columnshard/ut_rw/ut_backup.cpp b/ydb/core/tx/columnshard/ut_rw/ut_backup.cpp index 6463e4a0a266..887780e4da73 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_backup.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_backup.cpp @@ -82,7 +82,6 @@ Y_UNIT_TEST_SUITE(Backup) { PlanCommit(runtime, sender, ++planStep, txId); } - const ui32 start = csControllerGuard->GetInsertStartedCounter().Val(); TestWaitCondition(runtime, "insert compacted", [&]() { ++writeId; @@ -90,7 +89,7 @@ Y_UNIT_TEST_SUITE(Backup) { WriteData(runtime, sender, writeId, tableId, MakeTestBlob({writeId * 100, (writeId + 1) * 100}, schema), schema, true, &writeIds); ProposeCommit(runtime, sender, ++txId, writeIds); PlanCommit(runtime, sender, ++planStep, txId); - return csControllerGuard->GetInsertStartedCounter().Val() > start + 1; + return true; }, TDuration::Seconds(1000)); NKikimrTxColumnShard::TBackupTxBody txBody; diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 1e6f1c59376f..25e1de8038e7 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -40,7 +40,7 @@ using TDefaultTestsController = NKikimr::NYDBTest::NColumnShard::TController; template bool DataHas(const std::vector>& batches, std::pair range, bool requireUniq = false, - const std::string& columnName = "timestamp") { + const std::string& columnName = "timestamp", const bool inverseCheck = false) { static constexpr const bool isStrKey = std::is_same_v; THashMap keys; @@ -81,18 +81,19 @@ bool DataHas(const std::vector>& batches, st } } + bool problems = false; for (auto& [key, count] : keys) { - if (!count) { + if (!count && !inverseCheck) { Cerr << "No key: " << key << "\n"; - return false; + problems = true; } if 
(requireUniq && count > 1) { Cerr << "Not unique key: " << key << " (count: " << count << ")\n"; - return false; + problems = true; } } - return true; + return !problems; } template @@ -107,6 +108,11 @@ bool DataHas(const std::vector& blobs, const TString& srtSchema, std::p return DataHas(batches, range, requireUniq, columnName); } +bool DataNotHas(const std::vector>& batches, std::pair range, bool requireUniq = false, + const std::string& columnName = "timestamp") { + return DataHas(batches, range, requireUniq, columnName, true); +} + template bool DataHasOnly(const std::vector>& batches, std::pair range) { static constexpr const bool isStrKey = std::is_same_v; @@ -424,7 +430,7 @@ void TestWrite(const TestTableDescription& table) { UNIT_ASSERT(bigData.size() > NColumnShard::TLimits::GetMaxBlobSize()); UNIT_ASSERT(bigData.size() < NColumnShard::TLimits::GetMaxBlobSize() + 2 * 1024 * 1024); ok = WriteData(runtime, sender, writeId++, tableId, bigData, ydbSchema); - UNIT_ASSERT(!ok); + UNIT_ASSERT(ok); } void TestWriteOverload(const TestTableDescription& table) { @@ -482,11 +488,11 @@ void TestWriteOverload(const TestTableDescription& table) { UNIT_ASSERT(WriteData(runtime, sender, ++writeId, tableId, testBlob, table.Schema, false)); } - UNIT_ASSERT_VALUES_EQUAL(WaitWriteResult(runtime, TTestTxConfig::TxTablet0), (ui32)NKikimrTxColumnShard::EResultStatus::OVERLOADED); + UNIT_ASSERT_VALUES_EQUAL(WaitWriteResult(runtime, TTestTxConfig::TxTablet0), (ui32)NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED); while (capturedWrites.size()) { resendOneCaptured(); - UNIT_ASSERT_VALUES_EQUAL(WaitWriteResult(runtime, TTestTxConfig::TxTablet0), (ui32)NKikimrTxColumnShard::EResultStatus::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(WaitWriteResult(runtime, TTestTxConfig::TxTablet0), (ui32)NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); } UNIT_ASSERT(WriteData(runtime, sender, ++writeId, tableId, testBlob, table.Schema)); // OK after overload @@ -524,8 +530,9 @@ void 
TestWriteReadDup(const TestTableDescription& table = {}) { TSet txIds; for (ui32 i = 0; i <= 5; ++i) { std::vector writeIds; - UNIT_ASSERT(WriteData(runtime, sender, ++writeId, tableId, testData, ydbSchema, true, &writeIds)); - ProposeCommit(runtime, sender, ++txId, writeIds); + ++txId; + UNIT_ASSERT(WriteData(runtime, sender, ++writeId, tableId, testData, ydbSchema, true, &writeIds, NEvWrite::EModificationType::Upsert, txId)); + ProposeCommit(runtime, sender, txId, writeIds, txId); txIds.insert(txId); } PlanCommit(runtime, sender, planStep, txIds); @@ -542,69 +549,6 @@ void TestWriteReadDup(const TestTableDescription& table = {}) { } } -void TestWriteReadLongTxDup() { - TTestBasicRuntime runtime; - TTester::Setup(runtime); - auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - - TActorId sender = runtime.AllocateEdgeActor(); - CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); - - TDispatchOptions options; - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); - runtime.DispatchEvents(options); - - // - - ui64 tableId = 1; - auto ydbSchema = TTestSchema::YdbSchema(); - SetupSchema(runtime, sender, tableId); - - constexpr ui32 numRows = 10; - std::pair portion = { 10, 10 + numRows }; - - NLongTxService::TLongTxId longTxId; - UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); - - ui64 txId = 0; - ui64 planStep = 100; - std::optional writeId; - - // Only the first blob with dedup pair {longTx, dedupId} should be inserted - // Others should return OK (write retries emulation) - for (ui32 i = 0; i < 4; ++i) { - auto data = MakeTestBlob({ portion.first + i, portion.second + i }, ydbSchema); - UNIT_ASSERT(data.size() < NColumnShard::TLimits::MIN_BYTES_TO_INSERT); - - auto writeIdOpt = WriteData(runtime, sender, longTxId, tableId, 1, data, ydbSchema); - UNIT_ASSERT(writeIdOpt); - 
if (!i) { - writeId = *writeIdOpt; - } - UNIT_ASSERT_EQUAL(*writeIdOpt, *writeId); - } - - ProposeCommit(runtime, sender, ++txId, { *writeId }); - TSet txIds = { txId }; - PlanCommit(runtime, sender, planStep, txIds); - - // read - TAutoPtr handle; - { - TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); - reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); - auto rb = reader.ReadAll(); - UNIT_ASSERT(reader.IsCorrectlyFinished()); - UNIT_ASSERT(rb); - UNIT_ASSERT(rb->num_rows()); - Y_UNUSED(NArrow::TColumnOperator().VerifyIfAbsent().Extract(rb, TTestSchema::ExtractNames(ydbSchema))); - UNIT_ASSERT((ui32)rb->num_columns() == TTestSchema::ExtractNames(ydbSchema).size()); - UNIT_ASSERT(CheckOrdered(rb)); - UNIT_ASSERT(DataHas({ rb }, portion, true)); - UNIT_ASSERT(DataHasOnly({ rb }, portion)); - } -} - void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString codec = "") { auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); @@ -787,8 +731,8 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString UNIT_ASSERT(rb->num_rows()); UNIT_ASSERT(CheckOrdered(rb)); UNIT_ASSERT(DataHas({ rb }, portion[0])); - UNIT_ASSERT(!DataHas({ rb }, portion[1])); - UNIT_ASSERT(!DataHas({ rb }, portion[2])); + UNIT_ASSERT(DataNotHas({ rb }, portion[1])); + UNIT_ASSERT(DataNotHas({ rb }, portion[2])); } // read 8, planstep 22 (full index) @@ -805,7 +749,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString UNIT_ASSERT(CheckOrdered(rb)); UNIT_ASSERT(DataHas({ rb }, portion[0])); UNIT_ASSERT(DataHas({ rb }, portion[1])); - UNIT_ASSERT(!DataHas({ rb }, portion[2])); + UNIT_ASSERT(DataNotHas({ rb }, portion[2])); } // commit 3: ins:0, cmt:1, idx:1 @@ -836,7 +780,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& 
table = {}, TString UNIT_ASSERT(DataHas({ rb }, portion[0])); UNIT_ASSERT(DataHas({ rb }, portion[1])); UNIT_ASSERT(DataHas({ rb }, portion[2])); - UNIT_ASSERT(!DataHas({ rb }, portion[3])); + UNIT_ASSERT(DataNotHas({ rb }, portion[3])); } // commit 4: ins:0, cmt:2, idx:1 (with duplicates in PK) @@ -862,7 +806,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString UNIT_ASSERT(DataHas({ rb }, portion[1])); UNIT_ASSERT(DataHas({ rb }, portion[2])); UNIT_ASSERT(DataHas({ rb }, portion[3])); - UNIT_ASSERT(DataHas({ rb }, { 0, 500 }, true)); + UNIT_ASSERT(DataHas({ rb }, { 0, 500 }, false)); const ui64 compactedBytes = reader.GetReadStat("compacted_bytes"); const ui64 insertedBytes = reader.GetReadStat("inserted_bytes"); @@ -1730,7 +1674,6 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { Y_UNIT_TEST(WriteReadDuplicate) { TestWriteReadDup(); - TestWriteReadLongTxDup(); } Y_UNIT_TEST(WriteReadModifications) { diff --git a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp index 7060877880f0..1220870814bf 100644 --- a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp @@ -1028,6 +1028,7 @@ void TestDropWriteRace() { ui64 tableId = 1; ui64 planStep = 1000000000; // greater then delays ui64 txId = 100; + ui32 writeId = 0; NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); @@ -1038,9 +1039,9 @@ void TestDropWriteRace() { UNIT_ASSERT(data.size() < NColumnShard::TLimits::MIN_BYTES_TO_INSERT); // Write into InsertTable - auto writeIdOpt = WriteData(runtime, sender, longTxId, tableId, 1, data, testYdbSchema); - UNIT_ASSERT(writeIdOpt); - ProposeCommit(runtime, sender, ++txId, {*writeIdOpt}); + ++txId; + AFL_VERIFY(WriteData(runtime, sender, ++writeId, tableId, data, testYdbSchema)); + ProposeCommit(runtime, sender, txId, { writeId 
}); auto commitTxId = txId; // Drop table diff --git a/ydb/core/tx/conveyor/service/service.cpp b/ydb/core/tx/conveyor/service/service.cpp index 68900c3e25ec..c34acba81c9b 100644 --- a/ydb/core/tx/conveyor/service/service.cpp +++ b/ydb/core/tx/conveyor/service/service.cpp @@ -13,7 +13,7 @@ TDistributor::TDistributor(const TConfig& config, const TString& conveyorName, T void TDistributor::Bootstrap() { const ui32 workersCount = Config.GetWorkersCountForConveyor(NKqp::TStagePredictor::GetUsableThreads()); - AFL_NOTICE(NKikimrServices::TX_CONVEYOR)("name", ConveyorName)("action", "conveyor_registered")("config", Config.DebugString()); + AFL_NOTICE(NKikimrServices::TX_CONVEYOR)("name", ConveyorName)("action", "conveyor_registered")("config", Config.DebugString())("actor_id", SelfId()); for (ui32 i = 0; i < workersCount; ++i) { const double usage = Config.GetWorkerCPUUsage(i); Workers.emplace_back(Register(new TWorker(ConveyorName, usage, SelfId()))); @@ -46,10 +46,10 @@ void TDistributor::HandleMain(TEvInternal::TEvTaskProcessedResult::TPtr& ev) { } void TDistributor::HandleMain(TEvExecution::TEvNewTask::TPtr& ev) { - AFL_DEBUG(NKikimrServices::TX_CONVEYOR)("action", "add_task")("sender", ev->Sender); Counters.IncomingRate->Inc(); const TString taskClass = ev->Get()->GetTask()->GetTaskClassIdentifier(); + AFL_DEBUG(NKikimrServices::TX_CONVEYOR)("action", "add_task")("sender", ev->Sender)("task", taskClass); auto itSignal = Signals.find(taskClass); if (itSignal == Signals.end()) { itSignal = Signals.emplace(taskClass, std::make_shared("Conveyor/" + ConveyorName, taskClass)).first; diff --git a/ydb/core/tx/conveyor/usage/service.h b/ydb/core/tx/conveyor/usage/service.h index 6ba3c3320fde..22928ae3e6f3 100644 --- a/ydb/core/tx/conveyor/usage/service.h +++ b/ydb/core/tx/conveyor/usage/service.h @@ -41,9 +41,9 @@ class TServiceOperatorImpl { context.Register(new TAsyncTaskExecutor(task)); } static bool SendTaskToExecute(const std::shared_ptr& task) { - auto& context = 
NActors::TActorContext::AsActorContext(); - const NActors::TActorId& selfId = context.SelfID; - if (TSelf::IsEnabled()) { + if (TSelf::IsEnabled() && NActors::TlsActivationContext) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; context.Send(MakeServiceId(selfId.NodeId()), new NConveyor::TEvExecution::TEvNewTask(task)); return true; } else { diff --git a/ydb/core/tx/data_events/common/modification_type.h b/ydb/core/tx/data_events/common/modification_type.h index cf9f8d90e24f..f93eeda183e3 100644 --- a/ydb/core/tx/data_events/common/modification_type.h +++ b/ydb/core/tx/data_events/common/modification_type.h @@ -49,18 +49,18 @@ class TEnumOperator { } } - static TProto SerializeToProto(const NEvWrite::EModificationType value) { + static NKikimrDataEvents::TEvWrite::TOperation::EOperationType SerializeToWriteProto(const NEvWrite::EModificationType value) { switch (value) { case NEvWrite::EModificationType::Upsert: - return NKikimrTxColumnShard::TEvWrite::OPERATION_UPSERT; + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT; case NEvWrite::EModificationType::Insert: - return NKikimrTxColumnShard::TEvWrite::OPERATION_INSERT; + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT; case NEvWrite::EModificationType::Delete: - return NKikimrTxColumnShard::TEvWrite::OPERATION_DELETE; + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_DELETE; case NEvWrite::EModificationType::Replace: - return NKikimrTxColumnShard::TEvWrite::OPERATION_REPLACE; + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE; case NEvWrite::EModificationType::Update: - return NKikimrTxColumnShard::TEvWrite::OPERATION_UPDATE; + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE; } } @@ -81,6 +81,21 @@ class TEnumOperator { } } + static TProto SerializeToProto(const NEvWrite::EModificationType value) { + switch (value) { + case NEvWrite::EModificationType::Upsert: + return 
NKikimrTxColumnShard::TEvWrite::OPERATION_UPSERT; + case NEvWrite::EModificationType::Insert: + return NKikimrTxColumnShard::TEvWrite::OPERATION_INSERT; + case NEvWrite::EModificationType::Delete: + return NKikimrTxColumnShard::TEvWrite::OPERATION_DELETE; + case NEvWrite::EModificationType::Replace: + return NKikimrTxColumnShard::TEvWrite::OPERATION_REPLACE; + case NEvWrite::EModificationType::Update: + return NKikimrTxColumnShard::TEvWrite::OPERATION_UPDATE; + } + } + static NEvWrite::EModificationType DeserializeFromProto(const NKikimrTxColumnShard::TEvWrite::EModificationType value) { switch (value) { case NKikimrTxColumnShard::TEvWrite::OPERATION_UPSERT: diff --git a/ydb/core/tx/data_events/events.h b/ydb/core/tx/data_events/events.h index bd4f06284e9d..f4b190eaff82 100644 --- a/ydb/core/tx/data_events/events.h +++ b/ydb/core/tx/data_events/events.h @@ -62,7 +62,8 @@ struct TDataEvents { return *this; } - void AddOperation(NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const std::vector& columnIds, + NKikimrDataEvents::TEvWrite::TOperation& AddOperation(NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, + const TTableId& tableId, const std::vector& columnIds, ui64 payloadIndex, NKikimrDataEvents::EDataFormat payloadFormat) { Y_ABORT_UNLESS(operationType != NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UNSPECIFIED); Y_ABORT_UNLESS(payloadFormat != NKikimrDataEvents::FORMAT_UNSPECIFIED); @@ -75,6 +76,7 @@ struct TDataEvents { operation->MutableTableId()->SetTableId(tableId.PathId.LocalPathId); operation->MutableTableId()->SetSchemaVersion(tableId.SchemaVersion); operation->MutableColumnIds()->Assign(columnIds.begin(), columnIds.end()); + return *operation; } ui64 GetTxId() const { diff --git a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp index 7836ca000823..f0f5fb0832ee 100644 --- a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp +++ 
b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp @@ -715,8 +715,9 @@ Y_UNIT_TEST_SUITE(TOlap) { TSet txIds; for (ui32 i = 0; i < 10; ++i) { std::vector writeIds; - NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert); - NTxUT::ProposeCommit(runtime, sender, shardId, ++txId, writeIds); + ++txId; + NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert, txId); + NTxUT::ProposeCommit(runtime, sender, shardId, txId, writeIds, txId); txIds.insert(txId); } @@ -727,9 +728,10 @@ Y_UNIT_TEST_SUITE(TOlap) { // trigger periodic stats at shard (after timeout) std::vector writeIds; - NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert); - NTxUT::ProposeCommit(runtime, sender, shardId, ++txId, writeIds); - NTxUT::PlanCommit(runtime, sender, shardId, ++planStep, {txId}); + ++txId; + NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert, txId); + NTxUT::ProposeCommit(runtime, sender, shardId, txId, writeIds, txId); + NTxUT::PlanCommit(runtime, sender, shardId, ++planStep, { txId }); } csController->WaitIndexation(TDuration::Seconds(5)); {