From 5fcd16e5ccd14cad19934b36314992e5cddc66d5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 3 May 2024 12:18:05 +0300 Subject: [PATCH 01/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 43 +++++------- ydb/core/kqp/runtime/kqp_write_table.cpp | 87 +++++++++++++++++++++--- ydb/core/kqp/runtime/kqp_write_table.h | 16 ++++- 3 files changed, 109 insertions(+), 37 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 2ca31614ff68..a869a1914129 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -62,7 +62,7 @@ namespace { public: struct TInFlightBatch { - TString Data; + NKikimr::NKqp::IPayloadSerializer::IBatchPtr Batch; }; size_t Size() const { @@ -101,30 +101,22 @@ namespace { return Batches.at(index); } - std::optional PopBatches(const ui64 cookie) { - if (BatchesInFlight != 0 && Cookie == cookie) { - ui64 dataSize = 0; - for (size_t index = 0; index < BatchesInFlight; ++index) { - dataSize += Batches.front().Data.size(); - Batches.pop_front(); - } - - ++Cookie; - SendAttempts = 0; - BatchesInFlight = 0; - - Memory -= dataSize; - return dataSize; + std::optional PopBatch(const ui64 Cookie) { + if (!IsEmpty() && Cookie == CurrentBatch().Cookie) { + auto batch = std::move(Batches.front()); + Batches.pop_front(); + Memory -= batch.Batch->GetMemory(); + return std::move(batch); } return std::nullopt; } - void PushBatch(TString&& data) { + void PushBatch(NKikimr::NKqp::IPayloadSerializer::IBatchPtr&& batch) { YQL_ENSURE(!IsClosed()); Batches.push_back(TInFlightBatch{ - .Data = std::move(data) + .Batch = std::move(batch), }); - Memory += Batches.back().Data.size(); + Memory += Batches.back().Batch->GetMemory(); } bool AddAndCheckLock(const NKikimrDataEvents::TLock& lock) { @@ -610,8 +602,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void PopShardBatch(ui64 shardId, ui64 cookie) { TResumeNotificationManager resumeNotificator(*this); auto& shardInfo = ShardsInfo.GetShard(shardId); - if (const auto removedDataSize = shardInfo.PopBatches(cookie); removedDataSize) { - EgressStats.Bytes += *removedDataSize; + if (const auto batch = shardInfo.PopBatch(cookie); batch) { + EgressStats.Bytes += batch->Batch->GetMemory(); EgressStats.Chunks++; EgressStats.Splits++; EgressStats.Resume(); @@ -635,10 +627,9 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N auto& shard = ShardsInfo.GetShard(shardId); while (true) { auto batch = Serializer->FlushBatch(shardId); - if (batch.empty()) { - break; + if (batch && !batch->IsEmpty()) { + shard.PushBatch(std::move(batch)); } - shard.PushBatch(std::move(batch)); } if (shard.GetBatchesInFlight() == 0) { shard.MakeNextBatches( @@ -697,10 +688,10 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N ui64 totalDataSize = 0; for (size_t index = 0; index < shard.GetBatchesInFlight(); ++index) { const auto& inFlightBatch = shard.GetBatch(index); - YQL_ENSURE(!inFlightBatch.Data.empty()); - totalDataSize += inFlightBatch.Data.size(); + YQL_ENSURE(!inFlightBatch.Batch->IsEmpty()); + totalDataSize += inFlightBatch.Batch->GetMemory(); const ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite) - .AddDataToPayload(TString(inFlightBatch.Data)); + .AddDataToPayload(inFlightBatch.Batch->SerializeToString()); evWrite->AddOperation( NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index d90522d2458d..84813ac8b483 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -146,6 +146,22 @@ TVector BuildKeyColumnTypes( class TColumnShardPayloadSerializer : public IPayloadSerializer { using TRecordBatchPtr = std::shared_ptr; + class TBatch : public IPayloadSerializer::IBatch { + public: + TString SerializeToString() const override { + return Data; + } + + i64 GetMemory() const override { + return Data.size(); + } + + TBatch(TString&& data) : Data(std::move(data)) {} + TBatch(const TString& data) : Data(data) {} + + TString Data; + }; + public: TColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, @@ -196,16 +212,21 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { for (auto& [shard, infos] : splittedData.GetShardsInfo()) { for (auto&& shardInfo : infos) { - auto& batch = Batches[shard].emplace_back(); - batch = shardInfo->GetData(); - Memory += batch.size(); - YQL_ENSURE(!batch.empty()); + auto& batch = Batches[shard].emplace_back( + MakeIntrusive(shardInfo->GetData())); // TODO: get rid of copy + //batch = shardInfo->GetData(); + Memory += batch->GetMemory(); + YQL_ENSURE(batch->GetMemory() != 0); } ShardIds.insert(shard); } } } + void ReturnBatch(IPayloadSerializer::IBatchPtr&& batch) override { + Y_UNUSED(batch); + } + NKikimrDataEvents::EDataFormat GetDataFormat() override { return NKikimrDataEvents::FORMAT_ARROW; } @@ -218,6 +239,11 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { return Memory; } + void Close() override { + YQL_ENSURE(!Closed); + Closed = true; + } + bool IsClosed() override { return Closed; } @@ -237,7 +263,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { return std::move(newBatches); } - TString FlushBatch(ui64 shardId) override { + IBatchPtr FlushBatch(ui64 shardId) override { if (!Batches.contains(shardId)) { return {}; } @@ -295,9 +321,39 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { }; class TDataShardPayloadSerializer : public IPayloadSerializer { - struct TBatch { + /*class TBatch : public IPayloadSerializer::IBatch { + public: + TString SerializeToString() const override { + return ; + } + + i64 GetMemory() const override { + return Size; + } + + std::vector Extract() { + Size = 0; + return std::move(Data); + } + + private: std::vector Data; ui64 Size = 0; + };*/ + + class TBatch : public IPayloadSerializer::IBatch { + public: + TString SerializeToString() const override { + return Data; + } + + i64 GetMemory() const override { + return Data.size(); + } + + TBatch(TString&& data) : Data(std::move(data)) {} + + TString Data; }; public: @@ -357,6 +413,10 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { }); } + void ReturnBatch(IPayloadSerializer::IBatchPtr&& batch) override { + Y_UNUSED(batch); + } + NKikimrDataEvents::EDataFormat GetDataFormat() override { return NKikimrDataEvents::FORMAT_CELLVEC; } @@ -369,6 +429,11 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { return Memory; } + void Close() override { + YQL_ENSURE(!Closed); + Closed = true; + } + bool IsClosed() override { return Closed; } @@ -395,19 +460,19 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { if (batch.empty()) { break; } - result[shardId].emplace_back(std::move(batch)); + result[shardId].emplace_back(MakeIntrusive(std::move(batch))); }; } Batchers.clear(); return result; } - TString FlushBatch(ui64 shardId) override { + IBatchPtr FlushBatch(ui64 shardId) override { if (!Batchers.contains(shardId)) { return {}; } auto& batcher = Batchers.at(shardId); - return ExtractNextBatch(batcher, false); + return MakeIntrusive(ExtractNextBatch(batcher, false)); } const THashSet& GetShardIds() const override { @@ -438,6 +503,10 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { } +bool IPayloadSerializer::IBatch::IsEmpty() const { + return GetMemory() == 0; +} + IPayloadSerializerPtr CreateColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, const TConstArrayRef inputColumns, diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 05644e557f04..846e18921084 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -12,7 +12,19 @@ namespace NKqp { class IPayloadSerializer : public TThrRefBase { public: + class IBatch : public TThrRefBase { + public: + virtual TString SerializeToString() const = 0; + virtual i64 GetMemory() const = 0; + bool IsEmpty() const; + }; + + using IBatchPtr = TIntrusivePtr; + virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data, bool close) = 0; + virtual void ReturnBatch(IBatchPtr&& batch) = 0; + + virtual void Close() = 0; virtual bool IsClosed() = 0; virtual bool IsEmpty() = 0; @@ -21,11 +33,11 @@ class IPayloadSerializer : public TThrRefBase { virtual NKikimrDataEvents::EDataFormat GetDataFormat() = 0; virtual std::vector GetWriteColumnIds() = 0; - using TBatches = THashMap>; + using TBatches = THashMap>; virtual TBatches FlushBatchesForce() = 0; - virtual TString FlushBatch(ui64 shardId) = 0; + virtual IBatchPtr FlushBatch(ui64 shardId) = 0; virtual const THashSet& GetShardIds() const = 0; virtual i64 GetMemory() = 0; From eee1263123ea2e5114963207083c40b6bf4899b7 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 7 May 2024 17:47:28 +0300 Subject: [PATCH 02/35] split --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- ydb/core/kqp/runtime/kqp_write_table.cpp | 19 +++++++++++++++++- ydb/core/tx/sharding/sharding.cpp | 25 ++++++++++++++++-------- ydb/core/tx/sharding/sharding.h | 1 + 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index a869a1914129..4226c0a4c695 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -350,7 +350,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - if (Finished || GetFreeSpace() <= 0 || SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { + if (Finished || GetFreeSpace() <= 0) { TResumeNotificationManager resumeNotificator(*this); for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { for (auto& batch : batches) { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 84813ac8b483..310117328fd9 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace NKikimr { namespace NKqp { @@ -177,6 +179,20 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { if (!BatchBuilder.Start(BuildBatchBuilderColumns(schemeEntry), 0, 0, err)) { yexception() << "Failed to start batch builder: " + err; } + + // TODO: Checks from ydb/core/tx/data_events/columnshard_splitter.cpp + const auto& description = schemeEntry.ColumnTableInfo->Description; + const auto& scheme = description.GetSchema(); + const auto& sharding = description.GetSharding(); + + NSchemeShard::TOlapSchema olapSchema; + olapSchema.ParseFromLocalDB(scheme); + auto shardingConclusion = NSharding::TShardingBase::BuildFromProto(olapSchema, sharding); + if (shardingConclusion.IsFail()) { + ythrow yexception() << "Ydb::StatusIds::SCHEME_ERROR : " << shardingConclusion.GetErrorMessage(); + } + YQL_ENSURE(shardingConclusion.GetResult() != nullptr); + Sharding = shardingConclusion.DetachResult(); } void AddData(NMiniKQL::TUnboxedValueBatch&& data, bool close) override { @@ -214,7 +230,6 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { for (auto&& shardInfo : infos) { auto& batch = Batches[shard].emplace_back( MakeIntrusive(shardInfo->GetData())); // TODO: get rid of copy - //batch = shardInfo->GetData(); Memory += batch->GetMemory(); YQL_ENSURE(batch->GetMemory() != 0); } @@ -296,6 +311,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } TString GetSerializedData() const override { + YQL_ENSURE(false); return NArrow::SerializeBatchNoCompression(Batch); } }; @@ -305,6 +321,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { const NMiniKQL::TTypeEnvironment& TypeEnv; const NSchemeCache::TSchemeCacheNavigate::TEntry& SchemeEntry; + std::shared_ptr Sharding; const TVector Columns; const std::vector WriteIndex; diff --git a/ydb/core/tx/sharding/sharding.cpp b/ydb/core/tx/sharding/sharding.cpp index 97367a27c6f3..7234806fce07 100644 --- a/ydb/core/tx/sharding/sharding.cpp +++ b/ydb/core/tx/sharding/sharding.cpp @@ -241,21 +241,30 @@ NKikimrSchemeOp::TColumnTableSharding IShardingBase::SerializeToProto() const { return result; } -NKikimr::TConclusion>> IShardingBase::SplitByShards(const std::shared_ptr& batch, const ui64 chunkBytesLimit) { - THashMap> sharding = MakeSharding(batch); - THashMap> chunks; - if (sharding.size() == 1) { - AFL_VERIFY(chunks.emplace(sharding.begin()->first, batch).second); +THashMap> IShardingBase::SplitByShardsToArrowBatches(const std::shared_ptr& batch) { + auto sharding = MakeSharding(batch); + std::vector> chunks; + if (Shards.size() == 1) { + chunks = {batch}; } else { chunks = NArrow::ShardingSplit(batch, sharding); } - AFL_VERIFY(chunks.size() == sharding.size()); - NArrow::TBatchSplitttingContext context(chunkBytesLimit); - THashMap> result; + AFL_VERIFY(chunks.size() == Shards.size()); + THashMap> result; for (auto&& [tabletId, chunk]: chunks) { if (!chunk) { continue; } + result.emplace(tabletId, std::move(chunk)); + } + return result; +} + +TConclusion>> IShardingBase::SplitByShards(const std::shared_ptr& batch, const ui64 chunkBytesLimit) { + auto splitted = SplitByShardsToArrowBatches(batch); + NArrow::TBatchSplitttingContext context(chunkBytesLimit); + THashMap> result; + for (auto [tabletId, chunk] : splitted) { auto blobsSplittedConclusion = NArrow::SplitByBlobSize(chunk, context); if (blobsSplittedConclusion.IsFail()) { return TConclusionStatus::Fail("cannot split batch in according to limits: " + blobsSplittedConclusion.GetErrorMessage()); diff --git a/ydb/core/tx/sharding/sharding.h b/ydb/core/tx/sharding/sharding.h index 1b49ba974397..4e0ba237a8b3 100644 --- a/ydb/core/tx/sharding/sharding.h +++ b/ydb/core/tx/sharding/sharding.h @@ -310,6 +310,7 @@ class IShardingBase { virtual THashMap> MakeSharding(const std::shared_ptr& batch) const = 0; + THashMap> SplitByShardsToArrowBatches(const std::shared_ptr& batch); TConclusion>> SplitByShards(const std::shared_ptr& batch, const ui64 chunkBytesLimit); virtual TString DebugString() const; From a8303ca81e530c8c380c85f7d394340b56c7bcce Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 May 2024 11:15:32 +0300 Subject: [PATCH 03/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 34 +++++++++++++++--------- ydb/core/kqp/runtime/kqp_write_table.h | 34 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 4226c0a4c695..5b7840f662bc 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -85,28 +85,36 @@ namespace { Closed = true; } - void MakeNextBatches(ui64 maxDataSize, ui64 maxCount) { + void MakeNextBatches(i64 maxDataSize, ui64 maxCount) { YQL_ENSURE(BatchesInFlight == 0); - ui64 dataSize = 0; + i64 dataSize = 0; while (BatchesInFlight < maxCount && BatchesInFlight < Batches.size() - && dataSize + GetBatch(BatchesInFlight).Data.size() <= maxDataSize) { - dataSize += GetBatch(BatchesInFlight).Data.size(); + && dataSize + GetBatch(BatchesInFlight).Batch->GetMemory() <= maxDataSize) { + dataSize += GetBatch(BatchesInFlight).Batch->GetMemory(); ++BatchesInFlight; } - YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).Data.size() <= maxDataSize); + YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).Batch->GetMemory() <= maxDataSize); } const TInFlightBatch& GetBatch(size_t index) const { return Batches.at(index); } - std::optional PopBatch(const ui64 Cookie) { - if (!IsEmpty() && Cookie == CurrentBatch().Cookie) { - auto batch = std::move(Batches.front()); - Batches.pop_front(); - Memory -= batch.Batch->GetMemory(); - return std::move(batch); + std::optional PopBatches(const ui64 cookie) { + if (BatchesInFlight != 0 && Cookie == cookie) { + ui64 dataSize = 0; + for (size_t index = 0; index < BatchesInFlight; ++index) { + dataSize += Batches.front().Batch->GetMemory(); + Batches.pop_front(); + } + + ++Cookie; + SendAttempts = 0; + BatchesInFlight = 0; + + Memory -= dataSize; + return dataSize; } return std::nullopt; } @@ -602,8 +610,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void PopShardBatch(ui64 shardId, ui64 cookie) { TResumeNotificationManager resumeNotificator(*this); auto& shardInfo = ShardsInfo.GetShard(shardId); - if (const auto batch = shardInfo.PopBatch(cookie); batch) { - EgressStats.Bytes += batch->Batch->GetMemory(); + if (const auto removedDataSize = shardInfo.PopBatches(cookie); removedDataSize) { + EgressStats.Bytes += *removedDataSize; EgressStats.Chunks++; EgressStats.Splits++; EgressStats.Resume(); diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 846e18921084..e58e8ce54463 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -56,5 +56,39 @@ IPayloadSerializerPtr CreateDataShardPayloadSerializer( const TConstArrayRef inputColumns, const NMiniKQL::TTypeEnvironment& typeEnv); + +class IShardedEvWriteController : public TThrRefBase { +public: + virtual void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) = 0; + virtual void OnPartitioningChanged( + const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, + const NSchemeCache::TSchemeCacheRequest::TEntry& partitionsEntry) = 0; + + virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; + virtual void Close() = 0; + + virtual ui64 GetNextNewShardId() = 0; + + struct TMessageMetadata { + ui64 SendAttempts = 0; + ui64 Cookie = 0; + bool IsFinal = false; + }; + virtual std::optional GetMessageMetadata(ui64 shardId) = 0; + + virtual bool SerializeMessage(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) = 0; + + virtual bool OnMessageSent(ui64 shardId, ui64 cookie) = 0; + virtual bool OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; + + virtual i64 GetMemory() = 0; + + virtual bool IsClosed() = 0; + virtual bool IsEmpty() = 0; + virtual bool IsFinished() = 0; +}; + +using IShardedEvWriteControllerPtr = TIntrusivePtr; + } } From b7f0d9cfc1b34b30ac982b0e0f176acca0f6bbc9 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 May 2024 13:03:45 +0300 Subject: [PATCH 04/35] improve_splitting --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 12 +++++--- ydb/core/kqp/runtime/kqp_write_table.cpp | 39 +++++++----------------- ydb/core/kqp/runtime/kqp_write_table.h | 2 +- 3 files changed, 20 insertions(+), 33 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 5b7840f662bc..200994bf24cc 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -351,14 +351,17 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N YQL_ENSURE(Serializer); try { - Serializer->AddData(std::move(data), Finished); + Serializer->AddData(std::move(data)); + if (Finished) { + Serializer->Close(); + } } catch (...) { RuntimeError( CurrentExceptionMessage(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - if (Finished || GetFreeSpace() <= 0) { + if (Finished || GetFreeSpace() <= 0 || SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { TResumeNotificationManager resumeNotificator(*this); for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { for (auto& batch : batches) { @@ -635,9 +638,10 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N auto& shard = ShardsInfo.GetShard(shardId); while (true) { auto batch = Serializer->FlushBatch(shardId); - if (batch && !batch->IsEmpty()) { - shard.PushBatch(std::move(batch)); + if (!batch || batch->IsEmpty()) { + break; } + shard.PushBatch(std::move(batch)); } if (shard.GetBatchesInFlight() == 0) { shard.MakeNextBatches( diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 310117328fd9..415048977177 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -172,16 +172,16 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { : TypeEnv(typeEnv) , SchemeEntry(schemeEntry) , Columns(BuildColumns(inputColumns)) - , WriteIndex(BuildWriteIndex(schemeEntry, inputColumns)) + , WriteIndex(BuildWriteIndex(SchemeEntry, inputColumns)) , WriteColumnIds(BuildWriteColumnIds(inputColumns, WriteIndex)) , BatchBuilder(arrow::Compression::UNCOMPRESSED, BuildNotNullColumns(inputColumns)) { TString err; - if (!BatchBuilder.Start(BuildBatchBuilderColumns(schemeEntry), 0, 0, err)) { + if (!BatchBuilder.Start(BuildBatchBuilderColumns(SchemeEntry), 0, 0, err)) { yexception() << "Failed to start batch builder: " + err; } // TODO: Checks from ydb/core/tx/data_events/columnshard_splitter.cpp - const auto& description = schemeEntry.ColumnTableInfo->Description; + const auto& description = SchemeEntry.ColumnTableInfo->Description; const auto& scheme = description.GetSchema(); const auto& sharding = description.GetSharding(); @@ -195,9 +195,8 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { Sharding = shardingConclusion.DetachResult(); } - void AddData(NMiniKQL::TUnboxedValueBatch&& data, bool close) override { + void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { YQL_ENSURE(!Closed); - Closed = close; TVector cells(Columns.size()); data.ForEachRow([&](const auto& row) { @@ -211,29 +210,14 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { BatchBuilder.AddRow(TConstArrayRef{cells.begin(), cells.end()}); }); + // TODO: add min limit for flushing const auto batch = BatchBuilder.FlushBatch(true); if (batch) { - const auto dataAccessor = GetDataAccessor(batch); - - auto shardsSplitter = NKikimr::NEvWrite::IShardsSplitter::BuildSplitter(SchemeEntry); - if (!shardsSplitter) { - ythrow yexception() << "Failed to build splitter"; - } - auto initStatus = shardsSplitter->SplitData(SchemeEntry, *dataAccessor); - if (!initStatus.Ok()) { - ythrow yexception() << "Failed to split batch: " << initStatus.GetErrorMessage(); - } - - const auto& splittedData = shardsSplitter->GetSplitData(); - - for (auto& [shard, infos] : splittedData.GetShardsInfo()) { - for (auto&& shardInfo : infos) { - auto& batch = Batches[shard].emplace_back( - MakeIntrusive(shardInfo->GetData())); // TODO: get rid of copy - Memory += batch->GetMemory(); - YQL_ENSURE(batch->GetMemory() != 0); - } - ShardIds.insert(shard); + for (auto [shardId, shardBatch] : Sharding->SplitByShardsToArrowBatches(batch)) { + auto& batch = Batches[shardId].emplace_back(MakeIntrusive(NArrow::SerializeBatchNoCompression(shardBatch))); + Memory += batch->GetMemory(); + YQL_ENSURE(batch->GetMemory() != 0); + ShardIds.insert(shardId); } } } @@ -388,9 +372,8 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { , KeyColumnTypes(BuildKeyColumnTypes(SchemeEntry)) { } - void AddData(NMiniKQL::TUnboxedValueBatch&& data, bool close) override { + void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { YQL_ENSURE(!Closed); - Closed = close; TVector cells(Columns.size()); data.ForEachRow([&](const auto& row) { diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index e58e8ce54463..7395092f3017 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -21,7 +21,7 @@ class IPayloadSerializer : public TThrRefBase { using IBatchPtr = TIntrusivePtr; - virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data, bool close) = 0; + virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; virtual void ReturnBatch(IBatchPtr&& batch) = 0; virtual void Close() = 0; From f9ae7addf1fdb05961d9a1f33f8942003ad391b2 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 May 2024 13:42:20 +0300 Subject: [PATCH 05/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 1 + ydb/core/kqp/runtime/kqp_write_table.cpp | 49 +++++++++--------------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 200994bf24cc..a33c326cc8b6 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -542,6 +542,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "." << " Ignored this error."); + // TODO: more retries return; } case NKikimrDataEvents::TEvWriteResult::STATUS_CANCELLED: { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 415048977177..139839a3653a 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,7 @@ namespace NKqp { namespace { constexpr ui64 MaxBatchBytes = 8_MB; +constexpr ui64 MaxPreshardedBatchBytes = 8_MB; TVector BuildColumns(const TConstArrayRef inputColumns) { TVector result; @@ -197,6 +197,9 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { YQL_ENSURE(!Closed); + if (data.empty()) { + return; + } TVector cells(Columns.size()); data.ForEachRow([&](const auto& row) { @@ -210,20 +213,24 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { BatchBuilder.AddRow(TConstArrayRef{cells.begin(), cells.end()}); }); - // TODO: add min limit for flushing - const auto batch = BatchBuilder.FlushBatch(true); - if (batch) { + FlushPresharded(false); + } + + void ReturnBatch(IPayloadSerializer::IBatchPtr&& batch) override { + Y_UNUSED(batch); + } + + void FlushPresharded(bool force) { + if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() >= MaxPreshardedBatchBytes) { + const auto batch = BatchBuilder.FlushBatch(true); + YQL_ENSURE(batch); for (auto [shardId, shardBatch] : Sharding->SplitByShardsToArrowBatches(batch)) { auto& batch = Batches[shardId].emplace_back(MakeIntrusive(NArrow::SerializeBatchNoCompression(shardBatch))); Memory += batch->GetMemory(); YQL_ENSURE(batch->GetMemory() != 0); ShardIds.insert(shardId); } - } - } - - void ReturnBatch(IPayloadSerializer::IBatchPtr&& batch) override { - Y_UNUSED(batch); + } } NKikimrDataEvents::EDataFormat GetDataFormat() override { @@ -241,6 +248,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { void Close() override { YQL_ENSURE(!Closed); Closed = true; + FlushPresharded(true); } bool IsClosed() override { @@ -256,6 +264,8 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } TBatches FlushBatchesForce() override { + FlushPresharded(true); + TBatches newBatches; std::swap(Batches, newBatches); Memory = 0; @@ -281,27 +291,6 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } private: - NKikimr::NEvWrite::IShardsSplitter::IEvWriteDataAccessor::TPtr GetDataAccessor( - const TRecordBatchPtr& batch) const { - struct TDataAccessor : public NKikimr::NEvWrite::IShardsSplitter::IEvWriteDataAccessor { - TRecordBatchPtr Batch; - - TDataAccessor(const TRecordBatchPtr& batch) - : Batch(batch) { - } - - TRecordBatchPtr GetDeserializedBatch() const override { - return Batch; - } - - TString GetSerializedData() const override { - YQL_ENSURE(false); - return NArrow::SerializeBatchNoCompression(Batch); - } - }; - - return std::make_shared(batch); - } const NMiniKQL::TTypeEnvironment& TypeEnv; const NSchemeCache::TSchemeCacheNavigate::TEntry& SchemeEntry; From da5b23918c59286d0d58c865a82fb70f0725f65f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 May 2024 16:12:15 +0300 Subject: [PATCH 06/35] fix --- ydb/core/kqp/runtime/kqp_write_table.cpp | 120 +++++++++++++++++++---- ydb/core/kqp/runtime/kqp_write_table.h | 1 + 2 files changed, 100 insertions(+), 21 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 139839a3653a..c65f05ca7e36 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -16,7 +16,7 @@ namespace NKqp { namespace { constexpr ui64 MaxBatchBytes = 8_MB; -constexpr ui64 MaxPreshardedBatchBytes = 8_MB; +constexpr ui64 MaxUnshardedBatchBytes = 8_MB; TVector BuildColumns(const TConstArrayRef inputColumns) { TVector result; @@ -151,17 +151,28 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { class TBatch : public IPayloadSerializer::IBatch { public: TString SerializeToString() const override { - return Data; + return NArrow::SerializeBatchNoCompression(Data); + } + + i64 GetSerializedMemory() const override { + // For columnshard there are no hard 8MB operation limit, + // so we can use approx estimate. + return NArrow::GetBatchDataSize(Data); } i64 GetMemory() const override { - return Data.size(); + return NArrow::GetBatchDataSize(Data); } - TBatch(TString&& data) : Data(std::move(data)) {} - TBatch(const TString& data) : Data(data) {} + TBatch(const TRecordBatchPtr& data) : Data(data) {} - TString Data; + private: + TRecordBatchPtr Data; + }; + + struct TUnpreparedBatch { + ui64 TotalDataSize = 0; + std::deque Batches; }; public: @@ -213,24 +224,81 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { BatchBuilder.AddRow(TConstArrayRef{cells.begin(), cells.end()}); }); - FlushPresharded(false); + FlushUnsharded(false); } void ReturnBatch(IPayloadSerializer::IBatchPtr&& batch) override { Y_UNUSED(batch); } - void FlushPresharded(bool force) { - if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() >= MaxPreshardedBatchBytes) { - const auto batch = BatchBuilder.FlushBatch(true); - YQL_ENSURE(batch); - for (auto [shardId, shardBatch] : Sharding->SplitByShardsToArrowBatches(batch)) { - auto& batch = Batches[shardId].emplace_back(MakeIntrusive(NArrow::SerializeBatchNoCompression(shardBatch))); - Memory += batch->GetMemory(); - YQL_ENSURE(batch->GetMemory() != 0); + void FlushUnsharded(bool force) { + if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() >= MaxUnshardedBatchBytes) { + const auto unshardedBatch = BatchBuilder.FlushBatch(true); + YQL_ENSURE(unshardedBatch); + for (auto [shardId, shardBatch] : Sharding->SplitByShardsToArrowBatches(unshardedBatch)) { + const i64 shardBatchMemory = NArrow::GetBatchDataSize(shardBatch); + YQL_ENSURE(shardBatchMemory != 0); + + auto& unpreparedBatch = UnpreparedBatches[shardId]; + unpreparedBatch.TotalDataSize += shardBatchMemory; + unpreparedBatch.Batches.emplace_back(shardBatch); + Memory += shardBatchMemory; + + FlushUnpreparedBatch(shardId, unpreparedBatch, false); + ShardIds.insert(shardId); } - } + } + } + + void FlushUnpreparedBatch(const ui64 shardId, TUnpreparedBatch& unpreparedBatch, bool force = true) { + while (!unpreparedBatch.Batches.empty() && (unpreparedBatch.TotalDataSize >= MaxBatchBytes || force)) { + std::vector toPrepare; + i64 toPrepareSize = 0; + while (!unpreparedBatch.Batches.empty()) { + auto batch = unpreparedBatch.Batches.front(); + unpreparedBatch.Batches.pop_front(); + + NArrow::TRowSizeCalculator rowCalculator(8); + if (!rowCalculator.InitBatch(batch)) { + ythrow yexception() << "unexpected column type on batch initialization for row size calculator"; + } + + i64 nextRowSize = 0; + for (i64 index = 0; index < batch->num_rows(); ++index) { + nextRowSize = rowCalculator.GetRowBytesSize(index); + + if (toPrepareSize + nextRowSize >= (i64)MaxBatchBytes) { + toPrepare.push_back(batch->Slice(0, index)); + unpreparedBatch.Batches.push_front(batch->Slice(index, batch->num_rows() - index)); + + Memory -= NArrow::GetBatchDataSize(batch); + Memory += NArrow::GetBatchDataSize(unpreparedBatch.Batches.front()); + break; + } else { + toPrepareSize += nextRowSize; + } + } + + if (toPrepareSize + nextRowSize >= (i64)MaxBatchBytes) { + break; + } + + Memory -= NArrow::GetBatchDataSize(batch); + toPrepare.push_back(batch); + } + + auto& batch = Batches[shardId].emplace_back( + MakeIntrusive(NArrow::CombineBatches(toPrepare))); + Memory += batch->GetMemory(); + YQL_ENSURE(batch->GetMemory() != 0); + } + } + + void FlushUnpreparedForce() { + for (auto& [shardId, unpreparedBatch] : UnpreparedBatches) { + FlushUnpreparedBatch(shardId, unpreparedBatch, true); + } } NKikimrDataEvents::EDataFormat GetDataFormat() override { @@ -242,13 +310,13 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } i64 GetMemory() override { - return Memory; + return Memory + BatchBuilder.Bytes(); } void Close() override { YQL_ENSURE(!Closed); Closed = true; - FlushPresharded(true); + FlushUnsharded(true); } bool IsClosed() override { @@ -264,11 +332,16 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } TBatches FlushBatchesForce() override { - FlushPresharded(true); + FlushUnsharded(true); + FlushUnpreparedForce(); TBatches newBatches; std::swap(Batches, newBatches); - Memory = 0; + for (const auto& [_, batches] : newBatches) { + for (const auto& batch : batches) { + Memory -= batch->GetMemory(); + } + } return std::move(newBatches); } @@ -283,6 +356,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { const auto batch = std::move(batches.front()); batches.pop_front(); + Memory -= batch->GetMemory(); return batch; } @@ -301,7 +375,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { const std::vector WriteColumnIds; NArrow::TArrowBatchBuilder BatchBuilder; - + THashMap UnpreparedBatches; TBatches Batches; THashSet ShardIds; @@ -337,6 +411,10 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { return Data; } + i64 GetSerializedMemory() const override { + return Data.size(); + } + i64 GetMemory() const override { return Data.size(); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 7395092f3017..086890bd3b91 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -15,6 +15,7 @@ class IPayloadSerializer : public TThrRefBase { class IBatch : public TThrRefBase { public: virtual TString SerializeToString() const = 0; + virtual i64 GetSerializedMemory() const = 0; virtual i64 GetMemory() const = 0; bool IsEmpty() const; }; From 630f28eea174fa2eb749195865b2642899ac6c10 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 May 2024 16:22:31 +0300 Subject: [PATCH 07/35] buffer --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index a33c326cc8b6..353d23a79bdb 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -361,7 +361,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - if (Finished || GetFreeSpace() <= 0 || SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { + if (Finished || GetFreeSpace() <= 0) { TResumeNotificationManager resumeNotificator(*this); for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { for (auto& batch : batches) { From 833b1009a366b127c84253f498a3cffdd67eaa2c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 13 May 2024 12:40:35 +0300 Subject: [PATCH 08/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 4 +- ydb/core/kqp/runtime/kqp_write_table.cpp | 75 +++++++++++++----------- ydb/core/kqp/runtime/kqp_write_table.h | 5 +- ydb/core/scheme/scheme_tablecell.cpp | 14 +---- ydb/core/scheme/scheme_tablecell.h | 12 ++-- 5 files changed, 51 insertions(+), 59 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 353d23a79bdb..1a16dfc642ee 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -295,7 +295,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Settings.GetInconsistentTx()) { YQL_ENSURE(std::holds_alternative(TxId)); - YQL_ENSURE(!InconsistentTx || ImmediateTx); + YQL_ENSURE(!InconsistentTx || !ImmediateTx); EgressStats.Level = args.StatsLevel; } @@ -694,7 +694,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N locks->AddReceivingShards(shardId); *locks->AddLocks() = *shard.GetLock(); } - } else { + } else if (!InconsistentTx) { evWrite->SetLockId(Settings.GetLockTxId(), Settings.GetLockNodeId()); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index c65f05ca7e36..d273a0e7983e 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -154,20 +154,24 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { return NArrow::SerializeBatchNoCompression(Data); } - i64 GetSerializedMemory() const override { - // For columnshard there are no hard 8MB operation limit, - // so we can use approx estimate. - return NArrow::GetBatchDataSize(Data); + i64 GetMemory() const override { + return Memory; } - i64 GetMemory() const override { - return NArrow::GetBatchDataSize(Data); + TRecordBatchPtr Extract() { + Memory = 0; + TRecordBatchPtr result = std::move(Data); + return result; } - TBatch(const TRecordBatchPtr& data) : Data(data) {} + TBatch(const TRecordBatchPtr& data) + : Data(data) + , Memory(NArrow::GetBatchDataSize(Data)) { + } private: TRecordBatchPtr Data; + i64 Memory; }; struct TUnpreparedBatch { @@ -227,7 +231,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { FlushUnsharded(false); } - void ReturnBatch(IPayloadSerializer::IBatchPtr&& batch) override { + void AddBatch(IPayloadSerializer::IBatchPtr&& batch) override { Y_UNUSED(batch); } @@ -385,43 +389,38 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { }; class TDataShardPayloadSerializer : public IPayloadSerializer { - /*class TBatch : public IPayloadSerializer::IBatch { + class TBatch : public IPayloadSerializer::IBatch { public: TString SerializeToString() const override { - return ; + return TSerializedCellMatrix::Serialize(Data, Rows, Columns); } i64 GetMemory() const override { return Size; } + bool IsEmpty() const { + return Data.empty(); + } + std::vector Extract() { Size = 0; + Rows = 0; return std::move(Data); } + TBatch(std::vector&& data, i64 size, ui32 rows, ui16 columns) + : Data(std::move(data)) + , Size(size) + , Rows(rows) + , Columns(columns) { + } + private: std::vector Data; ui64 Size = 0; - };*/ - - class TBatch : public IPayloadSerializer::IBatch { - public: - TString SerializeToString() const override { - return Data; - } - - i64 GetSerializedMemory() const override { - return Data.size(); - } - - i64 GetMemory() const override { - return Data.size(); - } - - TBatch(TString&& data) : Data(std::move(data)) {} - - TString Data; + ui32 Rows = 0; + ui16 Columns = 0; }; public: @@ -480,7 +479,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { }); } - void ReturnBatch(IPayloadSerializer::IBatchPtr&& batch) override { + void AddBatch(IPayloadSerializer::IBatchPtr&& batch) override { Y_UNUSED(batch); } @@ -513,10 +512,16 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { return IsClosed() && IsEmpty(); } - TString ExtractNextBatch(TCellsBatcher& batcher, bool force) { + IBatchPtr ExtractNextBatch(TCellsBatcher& batcher, bool force) { auto batchResult = batcher.Flush(force); Memory -= batchResult.Memory; - return batchResult.Data; + const ui32 rows = batchResult.Data.size() / Columns.size(); + YQL_ENSURE(Columns.size() <= std::numeric_limits::max()); + return MakeIntrusive( + std::move(batchResult.Data), + static_cast(batchResult.MemorySerialized), + rows, + static_cast(Columns.size())); } TBatches FlushBatchesForce() override { @@ -524,10 +529,10 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { for (auto& [shardId, batcher] : Batchers) { while (true) { auto batch = ExtractNextBatch(batcher, true); - if (batch.empty()) { + if (batch->IsEmpty()) { break; } - result[shardId].emplace_back(MakeIntrusive(std::move(batch))); + result[shardId].emplace_back(batch); }; } Batchers.clear(); @@ -539,7 +544,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { return {}; } auto& batcher = Batchers.at(shardId); - return MakeIntrusive(ExtractNextBatch(batcher, false)); + return ExtractNextBatch(batcher, false); } const THashSet& GetShardIds() const override { diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 086890bd3b91..2f9901d0ca01 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -15,7 +15,6 @@ class IPayloadSerializer : public TThrRefBase { class IBatch : public TThrRefBase { public: virtual TString SerializeToString() const = 0; - virtual i64 GetSerializedMemory() const = 0; virtual i64 GetMemory() const = 0; bool IsEmpty() const; }; @@ -23,7 +22,7 @@ class IPayloadSerializer : public TThrRefBase { using IBatchPtr = TIntrusivePtr; virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; - virtual void ReturnBatch(IBatchPtr&& batch) = 0; + virtual void AddBatch(IBatchPtr&& batch) = 0; virtual void Close() = 0; @@ -71,8 +70,8 @@ class IShardedEvWriteController : public TThrRefBase { virtual ui64 GetNextNewShardId() = 0; struct TMessageMetadata { - ui64 SendAttempts = 0; ui64 Cookie = 0; + ui64 OperationsCount = 0; bool IsFinal = false; }; virtual std::optional GetMessageMetadata(ui64 shardId) = 0; diff --git a/ydb/core/scheme/scheme_tablecell.cpp b/ydb/core/scheme/scheme_tablecell.cpp index ceee7e618527..04f008769799 100644 --- a/ydb/core/scheme/scheme_tablecell.cpp +++ b/ydb/core/scheme/scheme_tablecell.cpp @@ -316,19 +316,11 @@ bool TCellsBatcher::IsEmpty() const { return Batches.empty(); } -TCellsBatcher::TOutputBatch TCellsBatcher::Flush(bool force) { - TOutputBatch res; +TCellsBatcher::TBatch TCellsBatcher::Flush(bool force) { + TBatch res; if ((!Batches.empty() && force) || Batches.size() > 1) { - TOutputBatch res; - res.Memory = Batches.front().Memory; - SerializeCellMatrix( - Batches.front().Data, - Batches.front().Data.size() / ColCount, - ColCount, - res.Data, - nullptr /*resultCells*/); + res = std::move(Batches.front()); Batches.pop_front(); - return res; } return res; } diff --git a/ydb/core/scheme/scheme_tablecell.h b/ydb/core/scheme/scheme_tablecell.h index 385446319c9c..7b4aece02799 100644 --- a/ydb/core/scheme/scheme_tablecell.h +++ b/ydb/core/scheme/scheme_tablecell.h @@ -659,21 +659,17 @@ class TCellsBatcher { bool IsEmpty() const; - struct TOutputBatch { + struct TBatch { ui64 Memory = 0; - TString Data; + ui64 MemorySerialized = 0; + TVector Data; }; - TOutputBatch Flush(bool force); + TBatch Flush(bool force); ui64 AddRow(TVector&& cells); private: - struct TBatch { - ui64 Memory = 0; - ui64 MemorySerialized = 0; - TVector Data; - }; std::deque Batches; ui16 ColCount; From 9b57f7510c49e2b4be0fbb29406e2a2784403332 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 13 May 2024 15:13:31 +0300 Subject: [PATCH 09/35] batches --- ydb/core/kqp/runtime/kqp_write_table.cpp | 94 ++++++++++++++---------- ydb/core/kqp/runtime/kqp_write_table.h | 4 +- ydb/core/scheme/scheme_tablecell.cpp | 4 +- ydb/core/scheme/scheme_tablecell.h | 2 +- 4 files changed, 61 insertions(+), 43 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index d273a0e7983e..5700d4292df7 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -232,30 +232,38 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } void AddBatch(IPayloadSerializer::IBatchPtr&& batch) override { - Y_UNUSED(batch); + auto columnshardBatch = dynamic_cast(batch.Get()); + YQL_ENSURE(columnshardBatch); + auto data = columnshardBatch->Extract(); + YQL_ENSURE(data); + ShardAndFlushBatch(data, false); } void FlushUnsharded(bool force) { if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() >= MaxUnshardedBatchBytes) { const auto unshardedBatch = BatchBuilder.FlushBatch(true); YQL_ENSURE(unshardedBatch); - for (auto [shardId, shardBatch] : Sharding->SplitByShardsToArrowBatches(unshardedBatch)) { - const i64 shardBatchMemory = NArrow::GetBatchDataSize(shardBatch); - YQL_ENSURE(shardBatchMemory != 0); + ShardAndFlushBatch(unshardedBatch, force); + } + } - auto& unpreparedBatch = UnpreparedBatches[shardId]; - unpreparedBatch.TotalDataSize += shardBatchMemory; - unpreparedBatch.Batches.emplace_back(shardBatch); - Memory += shardBatchMemory; + void ShardAndFlushBatch(const TRecordBatchPtr& unshardedBatch, bool force) { + for (auto [shardId, shardBatch] : Sharding->SplitByShardsToArrowBatches(unshardedBatch)) { + const i64 shardBatchMemory = NArrow::GetBatchDataSize(shardBatch); + YQL_ENSURE(shardBatchMemory != 0); - FlushUnpreparedBatch(shardId, unpreparedBatch, false); + auto& unpreparedBatch = UnpreparedBatches[shardId]; + unpreparedBatch.TotalDataSize += shardBatchMemory; + unpreparedBatch.Batches.emplace_back(shardBatch); + Memory += shardBatchMemory; - ShardIds.insert(shardId); - } + FlushUnpreparedBatch(shardId, unpreparedBatch, force); + + ShardIds.insert(shardId); } } - void FlushUnpreparedBatch(const ui64 shardId, TUnpreparedBatch& unpreparedBatch, bool force = true) { + void FlushUnpreparedBatch(const ui64 shardId, TUnpreparedBatch& unpreparedBatch, bool force) { while (!unpreparedBatch.Batches.empty() && (unpreparedBatch.TotalDataSize >= MaxBatchBytes || force)) { std::vector toPrepare; i64 toPrepareSize = 0; @@ -337,7 +345,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { TBatches FlushBatchesForce() override { FlushUnsharded(true); - FlushUnpreparedForce(); + //FlushUnpreparedForce(); TBatches newBatches; std::swap(Batches, newBatches); @@ -438,13 +446,35 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { , KeyColumnTypes(BuildKeyColumnTypes(SchemeEntry)) { } + void AddRow(TArrayRef row, const TKeyDesc& keyRange) { + auto shardIter = std::lower_bound( + std::begin(keyRange.GetPartitions()), + std::end(keyRange.GetPartitions()), + TArrayRef(row.data(), KeyColumnTypes.size()), + [this](const auto &partition, const auto& key) { + const auto& range = *partition.Range; + return 0 > CompareBorders(range.EndKeyPrefix.GetCells(), key, + range.IsInclusive || range.IsPoint, true, KeyColumnTypes); + }); + + YQL_ENSURE(shardIter != keyRange.GetPartitions().end()); + + auto batcherIter = Batchers.find(shardIter->ShardId); + if (batcherIter == std::end(Batchers)) { + Batchers.emplace( + shardIter->ShardId, + TCellsBatcher(Columns.size(), MaxBatchBytes)); + } + + Memory += Batchers.at(shardIter->ShardId).AddRow(row); + ShardIds.insert(shardIter->ShardId); + } + void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { YQL_ENSURE(!Closed); TVector cells(Columns.size()); data.ForEachRow([&](const auto& row) { - const auto& keyRange = GetKeyRange(); - for (size_t index = 0; index < Columns.size(); ++index) { cells[WriteIndex[index]] = MakeCell( Columns[index].PType, @@ -452,35 +482,23 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { TypeEnv, /* copy */ true); } - - auto shardIter = std::lower_bound( - std::begin(keyRange.GetPartitions()), - std::end(keyRange.GetPartitions()), - TArrayRef(cells.data(), KeyColumnTypes.size()), - [this](const auto &partition, const auto& key) { - const auto& range = *partition.Range; - return 0 > CompareBorders(range.EndKeyPrefix.GetCells(), key, - range.IsInclusive || range.IsPoint, true, KeyColumnTypes); - }); - - YQL_ENSURE(shardIter != keyRange.GetPartitions().end()); - - auto batcherIter = Batchers.find(shardIter->ShardId); - if (batcherIter == std::end(Batchers)) { - Batchers.emplace( - shardIter->ShardId, - TCellsBatcher(Columns.size(), MaxBatchBytes)); - } - - Memory += Batchers.at(shardIter->ShardId).AddRow(std::move(cells)); - ShardIds.insert(shardIter->ShardId); + AddRow(cells, GetKeyRange()); cells.resize(Columns.size()); }); } void AddBatch(IPayloadSerializer::IBatchPtr&& batch) override { - Y_UNUSED(batch); + auto datashardBatch = dynamic_cast(batch.Get()); + YQL_ENSURE(datashardBatch); + auto data = datashardBatch->Extract(); + const auto rows = data.size() / Columns.size(); + + for (size_t rowIndex = 0; rowIndex < rows; ++rowIndex) { + AddRow( + TArrayRef{&data[rowIndex * Columns.size()], Columns.size()}, + GetKeyRange()); + } } NKikimrDataEvents::EDataFormat GetDataFormat() override { diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 2f9901d0ca01..53c372c30265 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -57,7 +57,7 @@ IPayloadSerializerPtr CreateDataShardPayloadSerializer( const NMiniKQL::TTypeEnvironment& typeEnv); -class IShardedEvWriteController : public TThrRefBase { +class IShardedWriteController : public TThrRefBase { public: virtual void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) = 0; virtual void OnPartitioningChanged( @@ -88,7 +88,7 @@ class IShardedEvWriteController : public TThrRefBase { virtual bool IsFinished() = 0; }; -using IShardedEvWriteControllerPtr = TIntrusivePtr; +using IShardedWriteControllerPtr = TIntrusivePtr; } } diff --git a/ydb/core/scheme/scheme_tablecell.cpp b/ydb/core/scheme/scheme_tablecell.cpp index 04f008769799..2ee86265b1a4 100644 --- a/ydb/core/scheme/scheme_tablecell.cpp +++ b/ydb/core/scheme/scheme_tablecell.cpp @@ -325,7 +325,7 @@ TCellsBatcher::TBatch TCellsBatcher::Flush(bool force) { return res; } -ui64 TCellsBatcher::AddRow(TVector&& cells) { +ui64 TCellsBatcher::AddRow(TArrayRef cells) { Y_ABORT_UNLESS(cells.size() == ColCount); ui64 newMemory = 0; for (const auto& cell : cells) { @@ -337,7 +337,7 @@ ui64 TCellsBatcher::AddRow(TVector&& cells) { Batches.back().MemorySerialized = CellMatrixHeaderSize; } - for (const auto& cell : cells) { + for (auto& cell : cells) { Batches.back().Data.emplace_back(std::move(cell)); } diff --git a/ydb/core/scheme/scheme_tablecell.h b/ydb/core/scheme/scheme_tablecell.h index 7b4aece02799..b7635a424725 100644 --- a/ydb/core/scheme/scheme_tablecell.h +++ b/ydb/core/scheme/scheme_tablecell.h @@ -667,7 +667,7 @@ class TCellsBatcher { TBatch Flush(bool force); - ui64 AddRow(TVector&& cells); + ui64 AddRow(TArrayRef cells); private: std::deque Batches; From 2731a48602ee0602f6d5dc96ccaa20cbe3b3872a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 14 May 2024 15:19:40 +0300 Subject: [PATCH 10/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- ydb/core/kqp/runtime/kqp_write_table.cpp | 235 ++++++++++++++++++++++- ydb/core/kqp/runtime/kqp_write_table.h | 9 +- 3 files changed, 235 insertions(+), 11 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 1a16dfc642ee..a6baed3f9644 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -792,7 +792,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } else { Serializer = CreateDataShardPayloadSerializer( *SchemeEntry, - *SchemeRequest, + std::move(*SchemeRequest), columnsMetadata, TypeEnv); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 5700d4292df7..180ca379e223 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -379,7 +379,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { private: const NMiniKQL::TTypeEnvironment& TypeEnv; - const NSchemeCache::TSchemeCacheNavigate::TEntry& SchemeEntry; + const NSchemeCache::TSchemeCacheNavigate::TEntry SchemeEntry; std::shared_ptr Sharding; const TVector Columns; @@ -434,12 +434,12 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { public: TDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const NSchemeCache::TSchemeCacheRequest::TEntry& partitionsEntry, + NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, const TConstArrayRef inputColumns, const NMiniKQL::TTypeEnvironment& typeEnv) : TypeEnv(typeEnv) , SchemeEntry(schemeEntry) - , PartitionsEntry(partitionsEntry) + , KeyDescription(std::move(partitionsEntry.KeyDescription)) , Columns(BuildColumns(inputColumns)) , WriteIndex(BuildWriteIndexKeyFirst(SchemeEntry, inputColumns)) , WriteColumnIds(BuildWriteColumnIds(inputColumns, WriteIndex)) @@ -571,12 +571,13 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { private: const TKeyDesc& GetKeyRange() const { - return *PartitionsEntry.KeyDescription.Get(); + return *KeyDescription; } const NMiniKQL::TTypeEnvironment& TypeEnv; - const NSchemeCache::TSchemeCacheNavigate::TEntry& SchemeEntry; - const NSchemeCache::TSchemeCacheRequest::TEntry& PartitionsEntry; + const NSchemeCache::TSchemeCacheNavigate::TEntry SchemeEntry; + THolder KeyDescription; + //const NSchemeCache::TSchemeCacheRequest::TEntry PartitionsEntry; const TVector Columns; const std::vector WriteIndex; @@ -607,11 +608,229 @@ IPayloadSerializerPtr CreateColumnShardPayloadSerializer( IPayloadSerializerPtr CreateDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const NSchemeCache::TSchemeCacheRequest::TEntry& partitionsEntry, + NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, const TConstArrayRef inputColumns, const NMiniKQL::TTypeEnvironment& typeEnv) { return MakeIntrusive( - schemeEntry, partitionsEntry, inputColumns, typeEnv); + schemeEntry, std::move(partitionsEntry), inputColumns, typeEnv); +} + +namespace { + +class TShardsInfo { +public: + class TShardInfo { + friend class TShardsInfo; + TShardInfo(i64& memory) + : Memory(memory) { + } + + public: + size_t Size() const { + return Batches.size(); + } + + bool IsEmpty() const { + return Batches.empty(); + } + + bool IsClosed() const { + return Closed; + } + + bool IsFinished() const { + return IsClosed() && IsEmpty(); + } + + void Close() { + Closed = true; + } + + void MakeNextBatches(i64 maxDataSize, ui64 maxCount) { + YQL_ENSURE(BatchesInFlight == 0); + i64 dataSize = 0; + while (BatchesInFlight < maxCount + && BatchesInFlight < Batches.size() + && dataSize + GetBatch(BatchesInFlight)->GetMemory() <= maxDataSize) { + dataSize += GetBatch(BatchesInFlight)->GetMemory(); + ++BatchesInFlight; + } + YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight)->GetMemory() <= maxDataSize); + } + + const IPayloadSerializer::IBatchPtr& GetBatch(size_t index) const { + return Batches.at(index); + } + + std::optional PopBatches(const ui64 cookie) { + if (BatchesInFlight != 0 && Cookie == cookie) { + ui64 dataSize = 0; + for (size_t index = 0; index < BatchesInFlight; ++index) { + dataSize += Batches.front()->GetMemory(); + Batches.pop_front(); + } + + ++Cookie; + SendAttempts = 0; + BatchesInFlight = 0; + + Memory -= dataSize; + return dataSize; + } + return std::nullopt; + } + + void PushBatch(IPayloadSerializer::IBatchPtr&& batch) { + YQL_ENSURE(!IsClosed()); + Batches.emplace_back(std::move(batch)); + Memory += Batches.back()->GetMemory(); + } + + ui64 GetCookie() const { + return Cookie; + } + + size_t GetBatchesInFlight() const { + return BatchesInFlight; + } + + ui32 GetSendAttempts() const { + return SendAttempts; + } + + void IncSendAttempts() { + ++SendAttempts; + } + + private: + std::deque Batches; + bool Closed = false; + i64& Memory; + + ui64 Cookie = 1; + ui32 SendAttempts = 0; + size_t BatchesInFlight = 0; + }; + + TShardInfo& GetShard(const ui64 shard) { + auto it = ShardsInfo.find(shard); + if (it != std::end(ShardsInfo)) { + return it->second; + } + + auto [insertIt, _] = ShardsInfo.emplace(shard, TShardInfo(Memory)); + return insertIt->second; + } + + TVector GetPendingShards() { + TVector result; + for (const auto& [id, shard] : ShardsInfo) { + if (!shard.IsEmpty() && shard.GetSendAttempts() == 0) { + result.push_back(id); + } + } + return result; + } + + bool Has(ui64 shardId) const { + return ShardsInfo.contains(shardId); + } + + bool IsEmpty() const { + for (const auto& [_, shard] : ShardsInfo) { + if (!shard.IsEmpty()) { + return false; + } + } + return true; + } + + bool IsFinished() const { + for (const auto& [_, shard] : ShardsInfo) { + if (!shard.IsFinished()) { + return false; + } + } + return true; + } + + THashMap& GetShards() { + return ShardsInfo; + } + + i64 GetMemory() const { + return Memory; + } + +private: + THashMap ShardsInfo; + i64 Memory = 0; +}; + +/*class TShardedWriteController : public IShardedWriteController { +public: + void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { + if (Serializer) { + ForceFlush(); + } + //Serializer = ... + ReshardData(); + } + + void OnPartitioningChanged( + const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, + NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { + if (Serializer) { + ForceFlush(); + } + //Serializer = ... + ReshardData(); + } + + void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { + } + + void Close() override { + Close = true; + + } + + ui64 GetNextNewShardId() override { + } + + std::optional GetMessageMetadata(ui64 shardId) override { + } + + bool SerializeMessage(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) override { + } + + bool OnMessageSent(ui64 shardId, ui64 cookie) override { + } + + bool OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { + } + + i64 GetMemory() override { + } + + bool IsClosed() override { + return Closed; + } + + bool IsEmpty() override { + } + + bool IsFinished() override { + return IsClosed() && IsEmpty(); + } + +private: + TShardsInfo ShardsInfo; + bool Closed = false; + + IPayloadSerializerPtr Serializer = nullptr; +};*/ + } } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 53c372c30265..dacbf79d1c3b 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -52,7 +52,7 @@ IPayloadSerializerPtr CreateColumnShardPayloadSerializer( IPayloadSerializerPtr CreateDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const NSchemeCache::TSchemeCacheRequest::TEntry& partitionsEntry, + NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, const TConstArrayRef inputColumns, const NMiniKQL::TTypeEnvironment& typeEnv); @@ -62,7 +62,7 @@ class IShardedWriteController : public TThrRefBase { virtual void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) = 0; virtual void OnPartitioningChanged( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const NSchemeCache::TSchemeCacheRequest::TEntry& partitionsEntry) = 0; + NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) = 0; virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; virtual void Close() = 0; @@ -90,5 +90,10 @@ class IShardedWriteController : public TThrRefBase { using IShardedWriteControllerPtr = TIntrusivePtr; + +IShardedWriteControllerPtr CreateShardedWriteController( + TVector&& inputColumns, + const NMiniKQL::TTypeEnvironment& typeEnv); + } } From 196af822e05850db468c1f934b9c6ac28018afac Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 17 May 2024 17:53:17 +0300 Subject: [PATCH 11/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 6 ++++-- ydb/core/kqp/runtime/kqp_write_table.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index a6baed3f9644..1be8baa6c38a 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -295,7 +295,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Settings.GetInconsistentTx()) { YQL_ENSURE(std::holds_alternative(TxId)); - YQL_ENSURE(!InconsistentTx || !ImmediateTx); + YQL_ENSURE(!InconsistentTx && !ImmediateTx); EgressStats.Level = args.StatsLevel; } @@ -361,7 +361,9 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - if (Finished || GetFreeSpace() <= 0) { + CA_LOG_D("ADDED data:" << GetFreeSpace() << " " << Serializer->GetMemory() << "."); + + if (Finished || GetFreeSpace() <= 0/* || SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable*/) { TResumeNotificationManager resumeNotificator(*this); for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { for (auto& batch : batches) { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 180ca379e223..cef208f7d124 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -265,6 +265,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { void FlushUnpreparedBatch(const ui64 shardId, TUnpreparedBatch& unpreparedBatch, bool force) { while (!unpreparedBatch.Batches.empty() && (unpreparedBatch.TotalDataSize >= MaxBatchBytes || force)) { + YQL_ENSURE(false, "shard and flush called"); std::vector toPrepare; i64 toPrepareSize = 0; while (!unpreparedBatch.Batches.empty()) { @@ -281,10 +282,11 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { nextRowSize = rowCalculator.GetRowBytesSize(index); if (toPrepareSize + nextRowSize >= (i64)MaxBatchBytes) { + Memory -= NArrow::GetBatchDataSize(batch); + toPrepare.push_back(batch->Slice(0, index)); unpreparedBatch.Batches.push_front(batch->Slice(index, batch->num_rows() - index)); - Memory -= NArrow::GetBatchDataSize(batch); Memory += NArrow::GetBatchDataSize(unpreparedBatch.Batches.front()); break; } else { @@ -345,7 +347,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { TBatches FlushBatchesForce() override { FlushUnsharded(true); - //FlushUnpreparedForce(); + FlushUnpreparedForce(); TBatches newBatches; std::swap(Batches, newBatches); From 4a823199b174c9352a4f5d6e470ec5a89ac39693 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 22 May 2024 18:18:08 +0300 Subject: [PATCH 12/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- ydb/core/kqp/runtime/kqp_write_table.cpp | 42 +++++++++++++++--------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 1be8baa6c38a..7ce8eebf7243 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -363,7 +363,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_D("ADDED data:" << GetFreeSpace() << " " << Serializer->GetMemory() << "."); - if (Finished || GetFreeSpace() <= 0/* || SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable*/) { + if (Finished || GetFreeSpace() <= 0) { TResumeNotificationManager resumeNotificator(*this); for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { for (auto& batch : batches) { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index cef208f7d124..5051dd0b1a37 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -2,13 +2,14 @@ #include #include +#include #include -#include -#include #include -#include +#include #include #include +#include +#include namespace NKikimr { namespace NKqp { @@ -179,6 +180,8 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { std::deque Batches; }; + const TString LogPrefix = "ColumnShardPayloadSerializer"; + public: TColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, @@ -265,47 +268,54 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { void FlushUnpreparedBatch(const ui64 shardId, TUnpreparedBatch& unpreparedBatch, bool force) { while (!unpreparedBatch.Batches.empty() && (unpreparedBatch.TotalDataSize >= MaxBatchBytes || force)) { - YQL_ENSURE(false, "shard and flush called"); std::vector toPrepare; i64 toPrepareSize = 0; while (!unpreparedBatch.Batches.empty()) { auto batch = unpreparedBatch.Batches.front(); unpreparedBatch.Batches.pop_front(); + YQL_ENSURE(batch->num_rows() > 0); + const auto batchDataSize = NArrow::GetBatchDataSize(batch); + unpreparedBatch.TotalDataSize -= batchDataSize; + Memory -= batchDataSize; NArrow::TRowSizeCalculator rowCalculator(8); if (!rowCalculator.InitBatch(batch)) { ythrow yexception() << "unexpected column type on batch initialization for row size calculator"; } - i64 nextRowSize = 0; + bool splitted = false; for (i64 index = 0; index < batch->num_rows(); ++index) { - nextRowSize = rowCalculator.GetRowBytesSize(index); + i64 nextRowSize = rowCalculator.GetRowBytesSize(index); if (toPrepareSize + nextRowSize >= (i64)MaxBatchBytes) { - Memory -= NArrow::GetBatchDataSize(batch); + YQL_ENSURE(index > 0); toPrepare.push_back(batch->Slice(0, index)); unpreparedBatch.Batches.push_front(batch->Slice(index, batch->num_rows() - index)); - Memory += NArrow::GetBatchDataSize(unpreparedBatch.Batches.front()); + const auto newBatchDataSize = NArrow::GetBatchDataSize(unpreparedBatch.Batches.front()); + + unpreparedBatch.TotalDataSize += batchDataSize; + Memory += newBatchDataSize; + + splitted = true; break; } else { toPrepareSize += nextRowSize; } } - if (toPrepareSize + nextRowSize >= (i64)MaxBatchBytes) { + if (splitted) { break; } - Memory -= NArrow::GetBatchDataSize(batch); toPrepare.push_back(batch); } - auto& batch = Batches[shardId].emplace_back( - MakeIntrusive(NArrow::CombineBatches(toPrepare))); + auto batch = MakeIntrusive(NArrow::CombineBatches(toPrepare)); + Batches[shardId].emplace_back(batch); Memory += batch->GetMemory(); - YQL_ENSURE(batch->GetMemory() != 0); + YQL_ENSURE(batch->GetMemory() != 0); } } @@ -331,6 +341,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { YQL_ENSURE(!Closed); Closed = true; FlushUnsharded(true); + FlushUnpreparedForce(); } bool IsClosed() override { @@ -363,14 +374,15 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { if (!Batches.contains(shardId)) { return {}; } - auto batches = Batches.at(shardId); + auto& batches = Batches.at(shardId); if (batches.empty()) { return {}; } - const auto batch = std::move(batches.front()); + auto batch = std::move(batches.front()); batches.pop_front(); Memory -= batch->GetMemory(); + return batch; } From adc05a826833c7b34f6bb541af8879fc992b7922 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 23 May 2024 17:56:18 +0300 Subject: [PATCH 13/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 - ydb/core/kqp/runtime/kqp_write_table.cpp | 58 ++++++++++++++++++++---- ydb/core/kqp/runtime/kqp_write_table.h | 2 + 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 7ce8eebf7243..5a50e5257f65 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -361,8 +361,6 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - CA_LOG_D("ADDED data:" << GetFreeSpace() << " " << Serializer->GetMemory() << "."); - if (Finished || GetFreeSpace() <= 0) { TResumeNotificationManager resumeNotificator(*this); for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 5051dd0b1a37..f0d3b9c4473b 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -180,8 +180,6 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { std::deque Batches; }; - const TString LogPrefix = "ColumnShardPayloadSerializer"; - public: TColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, @@ -591,7 +589,6 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { const NMiniKQL::TTypeEnvironment& TypeEnv; const NSchemeCache::TSchemeCacheNavigate::TEntry SchemeEntry; THolder KeyDescription; - //const NSchemeCache::TSchemeCacheRequest::TEntry PartitionsEntry; const TVector Columns; const std::vector WriteIndex; @@ -781,13 +778,16 @@ class TShardsInfo { i64 Memory = 0; }; -/*class TShardedWriteController : public IShardedWriteController { +class TShardedWriteController : public IShardedWriteController { public: void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { if (Serializer) { - ForceFlush(); + Flush(true); } - //Serializer = ... + Serializer = CreateColumnShardPayloadSerializer( + schemeEntry, + InputColumnsMetadata, + TypeEnv); ReshardData(); } @@ -795,18 +795,43 @@ class TShardsInfo { const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { if (Serializer) { - ForceFlush(); + Flush(true); } - //Serializer = ... + Serializer = CreateDataShardPayloadSerializer( + schemeEntry, + partitionsEntry, + InputColumnsMetadata, + TypeEnv); ReshardData(); } void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { + YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); + YQL_ENSURE(!Closed); + + CA_LOG_D("New data: size=" << size << ", finished=" << finished << "."); + + YQL_ENSURE(Serializer); + try { + Serializer->AddData(std::move(data)); + } catch (...) { + RuntimeError( + CurrentExceptionMessage(), + NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } + + Flush(GetMemory() >= MemoryLimit); } void Close() override { Close = true; + Flush(true); + YQL_ENSURE(Serializer->IsFinished()); + for (auto& [shardId, shardInfo] : ShardsInfo.GetShards()) { + shardInfo.Close(); + } + ProcessData(); } ui64 GetNextNewShardId() override { @@ -825,6 +850,7 @@ class TShardsInfo { } i64 GetMemory() override { + return Serializer->GetMemory() + ShardsInfo.GetMemory(); } bool IsClosed() override { @@ -839,11 +865,25 @@ class TShardsInfo { } private: + void ForceFlush() { + } + + void ProcessData() { + } + + void ReshardData() { + } + + TString LogPrefix = "ShardedWriteController"; + TShardsInfo ShardsInfo; bool Closed = false; + TVector InputColumnsMetadata; + const NMiniKQL::TTypeEnvironment& TypeEnv; + IPayloadSerializerPtr Serializer = nullptr; -};*/ +}; } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index dacbf79d1c3b..96bc23a63ae5 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -45,6 +45,7 @@ class IPayloadSerializer : public TThrRefBase { using IPayloadSerializerPtr = TIntrusivePtr; + IPayloadSerializerPtr CreateColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, const TConstArrayRef inputColumns, @@ -65,6 +66,7 @@ class IShardedWriteController : public TThrRefBase { NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) = 0; virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; + virtual void ProcessData(bool forceFlush) = 0; virtual void Close() = 0; virtual ui64 GetNextNewShardId() = 0; From 5e31ea556fba19c5e2b46cd04daa689f97f53cc9 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 24 May 2024 15:15:21 +0300 Subject: [PATCH 14/35] fix --- ydb/core/kqp/runtime/kqp_write_table.cpp | 137 +++++++++++++++++------ ydb/core/kqp/runtime/kqp_write_table.h | 29 +++-- 2 files changed, 125 insertions(+), 41 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index f0d3b9c4473b..e494fe39e238 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -782,7 +784,7 @@ class TShardedWriteController : public IShardedWriteController { public: void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { if (Serializer) { - Flush(true); + FlushSerializer(true); } Serializer = CreateColumnShardPayloadSerializer( schemeEntry, @@ -795,11 +797,11 @@ class TShardedWriteController : public IShardedWriteController { const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { if (Serializer) { - Flush(true); + FlushSerializer(true); } Serializer = CreateDataShardPayloadSerializer( schemeEntry, - partitionsEntry, + std::move(partitionsEntry), InputColumnsMetadata, TypeEnv); ReshardData(); @@ -809,66 +811,136 @@ class TShardedWriteController : public IShardedWriteController { YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); YQL_ENSURE(!Closed); - CA_LOG_D("New data: size=" << size << ", finished=" << finished << "."); - YQL_ENSURE(Serializer); - try { - Serializer->AddData(std::move(data)); - } catch (...) { - RuntimeError( - CurrentExceptionMessage(), - NYql::NDqProto::StatusIds::INTERNAL_ERROR); - } + Serializer->AddData(std::move(data)); - Flush(GetMemory() >= MemoryLimit); + FlushSerializer(GetMemory() >= Settings.MemoryLimitTotal); + BuildBatches(); } void Close() override { - Close = true; - Flush(true); + YQL_ENSURE(Serializer); + Closed = true; + FlushSerializer(true); YQL_ENSURE(Serializer->IsFinished()); for (auto& [shardId, shardInfo] : ShardsInfo.GetShards()) { shardInfo.Close(); } - - ProcessData(); + BuildBatches(); } - ui64 GetNextNewShardId() override { - } + //ui64 GetNextNewShardId() override { + //} std::optional GetMessageMetadata(ui64 shardId) override { - } + const auto& shardInfo = ShardsInfo.GetShard(shardId); + if (shardInfo.IsEmpty()) { + return {}; + } - bool SerializeMessage(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) override { + TMessageMetadata meta; + meta.Cookie = shardInfo.GetCookie(); + meta.OperationsCount = shardInfo.GetBatchesInFlight(); + meta.IsFinal = shardInfo.IsClosed() && shardInfo.Size() == shardInfo.GetBatchesInFlight(); + + return meta; } - bool OnMessageSent(ui64 shardId, ui64 cookie) override { + TSerializationResult SerializeMessageToPayload(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) override { + TSerializationResult result; + + const auto& shardInfo = ShardsInfo.GetShard(shardId); + if (shardInfo.IsEmpty()) { + return result; + } + + for (size_t index = 0; index < shardInfo.GetBatchesInFlight(); ++index) { + const auto& inFlightBatch = shardInfo.GetBatch(index); + YQL_ENSURE(!inFlightBatch->IsEmpty()); + result.TotalDataSize += inFlightBatch->GetMemory(); + const ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(evWrite) + .AddDataToPayload(inFlightBatch->SerializeToString()); + result.PayloadIndexes.push_back(payloadIndex); + } + + return result; } - bool OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { + std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { + auto& shardInfo = ShardsInfo.GetShard(shardId); + const auto removedDataSize = shardInfo.PopBatches(cookie); + if (removedDataSize) { + BuildBatchesForShard(shardInfo); + } + return removedDataSize; } - i64 GetMemory() override { + i64 GetMemory() const override { + YQL_ENSURE(Serializer); return Serializer->GetMemory() + ShardsInfo.GetMemory(); } - bool IsClosed() override { + bool IsClosed() const override { return Closed; } - bool IsEmpty() override { + bool IsEmpty() const override { + YQL_ENSURE(Serializer); + return Serializer->IsFinished() && ShardsInfo.IsFinished(); } - bool IsFinished() override { + bool IsFinished() const override { return IsClosed() && IsEmpty(); } + bool IsReady() const override { + return Serializer != nullptr; + } + + TShardedWriteController( + const TShardedWriteControllerSettings settings, + TVector&& inputColumnsMetadata, + const NMiniKQL::TTypeEnvironment& typeEnv) + : Settings(settings) + , InputColumnsMetadata(std::move(inputColumnsMetadata)) + , TypeEnv(typeEnv) { + } + private: - void ForceFlush() { + void FlushSerializer(bool force) { + if (force) { + for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { + for (auto& batch : batches) { + ShardsInfo.GetShard(shardId).PushBatch(std::move(batch)); + } + } + } else { + for (const ui64 shardId : Serializer->GetShardIds()) { + auto& shard = ShardsInfo.GetShard(shardId); + while (true) { + auto batch = Serializer->FlushBatch(shardId); + if (!batch || batch->IsEmpty()) { + break; + } + shard.PushBatch(std::move(batch)); + } + } + } + } + + void BuildBatches() { + for (const ui64 shardId : Serializer->GetShardIds()) { + auto& shard = ShardsInfo.GetShard(shardId); + BuildBatchesForShard(shard); + } } - void ProcessData() { + void BuildBatchesForShard(TShardsInfo::TShardInfo& shard) { + if (shard.GetBatchesInFlight() == 0) { + shard.MakeNextBatches( + Settings.MemoryLimitPerMessage, + Settings.MaxBatchesPerMessage); + } } void ReshardData() { @@ -876,12 +948,13 @@ class TShardedWriteController : public IShardedWriteController { TString LogPrefix = "ShardedWriteController"; - TShardsInfo ShardsInfo; - bool Closed = false; - + TShardedWriteControllerSettings Settings; TVector InputColumnsMetadata; const NMiniKQL::TTypeEnvironment& TypeEnv; + TShardsInfo ShardsInfo; + bool Closed = false; + IPayloadSerializerPtr Serializer = nullptr; }; diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 96bc23a63ae5..b4fc28e40710 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -66,10 +66,9 @@ class IShardedWriteController : public TThrRefBase { NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) = 0; virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; - virtual void ProcessData(bool forceFlush) = 0; virtual void Close() = 0; - virtual ui64 GetNextNewShardId() = 0; + //virtual ui64 GetNextNewShardId() = 0; struct TMessageMetadata { ui64 Cookie = 0; @@ -78,21 +77,33 @@ class IShardedWriteController : public TThrRefBase { }; virtual std::optional GetMessageMetadata(ui64 shardId) = 0; - virtual bool SerializeMessage(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) = 0; + struct TSerializationResult { + i64 TotalDataSize = 0; + TVector PayloadIndexes; + }; - virtual bool OnMessageSent(ui64 shardId, ui64 cookie) = 0; - virtual bool OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; + virtual TSerializationResult SerializeMessageToPayload(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) = 0; - virtual i64 GetMemory() = 0; + virtual std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; - virtual bool IsClosed() = 0; - virtual bool IsEmpty() = 0; - virtual bool IsFinished() = 0; + virtual i64 GetMemory() const = 0; + + virtual bool IsClosed() const = 0; + virtual bool IsEmpty() const = 0; + virtual bool IsFinished() const = 0; + + virtual bool IsReady() const = 0; }; using IShardedWriteControllerPtr = TIntrusivePtr; +struct TShardedWriteControllerSettings { + i64 MemoryLimitTotal; + i64 MemoryLimitPerMessage; + i64 MaxBatchesPerMessage; +}; + IShardedWriteControllerPtr CreateShardedWriteController( TVector&& inputColumns, const NMiniKQL::TTypeEnvironment& typeEnv); From f855232276bfbb8d7b16a6243c669f3d4b4ecac5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 24 May 2024 15:23:11 +0300 Subject: [PATCH 15/35] fix --- ydb/core/kqp/runtime/kqp_write_table.cpp | 7 ++++--- ydb/core/kqp/runtime/kqp_write_table.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index e494fe39e238..8d5a9a67bd24 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -735,7 +735,7 @@ class TShardsInfo { return insertIt->second; } - TVector GetPendingShards() { + TVector GetPendingShards() const { TVector result; for (const auto& [id, shard] : ShardsInfo) { if (!shard.IsEmpty() && shard.GetSendAttempts() == 0) { @@ -829,8 +829,9 @@ class TShardedWriteController : public IShardedWriteController { BuildBatches(); } - //ui64 GetNextNewShardId() override { - //} + TVector GetPendingShards() const override { + return ShardsInfo.GetPendingShards(); + } std::optional GetMessageMetadata(ui64 shardId) override { const auto& shardInfo = ShardsInfo.GetShard(shardId); diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index b4fc28e40710..a364c176c428 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -68,7 +68,7 @@ class IShardedWriteController : public TThrRefBase { virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; virtual void Close() = 0; - //virtual ui64 GetNextNewShardId() = 0; + virtual TVector GetPendingShards() const = 0; struct TMessageMetadata { ui64 Cookie = 0; From d59021ecb60439ea46b750a646c904d12109278f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 24 May 2024 15:32:32 +0300 Subject: [PATCH 16/35] fix --- ydb/core/kqp/runtime/kqp_write_table.cpp | 8 ++++++++ ydb/core/kqp/runtime/kqp_write_table.h | 1 + 2 files changed, 9 insertions(+) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 8d5a9a67bd24..bab06e2bfc80 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -961,5 +961,13 @@ class TShardedWriteController : public IShardedWriteController { } + +IShardedWriteControllerPtr CreateShardedWriteController( + const TShardedWriteControllerSettings& settings, + TVector&& inputColumns, + const NMiniKQL::TTypeEnvironment& typeEnv) { + return MakeIntrusive(settings, std::move(inputColumns), typeEnv); +} + } } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index a364c176c428..be03ff20b9cc 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -105,6 +105,7 @@ struct TShardedWriteControllerSettings { }; IShardedWriteControllerPtr CreateShardedWriteController( + const TShardedWriteControllerSettings& settings, TVector&& inputColumns, const NMiniKQL::TTypeEnvironment& typeEnv); From 1d2ba650d558a0481c340f3c8dd54749d3cabf5f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 25 May 2024 15:26:51 +0300 Subject: [PATCH 17/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 66 ++++++++++++++++++------ ydb/core/kqp/runtime/kqp_write_table.cpp | 23 ++++++--- ydb/core/kqp/runtime/kqp_write_table.h | 2 + 3 files changed, 67 insertions(+), 24 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 5a50e5257f65..c511a8ecfbb5 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -323,8 +323,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } i64 GetFreeSpace() const final { - const i64 result = Serializer - ? MemoryLimit - Serializer->GetMemory() - ShardsInfo.GetMemory() + const i64 result = ShardedWriteController + ? MemoryLimit - ShardedWriteController->GetMemory() : std::numeric_limits::min(); // Can't use zero here because compute can use overcommit! return result; } @@ -349,7 +349,19 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_D("New data: size=" << size << ", finished=" << finished << "."); - YQL_ENSURE(Serializer); + YQL_ENSURE(ShardedWriteController); + try { + ShardedWriteController->AddData(std::move(data)); + if (Finished) { + ShardedWriteController->Close(); + } + } catch (...) { + RuntimeError( + CurrentExceptionMessage(), + NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } + + /*YQL_ENSURE(Serializer); try { Serializer->AddData(std::move(data)); if (Finished) { @@ -377,7 +389,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } YQL_ENSURE(Serializer->IsFinished()); - } + }*/ ProcessBatches(); } @@ -613,8 +625,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void PopShardBatch(ui64 shardId, ui64 cookie) { TResumeNotificationManager resumeNotificator(*this); - auto& shardInfo = ShardsInfo.GetShard(shardId); - if (const auto removedDataSize = shardInfo.PopBatches(cookie); removedDataSize) { + const auto removedDataSize = ShardedWriteController->OnMessageAcknowledged(shardId, cookie); + if (removedDataSize) { EgressStats.Bytes += *removedDataSize; EgressStats.Chunks++; EgressStats.Splits++; @@ -625,7 +637,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void ProcessBatches() { if (!ImmediateTx || Finished || GetFreeSpace() <= 0) { - MakeNewBatches(); + //MakeNewBatches(); SendBatchesToShards(); if (Finished && Serializer->IsFinished() && ShardsInfo.IsFinished()) { CA_LOG_D("Write actor finished"); @@ -634,7 +646,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } } - void MakeNewBatches() { + /*void MakeNewBatches() { for (const size_t shardId : Serializer->GetShardIds()) { auto& shard = ShardsInfo.GetShard(shardId); while (true) { @@ -652,9 +664,11 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N : kMaxBatchesPerMessage); } } - } + }*/ void SendBatchesToShards() { + + ShardedWriteController-> YQL_ENSURE(!ImmediateTx || ShardsInfo.GetShards().size() == 1); for (const size_t shardId : ShardsInfo.GetPendingShards()) { @@ -785,16 +799,34 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N try { if (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { - Serializer = CreateColumnShardPayloadSerializer( - *SchemeEntry, - columnsMetadata, + //Serializer = CreateColumnShardPayloadSerializer( + // *SchemeEntry, + // columnsMetadata, + // TypeEnv); + ShardedWriteController = CreateShardedWriteController( + TShardedWriteControllerSettings { + .MemoryLimitTotal = kInFlightMemoryLimitPerActor, + .MemoryLimitPerMessage = kMemoryLimitPerMessage, + .MaxBatchesPerMessage = 1, + }, + std::move(columnsMetadata), TypeEnv); + ShardedWriteController->OnPartitioningChanged(*SchemeEntry); } else { - Serializer = CreateDataShardPayloadSerializer( - *SchemeEntry, - std::move(*SchemeRequest), - columnsMetadata, + //Serializer = CreateDataShardPayloadSerializer( + // *SchemeEntry, + // std::move(*SchemeRequest), + // columnsMetadata, + // TypeEnv); + ShardedWriteController = CreateShardedWriteController( + TShardedWriteControllerSettings { + .MemoryLimitTotal = kInFlightMemoryLimitPerActor, + .MemoryLimitPerMessage = kMemoryLimitPerMessage, + .MaxBatchesPerMessage = kMaxBatchesPerMessage, + }, + std::move(columnsMetadata), TypeEnv); + ShardedWriteController->OnPartitioningChanged(*SchemeEntry, std::move(*SchemeRequest)); } ResumeExecution(); } catch (...) { @@ -834,6 +866,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N bool Finished = false; const i64 MemoryLimit = kInFlightMemoryLimitPerActor; + + IShardedWriteControllerPtr ShardedWriteController = nullptr; }; void RegisterKqpWriteActor(NYql::NDq::TDqAsyncIoFactory& factory, TIntrusivePtr counters) { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index bab06e2bfc80..c7bed9c726b3 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -815,7 +815,7 @@ class TShardedWriteController : public IShardedWriteController { Serializer->AddData(std::move(data)); FlushSerializer(GetMemory() >= Settings.MemoryLimitTotal); - BuildBatches(); + // BuildBatches(); } void Close() override { @@ -826,7 +826,7 @@ class TShardedWriteController : public IShardedWriteController { for (auto& [shardId, shardInfo] : ShardsInfo.GetShards()) { shardInfo.Close(); } - BuildBatches(); + // BuildBatches(); } TVector GetPendingShards() const override { @@ -834,15 +834,17 @@ class TShardedWriteController : public IShardedWriteController { } std::optional GetMessageMetadata(ui64 shardId) override { - const auto& shardInfo = ShardsInfo.GetShard(shardId); + auto& shardInfo = ShardsInfo.GetShard(shardId); if (shardInfo.IsEmpty()) { return {}; } + BuildBatchesForShard(shardInfo); TMessageMetadata meta; meta.Cookie = shardInfo.GetCookie(); meta.OperationsCount = shardInfo.GetBatchesInFlight(); meta.IsFinal = shardInfo.IsClosed() && shardInfo.Size() == shardInfo.GetBatchesInFlight(); + meta.SendAttempts = shardInfo.GetSendAttempts(); return meta; } @@ -870,12 +872,17 @@ class TShardedWriteController : public IShardedWriteController { std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { auto& shardInfo = ShardsInfo.GetShard(shardId); const auto removedDataSize = shardInfo.PopBatches(cookie); - if (removedDataSize) { - BuildBatchesForShard(shardInfo); - } return removedDataSize; } + void OnMessageSent(ui64 shardId, ui64 cookie) override { + auto& shardInfo = ShardsInfo.GetShard(shardId); + if (shardInfo.IsEmpty() || shardInfo.GetCookie() != cookie) { + return; + } + shardInfo.IncSendAttempts(); + } + i64 GetMemory() const override { YQL_ENSURE(Serializer); return Serializer->GetMemory() + ShardsInfo.GetMemory(); @@ -929,12 +936,12 @@ class TShardedWriteController : public IShardedWriteController { } } - void BuildBatches() { + /*void BuildBatches() { for (const ui64 shardId : Serializer->GetShardIds()) { auto& shard = ShardsInfo.GetShard(shardId); BuildBatchesForShard(shard); } - } + }*/ void BuildBatchesForShard(TShardsInfo::TShardInfo& shard) { if (shard.GetBatchesInFlight() == 0) { diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index be03ff20b9cc..db69306f75de 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -74,6 +74,7 @@ class IShardedWriteController : public TThrRefBase { ui64 Cookie = 0; ui64 OperationsCount = 0; bool IsFinal = false; + ui64 SendAttempts = 0; }; virtual std::optional GetMessageMetadata(ui64 shardId) = 0; @@ -85,6 +86,7 @@ class IShardedWriteController : public TThrRefBase { virtual TSerializationResult SerializeMessageToPayload(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) = 0; virtual std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; + virtual void OnMessageSent(ui64 shardId, ui64 cookie) = 0; virtual i64 GetMemory() const = 0; From 8c644fe021c77b3efbc659dcc71de83e4744f970 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 27 May 2024 15:32:38 +0300 Subject: [PATCH 18/35] writer --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 363 ++++-------------- ydb/core/kqp/runtime/kqp_write_table.cpp | 25 +- ydb/core/kqp/runtime/kqp_write_table.h | 3 +- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 20 +- 4 files changed, 93 insertions(+), 318 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index c511a8ecfbb5..b1a36e7006ee 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -28,8 +28,8 @@ namespace { constexpr i64 kMaxBatchesPerMessage = 8; struct TWriteActorBackoffSettings { - TDuration StartRetryDelay = TDuration::MilliSeconds(250); - TDuration MaxRetryDelay = TDuration::Seconds(10); + TDuration StartRetryDelay = TDuration::MilliSeconds(200); + TDuration MaxRetryDelay = TDuration::Seconds(20); double UnsertaintyRatio = 0.5; double Multiplier = 2.0; @@ -52,179 +52,27 @@ namespace { return delay; } - class TShardsInfo { - public: - class TShardInfo { - friend class TShardsInfo; - TShardInfo(i64& memory) - : Memory(memory) { - } - - public: - struct TInFlightBatch { - NKikimr::NKqp::IPayloadSerializer::IBatchPtr Batch; - }; - - size_t Size() const { - return Batches.size(); - } - - bool IsEmpty() const { - return Batches.empty(); - } - - bool IsClosed() const { - return Closed; - } - - bool IsFinished() const { - return IsClosed() && IsEmpty(); - } - - void Close() { - Closed = true; - } - - void MakeNextBatches(i64 maxDataSize, ui64 maxCount) { - YQL_ENSURE(BatchesInFlight == 0); - i64 dataSize = 0; - while (BatchesInFlight < maxCount - && BatchesInFlight < Batches.size() - && dataSize + GetBatch(BatchesInFlight).Batch->GetMemory() <= maxDataSize) { - dataSize += GetBatch(BatchesInFlight).Batch->GetMemory(); - ++BatchesInFlight; - } - YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).Batch->GetMemory() <= maxDataSize); - } - - const TInFlightBatch& GetBatch(size_t index) const { - return Batches.at(index); - } - - std::optional PopBatches(const ui64 cookie) { - if (BatchesInFlight != 0 && Cookie == cookie) { - ui64 dataSize = 0; - for (size_t index = 0; index < BatchesInFlight; ++index) { - dataSize += Batches.front().Batch->GetMemory(); - Batches.pop_front(); - } - - ++Cookie; - SendAttempts = 0; - BatchesInFlight = 0; - - Memory -= dataSize; - return dataSize; - } - return std::nullopt; - } - - void PushBatch(NKikimr::NKqp::IPayloadSerializer::IBatchPtr&& batch) { - YQL_ENSURE(!IsClosed()); - Batches.push_back(TInFlightBatch{ - .Batch = std::move(batch), - }); - Memory += Batches.back().Batch->GetMemory(); - } - - bool AddAndCheckLock(const NKikimrDataEvents::TLock& lock) { - if (!Lock) { - Lock = lock; - return true; - } else { - return lock.GetLockId() == Lock->GetLockId() - && lock.GetDataShard() == Lock->GetDataShard() - && lock.GetSchemeShard() == Lock->GetSchemeShard() - && lock.GetPathId() == Lock->GetPathId() - && lock.GetGeneration() == Lock->GetGeneration() - && lock.GetCounter() == Lock->GetCounter(); - } - } - - const std::optional& GetLock() const { - return Lock; - } - - ui64 GetCookie() const { - return Cookie; - } - - size_t GetBatchesInFlight() const { - return BatchesInFlight; - } - - ui32 GetSendAttempts() const { - return SendAttempts; - } - - void IncSendAttempts() { - ++SendAttempts; - } - - private: - std::deque Batches; - bool Closed = false; - std::optional Lock; - i64& Memory; - - ui64 Cookie = 1; - ui32 SendAttempts = 0; - size_t BatchesInFlight = 0; - }; - - TShardInfo& GetShard(const ui64 shard) { - auto it = ShardsInfo.find(shard); - if (it != std::end(ShardsInfo)) { - return it->second; - } - - auto [insertIt, _] = ShardsInfo.emplace(shard, TShardInfo(Memory)); - return insertIt->second; - } - - TVector GetPendingShards() { - TVector result; - for (const auto& [id, shard] : ShardsInfo) { - if (!shard.IsEmpty() && shard.GetSendAttempts() == 0) { - result.push_back(id); - } - } - return result; - } - - bool Has(ui64 shardId) const { - return ShardsInfo.contains(shardId); - } - - bool IsEmpty() const { - for (const auto& [_, shard] : ShardsInfo) { - if (!shard.IsEmpty()) { - return false; - } - } - return true; - } - - bool IsFinished() const { - for (const auto& [_, shard] : ShardsInfo) { - if (!shard.IsFinished()) { - return false; - } + struct TLockInfo { + bool AddAndCheckLock(const NKikimrDataEvents::TLock& lock) { + if (!Lock) { + Lock = lock; + return true; + } else { + return lock.GetLockId() == Lock->GetLockId() + && lock.GetDataShard() == Lock->GetDataShard() + && lock.GetSchemeShard() == Lock->GetSchemeShard() + && lock.GetPathId() == Lock->GetPathId() + && lock.GetGeneration() == Lock->GetGeneration() + && lock.GetCounter() == Lock->GetCounter(); } - return true; } - THashMap& GetShards() { - return ShardsInfo; - } - - i64 GetMemory() const { - return Memory; + const std::optional& GetLock() const { + return Lock; } private: - THashMap ShardsInfo; - i64 Memory = 0; + std::optional Lock; }; } @@ -331,8 +179,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N TMaybe ExtraData() override { NKikimrKqp::TEvKqpOutputActorResultInfo resultInfo; - for (const auto& [_, shardInfo] : ShardsInfo.GetShards()) { - if (const auto& lock = shardInfo.GetLock(); lock) { + for (const auto& [_, lockInfo] : LocksInfo) { + if (const auto& lock = lockInfo.GetLock(); lock) { resultInfo.AddLocks()->CopyFrom(*lock); } } @@ -361,36 +209,6 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - /*YQL_ENSURE(Serializer); - try { - Serializer->AddData(std::move(data)); - if (Finished) { - Serializer->Close(); - } - } catch (...) { - RuntimeError( - CurrentExceptionMessage(), - NYql::NDqProto::StatusIds::INTERNAL_ERROR); - } - - if (Finished || GetFreeSpace() <= 0) { - TResumeNotificationManager resumeNotificator(*this); - for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { - for (auto& batch : batches) { - ShardsInfo.GetShard(shardId).PushBatch(std::move(batch)); - } - } - resumeNotificator.CheckMemory(); - } - - if (Finished) { - for (auto& [shardId, shardInfo] : ShardsInfo.GetShards()) { - shardInfo.Close(); - } - - YQL_ENSURE(Serializer->IsFinished()); - }*/ - ProcessBatches(); } @@ -586,6 +404,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "."); + // TODO: Reshard writes RuntimeError( TStringBuilder() << "Got SCHEME CHANGED for table `" << SchemeEntry->TableId.PathId.ToString() << "`.", @@ -617,7 +436,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N PopShardBatch(ev->Get()->Record.GetOrigin(), ev->Cookie); for (const auto& lock : ev->Get()->Record.GetTxLocks()) { - ShardsInfo.GetShard(ev->Get()->Record.GetOrigin()).AddAndCheckLock(lock); + LocksInfo[ev->Get()->Record.GetOrigin()].AddAndCheckLock(lock); } ProcessBatches(); @@ -637,51 +456,25 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void ProcessBatches() { if (!ImmediateTx || Finished || GetFreeSpace() <= 0) { - //MakeNewBatches(); SendBatchesToShards(); - if (Finished && Serializer->IsFinished() && ShardsInfo.IsFinished()) { - CA_LOG_D("Write actor finished"); - Callbacks->OnAsyncOutputFinished(GetOutputIndex()); - } } - } - /*void MakeNewBatches() { - for (const size_t shardId : Serializer->GetShardIds()) { - auto& shard = ShardsInfo.GetShard(shardId); - while (true) { - auto batch = Serializer->FlushBatch(shardId); - if (!batch || batch->IsEmpty()) { - break; - } - shard.PushBatch(std::move(batch)); - } - if (shard.GetBatchesInFlight() == 0) { - shard.MakeNextBatches( - kMemoryLimitPerMessage, - SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable - ? 1 - : kMaxBatchesPerMessage); - } + if (Finished && ShardedWriteController->IsFinished()) { + CA_LOG_D("Write actor finished"); + Callbacks->OnAsyncOutputFinished(GetOutputIndex()); } - }*/ + } void SendBatchesToShards() { - - ShardedWriteController-> - YQL_ENSURE(!ImmediateTx || ShardsInfo.GetShards().size() == 1); - - for (const size_t shardId : ShardsInfo.GetPendingShards()) { - const auto& shard = ShardsInfo.GetShard(shardId); - YQL_ENSURE(!shard.IsEmpty()); + for (const size_t shardId : ShardedWriteController->GetPendingShards()) { SendDataToShard(shardId); } } void SendDataToShard(const ui64 shardId) { - auto& shard = ShardsInfo.GetShard(shardId); - YQL_ENSURE(!shard.IsEmpty()); - if (shard.GetSendAttempts() >= BackoffSettings()->MaxWriteAttempts) { + const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); + YQL_ENSURE(metadata); + if (metadata->SendAttempts >= BackoffSettings()->MaxWriteAttempts) { CA_LOG_E("ShardId=" << shardId << " for table '" << Settings.GetTable().GetPath() << "': retry limit exceeded." @@ -698,27 +491,24 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N auto evWrite = std::make_unique( NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); - if (ImmediateTx && FinalTx && Finished && shard.Size() == shard.GetBatchesInFlight()) { + if (ImmediateTx && FinalTx && Finished && metadata->IsFinal) { // Last immediate write (only for datashard) - if (shard.GetLock()) { + if (LocksInfo[shardId].GetLock()) { // multi immediate evwrite auto* locks = evWrite->Record.MutableLocks(); locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); locks->AddSendingShards(shardId); locks->AddReceivingShards(shardId); - *locks->AddLocks() = *shard.GetLock(); + *locks->AddLocks() = *LocksInfo.at(shardId).GetLock(); } } else if (!InconsistentTx) { evWrite->SetLockId(Settings.GetLockTxId(), Settings.GetLockNodeId()); } - ui64 totalDataSize = 0; - for (size_t index = 0; index < shard.GetBatchesInFlight(); ++index) { - const auto& inFlightBatch = shard.GetBatch(index); - YQL_ENSURE(!inFlightBatch.Batch->IsEmpty()); - totalDataSize += inFlightBatch.Batch->GetMemory(); - const ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite) - .AddDataToPayload(inFlightBatch.Batch->SerializeToString()); + const auto serializationResult = ShardedWriteController->SerializeMessageToPayload(shardId, *evWrite); + YQL_ENSURE(serializationResult.TotalDataSize > 0); + + for (size_t payloadIndex : serializationResult.PayloadIndexes) { evWrite->AddOperation( NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, { @@ -726,34 +516,34 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Settings.GetTable().GetTableId(), Settings.GetTable().GetVersion(), }, - Serializer->GetWriteColumnIds(), + ShardedWriteController->GetWriteColumnIds(), payloadIndex, - Serializer->GetDataFormat()); + ShardedWriteController->GetDataFormat()); } CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", TxId=" << std::get(TxId) - << ", LockTxId=" << Settings.GetLockTxId() << ", LockNodeId=" << Settings.GetLockNodeId() - << ", Size=" << totalDataSize << ", Cookie=" << shard.GetCookie() - << "; ShardBatchesLeft=" << shard.Size() << ", ShardClosed=" << shard.IsClosed() - << "; Attempts=" << shard.GetSendAttempts()); + << ", LockTxId=" << evWrite->Record.GetLockTxId() << ", LockNodeId=" << evWrite->Record.GetLockNodeId() + << ", Size=" << serializationResult.TotalDataSize << ", Cookie=" << metadata->Cookie + << ", Operations=" << metadata->OperationsCount << ", IsFinal=" << metadata->IsFinal + << ", Attempts=" << metadata->SendAttempts); Send( PipeCacheId, new TEvPipeCache::TEvForward(evWrite.release(), shardId, true), 0, - shard.GetCookie()); - shard.IncSendAttempts(); + metadata->Cookie); - TlsActivationContext->Schedule( - CalculateNextAttemptDelay(shard.GetSendAttempts()), - new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvShardRequestTimeout(shardId), 0, shard.GetCookie())); + ShardedWriteController->OnMessageSent(shardId, metadata->Cookie); + + //if (SchemeEntry->Kind != NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { + TlsActivationContext->Schedule( + CalculateNextAttemptDelay(metadata->SendAttempts), + new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvShardRequestTimeout(shardId), 0, metadata->Cookie)); + //} } void RetryShard(const ui64 shardId, const std::optional ifCookieEqual) { - if (!ShardsInfo.Has(shardId)) { - return; - } - const auto& shard = ShardsInfo.GetShard(shardId); - if (shard.IsEmpty() || (ifCookieEqual && shard.GetCookie() != ifCookieEqual)) { + const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); + if (!metadata || (ifCookieEqual && metadata->Cookie != ifCookieEqual)) { return; } @@ -791,41 +581,29 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void Prepare() { YQL_ENSURE(SchemeEntry); - TVector columnsMetadata; - columnsMetadata.reserve(Settings.GetColumns().size()); - for (const auto & column : Settings.GetColumns()) { - columnsMetadata.push_back(column); + if (!ShardedWriteController) { + TVector columnsMetadata; + columnsMetadata.reserve(Settings.GetColumns().size()); + for (const auto & column : Settings.GetColumns()) { + columnsMetadata.push_back(column); + } + + ShardedWriteController = CreateShardedWriteController( + TShardedWriteControllerSettings { + .MemoryLimitTotal = kInFlightMemoryLimitPerActor, + .MemoryLimitPerMessage = kMemoryLimitPerMessage, + .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable + ? 1 + : kMaxBatchesPerMessage), + }, + std::move(columnsMetadata), + TypeEnv); } try { if (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { - //Serializer = CreateColumnShardPayloadSerializer( - // *SchemeEntry, - // columnsMetadata, - // TypeEnv); - ShardedWriteController = CreateShardedWriteController( - TShardedWriteControllerSettings { - .MemoryLimitTotal = kInFlightMemoryLimitPerActor, - .MemoryLimitPerMessage = kMemoryLimitPerMessage, - .MaxBatchesPerMessage = 1, - }, - std::move(columnsMetadata), - TypeEnv); ShardedWriteController->OnPartitioningChanged(*SchemeEntry); } else { - //Serializer = CreateDataShardPayloadSerializer( - // *SchemeEntry, - // std::move(*SchemeRequest), - // columnsMetadata, - // TypeEnv); - ShardedWriteController = CreateShardedWriteController( - TShardedWriteControllerSettings { - .MemoryLimitTotal = kInFlightMemoryLimitPerActor, - .MemoryLimitPerMessage = kMemoryLimitPerMessage, - .MaxBatchesPerMessage = kMaxBatchesPerMessage, - }, - std::move(columnsMetadata), - TypeEnv); ShardedWriteController->OnPartitioningChanged(*SchemeEntry, std::move(*SchemeRequest)); } ResumeExecution(); @@ -860,9 +638,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N std::optional SchemeEntry; std::optional SchemeRequest; - IPayloadSerializerPtr Serializer = nullptr; - TShardsInfo ShardsInfo; + THashMap LocksInfo; bool Finished = false; const i64 MemoryLimit = kInFlightMemoryLimitPerActor; diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index c7bed9c726b3..d7d4d48ed9fa 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -815,18 +815,17 @@ class TShardedWriteController : public IShardedWriteController { Serializer->AddData(std::move(data)); FlushSerializer(GetMemory() >= Settings.MemoryLimitTotal); - // BuildBatches(); } void Close() override { YQL_ENSURE(Serializer); Closed = true; + Serializer->Close(); FlushSerializer(true); YQL_ENSURE(Serializer->IsFinished()); for (auto& [shardId, shardInfo] : ShardsInfo.GetShards()) { shardInfo.Close(); } - // BuildBatches(); } TVector GetPendingShards() const override { @@ -869,6 +868,14 @@ class TShardedWriteController : public IShardedWriteController { return result; } + NKikimrDataEvents::EDataFormat GetDataFormat() override { + return Serializer->GetDataFormat(); + } + + std::vector GetWriteColumnIds() override { + return Serializer->GetWriteColumnIds(); + } + std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { auto& shardInfo = ShardsInfo.GetShard(shardId); const auto removedDataSize = shardInfo.PopBatches(cookie); @@ -892,13 +899,8 @@ class TShardedWriteController : public IShardedWriteController { return Closed; } - bool IsEmpty() const override { - YQL_ENSURE(Serializer); - return Serializer->IsFinished() && ShardsInfo.IsFinished(); - } - bool IsFinished() const override { - return IsClosed() && IsEmpty(); + return IsClosed() && Serializer->IsFinished() && ShardsInfo.IsFinished(); } bool IsReady() const override { @@ -936,13 +938,6 @@ class TShardedWriteController : public IShardedWriteController { } } - /*void BuildBatches() { - for (const ui64 shardId : Serializer->GetShardIds()) { - auto& shard = ShardsInfo.GetShard(shardId); - BuildBatchesForShard(shard); - } - }*/ - void BuildBatchesForShard(TShardsInfo::TShardInfo& shard) { if (shard.GetBatchesInFlight() == 0) { shard.MakeNextBatches( diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index db69306f75de..980ca571c59a 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -84,6 +84,8 @@ class IShardedWriteController : public TThrRefBase { }; virtual TSerializationResult SerializeMessageToPayload(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) = 0; + virtual NKikimrDataEvents::EDataFormat GetDataFormat() = 0; + virtual std::vector GetWriteColumnIds() = 0; virtual std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; virtual void OnMessageSent(ui64 shardId, ui64 cookie) = 0; @@ -91,7 +93,6 @@ class IShardedWriteController : public TThrRefBase { virtual i64 GetMemory() const = 0; virtual bool IsClosed() const = 0; - virtual bool IsEmpty() const = 0; virtual bool IsFinished() const = 0; virtual bool IsReady() const = 0; diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 3658ba62b80d..4f0c4c6cf03c 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -2612,6 +2612,8 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); auto client = kikimr.GetQueryClient(); + Cerr << "<<<<<<<<>>>>>" << Endl; + { const TString sql = R"( REPLACE INTO `/Root/ColumnShard2` @@ -2620,15 +2622,15 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT_C(insertResult.IsSuccess(), insertResult.GetIssues().ToString()); - auto it = client.StreamExecuteQuery(R"( - SELECT COUNT(*) FROM `/Root/ColumnShard2`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); - TString output = StreamResultToYson(it); - CompareYson( - output, - R"([[10000u]])"); - } + //auto it = client.StreamExecuteQuery(R"( + // SELECT COUNT(*) FROM `/Root/CqolumnShard2`; + //)", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + //UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + //TString output = StreamResultToYson(it); + //CompareYson( + // output, + // R"([[10000u]])"); + }// } Y_UNIT_TEST(TableSink_ReplaceColumnShard) { From a571f2ac8dde24ccae2f0b2ce70ca09043774ed8 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 27 May 2024 17:04:53 +0300 Subject: [PATCH 19/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 5 +++-- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 20 +++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index b1a36e7006ee..8cadcfab6742 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -534,11 +534,12 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N ShardedWriteController->OnMessageSent(shardId, metadata->Cookie); - //if (SchemeEntry->Kind != NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { + // TODO: fix retries for columnshard + if (SchemeEntry->Kind != NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { TlsActivationContext->Schedule( CalculateNextAttemptDelay(metadata->SendAttempts), new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvShardRequestTimeout(shardId), 0, metadata->Cookie)); - //} + } } void RetryShard(const ui64 shardId, const std::optional ifCookieEqual) { diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 4f0c4c6cf03c..3658ba62b80d 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -2612,8 +2612,6 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); auto client = kikimr.GetQueryClient(); - Cerr << "<<<<<<<<>>>>>" << Endl; - { const TString sql = R"( REPLACE INTO `/Root/ColumnShard2` @@ -2622,15 +2620,15 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT_C(insertResult.IsSuccess(), insertResult.GetIssues().ToString()); - //auto it = client.StreamExecuteQuery(R"( - // SELECT COUNT(*) FROM `/Root/CqolumnShard2`; - //)", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); - //UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); - //TString output = StreamResultToYson(it); - //CompareYson( - // output, - // R"([[10000u]])"); - }// + auto it = client.StreamExecuteQuery(R"( + SELECT COUNT(*) FROM `/Root/ColumnShard2`; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + TString output = StreamResultToYson(it); + CompareYson( + output, + R"([[10000u]])"); + } } Y_UNIT_TEST(TableSink_ReplaceColumnShard) { From 672f185de6dd90924a42d4b9a9c930031cce94e4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 27 May 2024 17:23:58 +0300 Subject: [PATCH 20/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 41 +++++++++++++++--------- ydb/core/kqp/runtime/kqp_write_table.cpp | 8 ++--- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 8cadcfab6742..fc077ab9fbf6 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -404,12 +404,15 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "."); - // TODO: Reshard writes - RuntimeError( - TStringBuilder() << "Got SCHEME CHANGED for table `" - << SchemeEntry->TableId.PathId.ToString() << "`.", - NYql::NDqProto::StatusIds::SCHEME_ERROR, - getIssues()); + if (InconsistentTx) { + ResolveTable(); + } else { + RuntimeError( + TStringBuilder() << "Got SCHEME CHANGED for table `" + << SchemeEntry->TableId.PathId.ToString() << "`.", + NYql::NDqProto::StatusIds::SCHEME_ERROR, + getIssues()); + } return; } case NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN: { @@ -589,16 +592,22 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N columnsMetadata.push_back(column); } - ShardedWriteController = CreateShardedWriteController( - TShardedWriteControllerSettings { - .MemoryLimitTotal = kInFlightMemoryLimitPerActor, - .MemoryLimitPerMessage = kMemoryLimitPerMessage, - .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable - ? 1 - : kMaxBatchesPerMessage), - }, - std::move(columnsMetadata), - TypeEnv); + try { + ShardedWriteController = CreateShardedWriteController( + TShardedWriteControllerSettings { + .MemoryLimitTotal = kInFlightMemoryLimitPerActor, + .MemoryLimitPerMessage = kMemoryLimitPerMessage, + .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable + ? 1 + : kMaxBatchesPerMessage), + }, + std::move(columnsMetadata), + TypeEnv); + } catch (...) { + RuntimeError( + CurrentExceptionMessage(), + NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } } try { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index d7d4d48ed9fa..b10695673a3b 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -188,18 +188,17 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { const TConstArrayRef inputColumns, // key columns then value columns const NMiniKQL::TTypeEnvironment& typeEnv) : TypeEnv(typeEnv) - , SchemeEntry(schemeEntry) , Columns(BuildColumns(inputColumns)) - , WriteIndex(BuildWriteIndex(SchemeEntry, inputColumns)) + , WriteIndex(BuildWriteIndex(schemeEntry, inputColumns)) , WriteColumnIds(BuildWriteColumnIds(inputColumns, WriteIndex)) , BatchBuilder(arrow::Compression::UNCOMPRESSED, BuildNotNullColumns(inputColumns)) { TString err; - if (!BatchBuilder.Start(BuildBatchBuilderColumns(SchemeEntry), 0, 0, err)) { + if (!BatchBuilder.Start(BuildBatchBuilderColumns(schemeEntry), 0, 0, err)) { yexception() << "Failed to start batch builder: " + err; } // TODO: Checks from ydb/core/tx/data_events/columnshard_splitter.cpp - const auto& description = SchemeEntry.ColumnTableInfo->Description; + const auto& description = schemeEntry.ColumnTableInfo->Description; const auto& scheme = description.GetSchema(); const auto& sharding = description.GetSharding(); @@ -393,7 +392,6 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { private: const NMiniKQL::TTypeEnvironment& TypeEnv; - const NSchemeCache::TSchemeCacheNavigate::TEntry SchemeEntry; std::shared_ptr Sharding; const TVector Columns; From 955d285ed3fb4b78e0aac3226f00ed79ac035b5b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 28 May 2024 11:23:18 +0300 Subject: [PATCH 21/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 5 +---- ydb/core/kqp/runtime/kqp_write_table.cpp | 16 ++++++++++++++-- ydb/core/kqp/runtime/kqp_write_table.h | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index fc077ab9fbf6..11ef62e34561 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -242,9 +242,6 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (SchemeEntry) { - return; - } if (ev->Get()->Request->ErrorCount > 0) { RuntimeError(TStringBuilder() << "Failed to get table: " << TableId << "'", NYql::NDqProto::StatusIds::SCHEME_ERROR); @@ -272,7 +269,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } void ResolveShards() { - YQL_ENSURE(!SchemeRequest); + YQL_ENSURE(!SchemeRequest || InconsistentTx); YQL_ENSURE(SchemeEntry); TVector columns; diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index b10695673a3b..fabb5975979e 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -233,7 +233,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { FlushUnsharded(false); } - void AddBatch(IPayloadSerializer::IBatchPtr&& batch) override { + void AddBatch(const IPayloadSerializer::IBatchPtr& batch) override { auto columnshardBatch = dynamic_cast(batch.Get()); YQL_ENSURE(columnshardBatch); auto data = columnshardBatch->Extract(); @@ -500,7 +500,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { }); } - void AddBatch(IPayloadSerializer::IBatchPtr&& batch) override { + void AddBatch(const IPayloadSerializer::IBatchPtr& batch) override { auto datashardBatch = dynamic_cast(batch.Get()); YQL_ENSURE(datashardBatch); auto data = datashardBatch->Extract(); @@ -773,6 +773,11 @@ class TShardsInfo { return Memory; } + void Clear() { + ShardsInfo = {}; + Memory = 0; + } + private: THashMap ShardsInfo; i64 Memory = 0; @@ -789,6 +794,7 @@ class TShardedWriteController : public IShardedWriteController { InputColumnsMetadata, TypeEnv); ReshardData(); + ShardsInfo.Clear(); } void OnPartitioningChanged( @@ -803,6 +809,7 @@ class TShardedWriteController : public IShardedWriteController { InputColumnsMetadata, TypeEnv); ReshardData(); + ShardsInfo.Clear(); } void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { @@ -945,6 +952,11 @@ class TShardedWriteController : public IShardedWriteController { } void ReshardData() { + for (auto& [_, shardInfo] : ShardsInfo.GetShards()) { + for (size_t index = 0; index < shardInfo.Size(); ++index) { + Serializer->AddBatch(shardInfo.GetBatch(index)); + } + } } TString LogPrefix = "ShardedWriteController"; diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 980ca571c59a..d09df441e9c7 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -22,7 +22,7 @@ class IPayloadSerializer : public TThrRefBase { using IBatchPtr = TIntrusivePtr; virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; - virtual void AddBatch(IBatchPtr&& batch) = 0; + virtual void AddBatch(const IBatchPtr& batch) = 0; virtual void Close() = 0; From 89e72f8357cda26037af4420e81007c2b13986c6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 28 May 2024 11:42:41 +0300 Subject: [PATCH 22/35] fix --- ydb/core/kqp/runtime/kqp_write_table.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index fabb5975979e..aef233744588 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -197,9 +197,11 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { yexception() << "Failed to start batch builder: " + err; } - // TODO: Checks from ydb/core/tx/data_events/columnshard_splitter.cpp + YQL_ENSURE(schemeEntry.ColumnTableInfo); const auto& description = schemeEntry.ColumnTableInfo->Description; + YQL_ENSURE(description.HasSchema()); const auto& scheme = description.GetSchema(); + YQL_ENSURE(description.HasSharding()); const auto& sharding = description.GetSharding(); NSchemeShard::TOlapSchema olapSchema; From e2a37086f1650084cf1627628700b4e715a2e9ff Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 28 May 2024 15:07:46 +0300 Subject: [PATCH 23/35] fix --- ydb/core/kqp/runtime/kqp_write_table.cpp | 5 ++--- ydb/core/tx/sharding/sharding.cpp | 17 +++++------------ 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index aef233744588..3459c61dc8fd 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -206,7 +206,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { NSchemeShard::TOlapSchema olapSchema; olapSchema.ParseFromLocalDB(scheme); - auto shardingConclusion = NSharding::TShardingBase::BuildFromProto(olapSchema, sharding); + auto shardingConclusion = NSharding::IShardingBase::BuildFromProto(olapSchema, sharding); if (shardingConclusion.IsFail()) { ythrow yexception() << "Ydb::StatusIds::SCHEME_ERROR : " << shardingConclusion.GetErrorMessage(); } @@ -392,9 +392,8 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } private: - const NMiniKQL::TTypeEnvironment& TypeEnv; - std::shared_ptr Sharding; + std::shared_ptr Sharding; const TVector Columns; const std::vector WriteIndex; diff --git a/ydb/core/tx/sharding/sharding.cpp b/ydb/core/tx/sharding/sharding.cpp index 7234806fce07..444b94d73eba 100644 --- a/ydb/core/tx/sharding/sharding.cpp +++ b/ydb/core/tx/sharding/sharding.cpp @@ -243,21 +243,14 @@ NKikimrSchemeOp::TColumnTableSharding IShardingBase::SerializeToProto() const { THashMap> IShardingBase::SplitByShardsToArrowBatches(const std::shared_ptr& batch) { auto sharding = MakeSharding(batch); - std::vector> chunks; - if (Shards.size() == 1) { - chunks = {batch}; + THashMap> chunks; + if (sharding.size() == 1) { + AFL_VERIFY(chunks.emplace(sharding.begin()->first, batch).second); } else { chunks = NArrow::ShardingSplit(batch, sharding); } - AFL_VERIFY(chunks.size() == Shards.size()); - THashMap> result; - for (auto&& [tabletId, chunk]: chunks) { - if (!chunk) { - continue; - } - result.emplace(tabletId, std::move(chunk)); - } - return result; + AFL_VERIFY(chunks.size() == sharding.size()); + return chunks; } TConclusion>> IShardingBase::SplitByShards(const std::shared_ptr& batch, const ui64 chunkBytesLimit) { From dfd7ce7f771ec492b2ff1a9b111ebde3e8447154 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 29 May 2024 18:09:02 +0300 Subject: [PATCH 24/35] fix --- ydb/core/tx/sharding/sharding.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ydb/core/tx/sharding/sharding.cpp b/ydb/core/tx/sharding/sharding.cpp index 444b94d73eba..14f501817207 100644 --- a/ydb/core/tx/sharding/sharding.cpp +++ b/ydb/core/tx/sharding/sharding.cpp @@ -242,7 +242,7 @@ NKikimrSchemeOp::TColumnTableSharding IShardingBase::SerializeToProto() const { } THashMap> IShardingBase::SplitByShardsToArrowBatches(const std::shared_ptr& batch) { - auto sharding = MakeSharding(batch); + THashMap> sharding = MakeSharding(batch); THashMap> chunks; if (sharding.size() == 1) { AFL_VERIFY(chunks.emplace(sharding.begin()->first, batch).second); @@ -257,7 +257,10 @@ TConclusion>> IShardingBase auto splitted = SplitByShardsToArrowBatches(batch); NArrow::TBatchSplitttingContext context(chunkBytesLimit); THashMap> result; - for (auto [tabletId, chunk] : splitted) { + for (auto&& [tabletId, chunk] : splitted) { + if (!chunk) { + continue; + } auto blobsSplittedConclusion = NArrow::SplitByBlobSize(chunk, context); if (blobsSplittedConclusion.IsFail()) { return TConclusionStatus::Fail("cannot split batch in according to limits: " + blobsSplittedConclusion.GetErrorMessage()); From b9e789a7124fd5086da93365b2d681dd55f699b7 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 28 May 2024 15:36:32 +0300 Subject: [PATCH 25/35] fix --- ydb/core/kqp/host/kqp_statement_rewrite.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.cpp b/ydb/core/kqp/host/kqp_statement_rewrite.cpp index f33180bc210a..2b1f86fced75 100644 --- a/ydb/core/kqp/host/kqp_statement_rewrite.cpp +++ b/ydb/core/kqp/host/kqp_statement_rewrite.cpp @@ -189,6 +189,10 @@ namespace { exprCtx.NewAtom(pos, "mode"), exprCtx.NewAtom(pos, "replace"), }), + exprCtx.NewList(pos, { + exprCtx.NewAtom(pos, "allowInconsistent"), + exprCtx.NewAtom(pos, "true"), + }), }), }); From 8ecbc56b456a49e706c6fe0ee064768a10b82b30 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 29 May 2024 11:10:26 +0300 Subject: [PATCH 26/35] flag --- ydb/core/kqp/common/kqp_yql.cpp | 9 +++++++++ ydb/core/kqp/common/kqp_yql.h | 3 +++ ydb/core/kqp/expr_nodes/kqp_expr_nodes.json | 3 ++- ydb/core/kqp/host/kqp_statement_rewrite.cpp | 3 +-- ydb/core/kqp/host/kqp_type_ann.cpp | 2 +- ydb/core/kqp/opt/kqp_opt_effects.cpp | 11 +++++++++-- 6 files changed, 25 insertions(+), 6 deletions(-) diff --git a/ydb/core/kqp/common/kqp_yql.cpp b/ydb/core/kqp/common/kqp_yql.cpp index 14e4dbd53257..988f6f900dd9 100644 --- a/ydb/core/kqp/common/kqp_yql.cpp +++ b/ydb/core/kqp/common/kqp_yql.cpp @@ -282,6 +282,9 @@ TKqpUpsertRowsSettings TKqpUpsertRowsSettings::Parse(const TCoNameValueTupleList } else if (name == TKqpUpsertRowsSettings::IsUpdateSettingName) { YQL_ENSURE(tuple.Ref().ChildrenSize() == 1); settings.IsUpdate = true; + } else if (name == TKqpUpsertRowsSettings::AllowInconsistentWritesSettingName) { + YQL_ENSURE(tuple.Ref().ChildrenSize() == 1); + settings.AllowInconsistentWrites = true; } else { YQL_ENSURE(false, "Unknown KqpUpsertRows setting name '" << name << "'"); } @@ -310,6 +313,12 @@ NNodes::TCoNameValueTupleList TKqpUpsertRowsSettings::BuildNode(TExprContext& ct .Name().Build(IsUpdateSettingName) .Done()); } + if (AllowInconsistentWrites) { + settings.emplace_back( + Build(ctx, pos) + .Name().Build(AllowInconsistentWritesSettingName) + .Done()); + } return Build(ctx, pos) .Add(settings) diff --git a/ydb/core/kqp/common/kqp_yql.h b/ydb/core/kqp/common/kqp_yql.h index efd99a91dbfc..6c9278d52861 100644 --- a/ydb/core/kqp/common/kqp_yql.h +++ b/ydb/core/kqp/common/kqp_yql.h @@ -81,12 +81,15 @@ struct TKqpReadTableSettings { struct TKqpUpsertRowsSettings { static constexpr TStringBuf InplaceSettingName = "Inplace"; static constexpr TStringBuf IsUpdateSettingName = "IsUpdate"; + static constexpr TStringBuf AllowInconsistentWritesSettingName = "AllowInconsistentReads"; bool Inplace = false; bool IsUpdate = false; + bool AllowInconsistentWrites = false; void SetInplace() { Inplace = true; } void SetIsUpdate() { IsUpdate = true; } + void SetAllowInconsistentWrites() { AllowInconsistentWrites = true; } static TKqpUpsertRowsSettings Parse(const NNodes::TCoNameValueTupleList& settingsList); static TKqpUpsertRowsSettings Parse(const NNodes::TKqpUpsertRows& node); diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json index 496d65422a38..7f471c794cdd 100644 --- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json +++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json @@ -538,7 +538,8 @@ "Children": [ {"Index": 0, "Name": "Table", "Type": "TKqpTable"}, {"Index": 1, "Name": "Columns", "Type": "TCoAtomList"}, - {"Index": 2, "Name": "Settings", "Type": "TCoNameValueTupleList", "Optional": true} + {"Index": 2, "Name": "InconsistentWrite", "Type": "TCoAtom"}, + {"Index": 3, "Name": "Settings", "Type": "TCoNameValueTupleList", "Optional": true} ] }, { diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.cpp b/ydb/core/kqp/host/kqp_statement_rewrite.cpp index 2b1f86fced75..b79d4f94ea31 100644 --- a/ydb/core/kqp/host/kqp_statement_rewrite.cpp +++ b/ydb/core/kqp/host/kqp_statement_rewrite.cpp @@ -190,8 +190,7 @@ namespace { exprCtx.NewAtom(pos, "replace"), }), exprCtx.NewList(pos, { - exprCtx.NewAtom(pos, "allowInconsistent"), - exprCtx.NewAtom(pos, "true"), + exprCtx.NewAtom(pos, "AllowInconsistentWrite"), }), }), }); diff --git a/ydb/core/kqp/host/kqp_type_ann.cpp b/ydb/core/kqp/host/kqp_type_ann.cpp index 1b7bc6311c36..1ea01b629491 100644 --- a/ydb/core/kqp/host/kqp_type_ann.cpp +++ b/ydb/core/kqp/host/kqp_type_ann.cpp @@ -1840,7 +1840,7 @@ TStatus AnnotateKqpSinkEffect(const TExprNode::TPtr& node, TExprContext& ctx) { } TStatus AnnotateTableSinkSettings(const TExprNode::TPtr& input, TExprContext& ctx) { - if (!EnsureMinMaxArgsCount(*input, 2, 3, ctx)) { + if (!EnsureMinMaxArgsCount(*input, 2, 4, ctx)) { return TStatus::Error; } input->SetTypeAnn(ctx.MakeType()); diff --git a/ydb/core/kqp/opt/kqp_opt_effects.cpp b/ydb/core/kqp/opt/kqp_opt_effects.cpp index c40ba344160c..1f5d2467bd7e 100644 --- a/ydb/core/kqp/opt/kqp_opt_effects.cpp +++ b/ydb/core/kqp/opt/kqp_opt_effects.cpp @@ -217,7 +217,8 @@ bool IsMapWrite(const TKikimrTableDescription& table, TExprBase input, TExprCont #undef DBG } -TDqStage RebuildPureStageWithSink(TExprBase expr, const TKqpTable& table, const TCoAtomList& columns, TExprContext& ctx) { +TDqStage RebuildPureStageWithSink(TExprBase expr, const TKqpTable& table, const TCoAtomList& columns, + const TKqpUpsertRowsSettings& settings, TExprContext& ctx) { Y_DEBUG_ABORT_UNLESS(IsDqPureExpr(expr)); return Build(ctx, expr.Pos()) @@ -239,6 +240,9 @@ TDqStage RebuildPureStageWithSink(TExprBase expr, const TKqpTable& table, const .Settings() .Table(table) .Columns(columns) + .InconsistentWrite(settings.AllowInconsistentWrites + ? ctx.NewAtom(expr.Pos(), "true") + : ctx.NewAtom(expr.Pos(), "false")) .Settings() .Build() .Build() @@ -292,7 +296,7 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const } if (IsDqPureExpr(node.Input())) { if (sinkEffect) { - stageInput = RebuildPureStageWithSink(node.Input(), node.Table(), node.Columns(), ctx); + stageInput = RebuildPureStageWithSink(node.Input(), node.Table(), node.Columns(), settings, ctx); effect = Build(ctx, node.Pos()) .Stage(stageInput.Cast().Ptr()) .SinkIndex().Build("0") @@ -330,6 +334,9 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const .Settings() .Table(node.Table()) .Columns(node.Columns()) + .InconsistentWrite(settings.AllowInconsistentWrites + ? ctx.NewAtom(node.Pos(), "true") + : ctx.NewAtom(node.Pos(), "false")) .Settings() .Build() .Build() From 5d816bcc39b29dbb84d28ec99cf8a8bd8367753f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 29 May 2024 16:27:17 +0300 Subject: [PATCH 27/35] fix --- ydb/core/kqp/common/kqp_yql.cpp | 2 +- ydb/core/kqp/common/kqp_yql.h | 2 +- ydb/core/kqp/host/kqp_statement_rewrite.cpp | 3 ++- ydb/core/kqp/opt/kqp_opt_kql.cpp | 19 +++++++++++++++ ydb/core/kqp/provider/yql_kikimr_datasink.cpp | 1 + ydb/core/kqp/provider/yql_kikimr_type_ann.cpp | 24 ++++++++++++------- .../kqp/query_compiler/kqp_query_compiler.cpp | 8 +++++++ ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- ydb/core/kqp/ut/query/kqp_query_ut.cpp | 1 + 9 files changed, 50 insertions(+), 12 deletions(-) diff --git a/ydb/core/kqp/common/kqp_yql.cpp b/ydb/core/kqp/common/kqp_yql.cpp index 988f6f900dd9..7b7cd0625c26 100644 --- a/ydb/core/kqp/common/kqp_yql.cpp +++ b/ydb/core/kqp/common/kqp_yql.cpp @@ -283,7 +283,7 @@ TKqpUpsertRowsSettings TKqpUpsertRowsSettings::Parse(const TCoNameValueTupleList YQL_ENSURE(tuple.Ref().ChildrenSize() == 1); settings.IsUpdate = true; } else if (name == TKqpUpsertRowsSettings::AllowInconsistentWritesSettingName) { - YQL_ENSURE(tuple.Ref().ChildrenSize() == 1); + YQL_ENSURE(tuple.Ref().ChildrenSize() == 2); // TODO: 1 settings.AllowInconsistentWrites = true; } else { YQL_ENSURE(false, "Unknown KqpUpsertRows setting name '" << name << "'"); diff --git a/ydb/core/kqp/common/kqp_yql.h b/ydb/core/kqp/common/kqp_yql.h index 6c9278d52861..7fca43a5cd63 100644 --- a/ydb/core/kqp/common/kqp_yql.h +++ b/ydb/core/kqp/common/kqp_yql.h @@ -81,7 +81,7 @@ struct TKqpReadTableSettings { struct TKqpUpsertRowsSettings { static constexpr TStringBuf InplaceSettingName = "Inplace"; static constexpr TStringBuf IsUpdateSettingName = "IsUpdate"; - static constexpr TStringBuf AllowInconsistentWritesSettingName = "AllowInconsistentReads"; + static constexpr TStringBuf AllowInconsistentWritesSettingName = "AllowInconsistentWrites"; bool Inplace = false; bool IsUpdate = false; diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.cpp b/ydb/core/kqp/host/kqp_statement_rewrite.cpp index b79d4f94ea31..caf600fda03c 100644 --- a/ydb/core/kqp/host/kqp_statement_rewrite.cpp +++ b/ydb/core/kqp/host/kqp_statement_rewrite.cpp @@ -190,7 +190,8 @@ namespace { exprCtx.NewAtom(pos, "replace"), }), exprCtx.NewList(pos, { - exprCtx.NewAtom(pos, "AllowInconsistentWrite"), + exprCtx.NewAtom(pos, "AllowInconsistentWrites"), + exprCtx.NewList(pos, {}), }), }), }); diff --git a/ydb/core/kqp/opt/kqp_opt_kql.cpp b/ydb/core/kqp/opt/kqp_opt_kql.cpp index ec833baf9365..6a3d63ab6ebf 100644 --- a/ydb/core/kqp/opt/kqp_opt_kql.cpp +++ b/ydb/core/kqp/opt/kqp_opt_kql.cpp @@ -243,6 +243,7 @@ TExprBase BuildUpsertTable(const TKiWriteTable& write, const TCoAtomList& inputC .Input(input.Ptr()) .Columns(columns.Ptr()) .ReturningColumns(write.ReturningColumns()) + //.Settings(write.Settings()) .Done(); return effect; @@ -263,6 +264,7 @@ TExprBase BuildUpsertTableWithIndex(const TKiWriteTable& write, const TCoAtomLis .Columns(columns.Ptr()) .ReturningColumns(write.ReturningColumns()) .GenerateColumnsIfInsert(generateColumnsIfInsert) + //.Settings(write.Settings()) .Done(); return effect; @@ -272,12 +274,28 @@ TExprBase BuildReplaceTable(const TKiWriteTable& write, const TCoAtomList& input const TCoAtomList& autoincrement, const TKikimrTableDescription& table, TExprContext& ctx) { + TVector settings; + auto setting = GetSetting(write.Settings().Ref(), "AllowInconsistentWrites"); + if (setting) { + Cerr << "TEST :: " << NCommon::ExprToPrettyString(ctx, *setting) << Endl; + settings.push_back(TCoNameValueTuple(setting)); + } + + Cerr << "TEST :: " << NCommon::ExprToPrettyString(ctx, write.Settings().Ref()) << Endl; + //auto settingResult = TCoNameValueTuple(setting); + //TODO: rewrite + auto result = Build(ctx, write.Pos()) + .Add(settings) + .Done() + .Ptr(); + const auto [input, columns] = BuildWriteInput(write, table, inputColumns, autoincrement, write.Pos(), ctx); auto effect = Build(ctx, write.Pos()) .Table(BuildTableMeta(table, write.Pos(), ctx)) .Input(input.Ptr()) .Columns(columns) .ReturningColumns(write.ReturningColumns()) + .Settings(result) .Done(); return effect; @@ -294,6 +312,7 @@ TExprBase BuildReplaceTableWithIndex(const TKiWriteTable& write, const TCoAtomLi .Columns(columns.Ptr()) .ReturningColumns(write.ReturningColumns()) .GenerateColumnsIfInsert().Build() + //.Settings(write.Settings()) .Done(); return effect; diff --git a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp index 25cd72d3b6b1..906709cd578c 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp @@ -998,6 +998,7 @@ class TKikimrDataSink : public TDataProviderBase .Ptr(); } } else { + Cerr << "TEST <<< >>> " << NCommon::ExprToPrettyString(ctx, *settings.Other.Ptr()) << Endl; return Build(ctx, node->Pos()) .World(node->Child(0)) .DataSink(node->Child(1)) diff --git a/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp b/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp index 6dd19301f2ac..702fdecaa475 100644 --- a/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp @@ -615,26 +615,34 @@ class TKiSinkTypeAnnotationTransformer : public TKiSinkVisitorTransformer generateColumnsIfInsert.push_back(ctx.NewAtom(node.Pos(), generatedColumn)); } - node.Ptr()->ChildRef(TKiWriteTable::idx_Settings) = Build(ctx, node.Pos()) - .Add(node.Settings()) - .Add() + TVector settings; + for (const auto& setting : node.Settings()) { + settings.push_back(setting); + } + settings.push_back( + Build(ctx, node.Pos()) .Name().Build("input_columns") .Value() .Add(columns) .Build() - .Build() - .Add() + .Done()); + settings.push_back( + Build(ctx, node.Pos()) .Name().Build("default_constraint_columns") .Value() .Add(defaultConstraintColumns) .Build() - .Build() - .Add() + .Done()); + settings.push_back( + Build(ctx, node.Pos()) .Name().Build("generate_columns_if_insert") .Value() .Add(generateColumnsIfInsert) .Build() - .Build() + .Done()); + + node.Ptr()->ChildRef(TKiWriteTable::idx_Settings) = Build(ctx, node.Pos()) + .Add(settings) .Done() .Ptr(); diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 9e3db46f023a..3605a9cf3b73 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -1033,6 +1033,7 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { for (const auto& columnName : tableMeta->KeyColumnNames) { const auto columnMeta = tableMeta->Columns.FindPtr(columnName); YQL_ENSURE(columnMeta != nullptr, "Unknown column in sink: \"" + columnName + "\""); + auto keyColumnProto = settingsProto.AddKeyColumns(); keyColumnProto->SetId(columnMeta->Id); keyColumnProto->SetName(columnName); @@ -1062,6 +1063,13 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { } } + Cerr << "TEST << " << settings.InconsistentWrite().Cast().StringValue() << Endl; + if (const auto inconsistentWrite = settings.InconsistentWrite().Cast(); inconsistentWrite.StringValue() == "true") { + settingsProto.SetInconsistentTx(true); + } + + //settingsProto.SetInconsistentTx(true); + internalSinkProto.MutableSettings()->PackFrom(settingsProto); } else { YQL_ENSURE(false, "Unsupported sink type"); diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 11ef62e34561..3555d5e347d1 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -143,7 +143,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Settings.GetInconsistentTx()) { YQL_ENSURE(std::holds_alternative(TxId)); - YQL_ENSURE(!InconsistentTx && !ImmediateTx); + //YQL_ENSURE(!InconsistentTx && !ImmediateTx); EgressStats.Level = args.StatsLevel; } diff --git a/ydb/core/kqp/ut/query/kqp_query_ut.cpp b/ydb/core/kqp/ut/query/kqp_query_ut.cpp index 92b4541eee4a..995bd1d50e10 100644 --- a/ydb/core/kqp/ut/query/kqp_query_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_query_ut.cpp @@ -1787,6 +1787,7 @@ Y_UNIT_TEST_SUITE(KqpQuery) { Y_UNIT_TEST(OltpCreateAsSelect_Simple) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); appConfig.MutableTableServiceConfig()->SetEnableCreateTableAs(true); appConfig.MutableTableServiceConfig()->SetEnablePerStatementQueryExecution(true); From 5811fd23c53b288698be041cf3f28c0c5c161cb9 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 29 May 2024 17:39:52 +0300 Subject: [PATCH 28/35] fix --- ydb/core/kqp/opt/kqp_opt_kql.cpp | 27 ++++++------------- ydb/core/kqp/provider/yql_kikimr_datasink.cpp | 1 - .../kqp/query_compiler/kqp_query_compiler.cpp | 3 --- ydb/library/yql/core/yql_opt_utils.cpp | 11 ++++++++ ydb/library/yql/core/yql_opt_utils.h | 1 + 5 files changed, 20 insertions(+), 23 deletions(-) diff --git a/ydb/core/kqp/opt/kqp_opt_kql.cpp b/ydb/core/kqp/opt/kqp_opt_kql.cpp index 6a3d63ab6ebf..8114c0e0177e 100644 --- a/ydb/core/kqp/opt/kqp_opt_kql.cpp +++ b/ydb/core/kqp/opt/kqp_opt_kql.cpp @@ -227,6 +227,7 @@ TExprBase BuildUpsertTable(const TKiWriteTable& write, const TCoAtomList& inputC YQL_ENSURE(generateColumnsIfInsertNode); TCoAtomList generateColumnsIfInsert = TCoNameValueTuple(generateColumnsIfInsertNode).Value().Cast(); + auto settings = FilterSettings(write.Settings().Ref(), {"AllowInconsistentWrites"}, ctx); const auto [input, columns] = BuildWriteInput(write, table, inputColumns, autoincrement, write.Pos(), ctx); if (generateColumnsIfInsert.Ref().ChildrenSize() > 0) { return Build(ctx, write.Pos()) @@ -243,7 +244,7 @@ TExprBase BuildUpsertTable(const TKiWriteTable& write, const TCoAtomList& inputC .Input(input.Ptr()) .Columns(columns.Ptr()) .ReturningColumns(write.ReturningColumns()) - //.Settings(write.Settings()) + .Settings(settings) .Done(); return effect; @@ -253,6 +254,7 @@ TExprBase BuildUpsertTableWithIndex(const TKiWriteTable& write, const TCoAtomLis const TCoAtomList& autoincrement, const TKikimrTableDescription& table, TExprContext& ctx) { + auto settings = FilterSettings(write.Settings().Ref(), {"AllowInconsistentWrites"}, ctx); const auto [input, columns] = BuildWriteInput(write, table, inputColumns, autoincrement, write.Pos(), ctx); auto generateColumnsIfInsertNode = GetSetting(write.Settings().Ref(), "generate_columns_if_insert"); YQL_ENSURE(generateColumnsIfInsertNode); @@ -264,7 +266,7 @@ TExprBase BuildUpsertTableWithIndex(const TKiWriteTable& write, const TCoAtomLis .Columns(columns.Ptr()) .ReturningColumns(write.ReturningColumns()) .GenerateColumnsIfInsert(generateColumnsIfInsert) - //.Settings(write.Settings()) + .Settings(settings) .Done(); return effect; @@ -274,28 +276,14 @@ TExprBase BuildReplaceTable(const TKiWriteTable& write, const TCoAtomList& input const TCoAtomList& autoincrement, const TKikimrTableDescription& table, TExprContext& ctx) { - TVector settings; - auto setting = GetSetting(write.Settings().Ref(), "AllowInconsistentWrites"); - if (setting) { - Cerr << "TEST :: " << NCommon::ExprToPrettyString(ctx, *setting) << Endl; - settings.push_back(TCoNameValueTuple(setting)); - } - - Cerr << "TEST :: " << NCommon::ExprToPrettyString(ctx, write.Settings().Ref()) << Endl; - //auto settingResult = TCoNameValueTuple(setting); - //TODO: rewrite - auto result = Build(ctx, write.Pos()) - .Add(settings) - .Done() - .Ptr(); - + auto settings = FilterSettings(write.Settings().Ref(), {"AllowInconsistentWrites"}, ctx); const auto [input, columns] = BuildWriteInput(write, table, inputColumns, autoincrement, write.Pos(), ctx); auto effect = Build(ctx, write.Pos()) .Table(BuildTableMeta(table, write.Pos(), ctx)) .Input(input.Ptr()) .Columns(columns) .ReturningColumns(write.ReturningColumns()) - .Settings(result) + .Settings(settings) .Done(); return effect; @@ -305,6 +293,7 @@ TExprBase BuildReplaceTableWithIndex(const TKiWriteTable& write, const TCoAtomLi const TCoAtomList& autoincrement, const TKikimrTableDescription& table, TExprContext& ctx) { + auto settings = FilterSettings(write.Settings().Ref(), {"AllowInconsistentWrites"}, ctx); const auto [input, columns] = BuildWriteInput(write, table, inputColumns, autoincrement, write.Pos(), ctx); auto effect = Build(ctx, write.Pos()) .Table(BuildTableMeta(table, write.Pos(), ctx)) @@ -312,7 +301,7 @@ TExprBase BuildReplaceTableWithIndex(const TKiWriteTable& write, const TCoAtomLi .Columns(columns.Ptr()) .ReturningColumns(write.ReturningColumns()) .GenerateColumnsIfInsert().Build() - //.Settings(write.Settings()) + .Settings(settings) .Done(); return effect; diff --git a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp index 906709cd578c..25cd72d3b6b1 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp @@ -998,7 +998,6 @@ class TKikimrDataSink : public TDataProviderBase .Ptr(); } } else { - Cerr << "TEST <<< >>> " << NCommon::ExprToPrettyString(ctx, *settings.Other.Ptr()) << Endl; return Build(ctx, node->Pos()) .World(node->Child(0)) .DataSink(node->Child(1)) diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 3605a9cf3b73..d556e0b9ed0b 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -1063,13 +1063,10 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { } } - Cerr << "TEST << " << settings.InconsistentWrite().Cast().StringValue() << Endl; if (const auto inconsistentWrite = settings.InconsistentWrite().Cast(); inconsistentWrite.StringValue() == "true") { settingsProto.SetInconsistentTx(true); } - //settingsProto.SetInconsistentTx(true); - internalSinkProto.MutableSettings()->PackFrom(settingsProto); } else { YQL_ENSURE(false, "Unsupported sink type"); diff --git a/ydb/library/yql/core/yql_opt_utils.cpp b/ydb/library/yql/core/yql_opt_utils.cpp index 37ae3d11ec19..576afe9ed0cc 100644 --- a/ydb/library/yql/core/yql_opt_utils.cpp +++ b/ydb/library/yql/core/yql_opt_utils.cpp @@ -555,6 +555,17 @@ TExprNode::TPtr GetSetting(const TExprNode& settings, const TStringBuf& name) { return nullptr; } +TExprNode::TPtr FilterSettings(const TExprNode& settings, const THashSet& names, TExprContext& ctx) { + TExprNode::TListType children; + for (auto setting : settings.Children()) { + if (setting->ChildrenSize() != 0 && names.contains(setting->Head().Content())) { + children.push_back(setting); + } + } + + return ctx.NewList(settings.Pos(), std::move(children)); +} + bool HasSetting(const TExprNode& settings, const TStringBuf& name) { return GetSetting(settings, name) != nullptr; } diff --git a/ydb/library/yql/core/yql_opt_utils.h b/ydb/library/yql/core/yql_opt_utils.h index 4dc04cae82fb..69759bf7e7eb 100644 --- a/ydb/library/yql/core/yql_opt_utils.h +++ b/ydb/library/yql/core/yql_opt_utils.h @@ -51,6 +51,7 @@ const TTypeAnnotationNode* RemoveOptionalType(const TTypeAnnotationNode* type); const TTypeAnnotationNode* RemoveAllOptionals(const TTypeAnnotationNode* type); TExprNode::TPtr GetSetting(const TExprNode& settings, const TStringBuf& name); +TExprNode::TPtr FilterSettings(const TExprNode& settings, const THashSet& names, TExprContext& ctx); bool HasSetting(const TExprNode& settings, const TStringBuf& name); bool HasAnySetting(const TExprNode& settings, const THashSet& names); TExprNode::TPtr RemoveSetting(const TExprNode& settings, const TStringBuf& name, TExprContext& ctx); From f27f80a5fd97e607097e334376d8d7700c9a873b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 29 May 2024 18:08:04 +0300 Subject: [PATCH 29/35] fix --- ydb/core/kqp/host/kqp_statement_rewrite.cpp | 31 +++++++++++++-------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.cpp b/ydb/core/kqp/host/kqp_statement_rewrite.cpp index caf600fda03c..2351b71a3422 100644 --- a/ydb/core/kqp/host/kqp_statement_rewrite.cpp +++ b/ydb/core/kqp/host/kqp_statement_rewrite.cpp @@ -19,7 +19,7 @@ namespace { NYql::TExprNode::TPtr ReplaceInto = nullptr; }; - bool IsColumnTable(const NYql::NNodes::TMaybeNode& tableSettings) { + bool IsOlap(const NYql::NNodes::TMaybeNode& tableSettings) { if (!tableSettings) { return false; } @@ -85,6 +85,8 @@ namespace { return std::nullopt; } + const bool isOlap = IsOlap(settings.TableSettings); + const auto& insertData = writeArgs.Get(3); if (insertData.Ptr()->Content() == "Void") { return std::nullopt; @@ -136,7 +138,7 @@ namespace { const auto name = item->GetName(); auto currentType = item->GetItemType(); - const bool notNull = primariKeyColumns.contains(name) && IsColumnTable(settings.TableSettings); + const bool notNull = primariKeyColumns.contains(name) && isOlap; if (notNull && currentType->GetKind() == NYql::ETypeAnnotationKind::Optional) { currentType = currentType->Cast()->GetItemType(); @@ -169,6 +171,20 @@ namespace { const auto topLevelRead = NYql::FindTopLevelRead(insertData.Ptr()); + NYql::TExprNode::TListType insertSettings; + insertSettings.push_back( + exprCtx.NewList(pos, { + exprCtx.NewAtom(pos, "mode"), + exprCtx.NewAtom(pos, "replace"), + })); + if (!isOlap) { + insertSettings.push_back( + exprCtx.NewList(pos, { + exprCtx.NewAtom(pos, "AllowInconsistentWrites"), + exprCtx.NewList(pos, {}), + })); + } + const auto insert = exprCtx.NewCallable(pos, "Write!", { topLevelRead == nullptr ? exprCtx.NewWorld(pos) : exprCtx.NewCallable(pos, "Left!", {topLevelRead.Get()}), exprCtx.NewCallable(pos, "DataSink", { @@ -184,16 +200,7 @@ namespace { }), }), insertData.Ptr(), - exprCtx.NewList(pos, { - exprCtx.NewList(pos, { - exprCtx.NewAtom(pos, "mode"), - exprCtx.NewAtom(pos, "replace"), - }), - exprCtx.NewList(pos, { - exprCtx.NewAtom(pos, "AllowInconsistentWrites"), - exprCtx.NewList(pos, {}), - }), - }), + exprCtx.NewList(pos, std::move(insertSettings)), }); TCreateTableAsResult result; From f7350e1be277262fc569b4aa28c91cd6315cd2b2 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 31 May 2024 13:58:06 +0300 Subject: [PATCH 30/35] tests --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 3555d5e347d1..2c87e8661cf1 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -29,11 +29,11 @@ namespace { struct TWriteActorBackoffSettings { TDuration StartRetryDelay = TDuration::MilliSeconds(200); - TDuration MaxRetryDelay = TDuration::Seconds(20); + TDuration MaxRetryDelay = TDuration::Seconds(30); double UnsertaintyRatio = 0.5; double Multiplier = 2.0; - ui64 MaxWriteAttempts = 16; + ui64 MaxWriteAttempts = 32; }; const TWriteActorBackoffSettings* BackoffSettings() { @@ -297,7 +297,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } void Handle(TEvTxProxySchemeCache::TEvResolveKeySetResult::TPtr& ev) { - YQL_ENSURE(!SchemeRequest); + YQL_ENSURE(!SchemeRequest || InconsistentTx); auto* request = ev->Get()->Request.Get(); if (request->ErrorCount > 0) { @@ -356,11 +356,12 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "."); - RuntimeError( - TStringBuilder() << "Got INTERNAL ERROR for table `" - << SchemeEntry->TableId.PathId.ToString() << "`.", - NYql::NDqProto::StatusIds::INTERNAL_ERROR, - getIssues()); + //RuntimeError( + // TStringBuilder() << "Got INTERNAL ERROR for table `" + // << SchemeEntry->TableId.PathId.ToString() << "`.", + // NYql::NDqProto::StatusIds::INTERNAL_ERROR, + // getIssues()); + ResolveTable(); return; } case NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED: { From ae44050bdc6aa6333006fad6c3c409530c8a8ac5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 2 Jun 2024 16:21:50 +0300 Subject: [PATCH 31/35] tests --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 3 ++- ydb/core/kqp/runtime/kqp_write_table.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 2c87e8661cf1..db17fac72124 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -195,7 +195,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Finished = finished; EgressStats.Resume(); - CA_LOG_D("New data: size=" << size << ", finished=" << finished << "."); + CA_LOG_D("New data: size=" << size << ", finished=" << finished << "." << "freespace=" << ShardedWriteController->GetMemory()); YQL_ENSURE(ShardedWriteController); try { @@ -208,6 +208,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CurrentExceptionMessage(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); } + CA_LOG_D("ADDED New data: freespace=" << ShardedWriteController->GetMemory()); ProcessBatches(); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 3459c61dc8fd..c69f32ec8eab 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -493,7 +493,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { Columns[index].PType, row.GetElement(index), TypeEnv, - /* copy */ true); + /* copy */ false); } AddRow(cells, GetKeyRange()); From 68e41fd82aa27bf3178770db9ef6616e6afd0474 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 5 Jun 2024 16:36:16 +0300 Subject: [PATCH 32/35] fix --- ydb/core/kqp/common/kqp_yql.cpp | 2 +- ydb/core/kqp/host/kqp_statement_rewrite.cpp | 3 +-- ydb/core/kqp/opt/kqp_opt_effects.cpp | 2 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 13 ++++++------- ydb/core/kqp/runtime/kqp_write_table.cpp | 5 +++-- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/ydb/core/kqp/common/kqp_yql.cpp b/ydb/core/kqp/common/kqp_yql.cpp index 7b7cd0625c26..988f6f900dd9 100644 --- a/ydb/core/kqp/common/kqp_yql.cpp +++ b/ydb/core/kqp/common/kqp_yql.cpp @@ -283,7 +283,7 @@ TKqpUpsertRowsSettings TKqpUpsertRowsSettings::Parse(const TCoNameValueTupleList YQL_ENSURE(tuple.Ref().ChildrenSize() == 1); settings.IsUpdate = true; } else if (name == TKqpUpsertRowsSettings::AllowInconsistentWritesSettingName) { - YQL_ENSURE(tuple.Ref().ChildrenSize() == 2); // TODO: 1 + YQL_ENSURE(tuple.Ref().ChildrenSize() == 1); settings.AllowInconsistentWrites = true; } else { YQL_ENSURE(false, "Unknown KqpUpsertRows setting name '" << name << "'"); diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.cpp b/ydb/core/kqp/host/kqp_statement_rewrite.cpp index 2351b71a3422..11202b97a76f 100644 --- a/ydb/core/kqp/host/kqp_statement_rewrite.cpp +++ b/ydb/core/kqp/host/kqp_statement_rewrite.cpp @@ -180,8 +180,7 @@ namespace { if (!isOlap) { insertSettings.push_back( exprCtx.NewList(pos, { - exprCtx.NewAtom(pos, "AllowInconsistentWrites"), - exprCtx.NewList(pos, {}), + exprCtx.NewAtom(pos, "AllowInconsistentWrites"), })); } diff --git a/ydb/core/kqp/opt/kqp_opt_effects.cpp b/ydb/core/kqp/opt/kqp_opt_effects.cpp index 1f5d2467bd7e..5a765c573f5b 100644 --- a/ydb/core/kqp/opt/kqp_opt_effects.cpp +++ b/ydb/core/kqp/opt/kqp_opt_effects.cpp @@ -346,7 +346,7 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const .Name("row") .Done(); - if (table.Metadata->Kind == EKikimrTableKind::Olap) { + if (table.Metadata->Kind == EKikimrTableKind::Olap || settings.AllowInconsistentWrites) { // OLAP is expected to write into all shards (hash partitioning), // so we use serveral sinks for this without union all. // (TODO: shuffle by shard instead of DqCnMap) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 9297c67e9a61..d70a912c4330 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -25,10 +25,10 @@ namespace { constexpr i64 kInFlightMemoryLimitPerActor = 64_MB; constexpr i64 kMemoryLimitPerMessage = 48_MB; - constexpr i64 kMaxBatchesPerMessage = 8; + constexpr i64 kMaxBatchesPerMessage = 1; struct TWriteActorBackoffSettings { - TDuration StartRetryDelay = TDuration::MilliSeconds(200); + TDuration StartRetryDelay = TDuration::MilliSeconds(100); TDuration MaxRetryDelay = TDuration::Seconds(30); double UnsertaintyRatio = 0.5; double Multiplier = 2.0; @@ -92,7 +92,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void CheckMemory() { const auto freeSpace = Writer.GetFreeSpace(); - if (freeSpace > LastFreeMemory) { + if (freeSpace > LastFreeMemory && freeSpace >= Writer.MemoryLimit / 2) { + YQL_ENSURE(freeSpace > 0); Writer.ResumeExecution(); } LastFreeMemory = freeSpace; @@ -143,7 +144,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Settings.GetInconsistentTx()) { YQL_ENSURE(std::holds_alternative(TxId)); - //YQL_ENSURE(!InconsistentTx && !ImmediateTx); + YQL_ENSURE(!ImmediateTx); EgressStats.Level = args.StatsLevel; } @@ -195,7 +196,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Finished = finished; EgressStats.Resume(); - CA_LOG_D("New data: size=" << size << ", finished=" << finished << "." << "freespace=" << ShardedWriteController->GetMemory()); + CA_LOG_D("New data: size=" << size << ", finished=" << finished << "."); YQL_ENSURE(ShardedWriteController); try { @@ -208,8 +209,6 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CurrentExceptionMessage(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - CA_LOG_D("ADDED New data: freespace=" << ShardedWriteController->GetMemory()); - ProcessBatches(); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index c69f32ec8eab..a4e1513ce4b7 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -19,7 +19,7 @@ namespace NKqp { namespace { constexpr ui64 MaxBatchBytes = 8_MB; -constexpr ui64 MaxUnshardedBatchBytes = 8_MB; +constexpr ui64 MaxUnshardedBatchBytes = 4_MB; TVector BuildColumns(const TConstArrayRef inputColumns) { TVector result; @@ -489,11 +489,12 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { TVector cells(Columns.size()); data.ForEachRow([&](const auto& row) { for (size_t index = 0; index < Columns.size(); ++index) { + // TODO: move to SerializedVector cells[WriteIndex[index]] = MakeCell( Columns[index].PType, row.GetElement(index), TypeEnv, - /* copy */ false); + /* copy */ true); } AddRow(cells, GetKeyRange()); From 74b06b405c81c56e0d8b88764bf91f065438de1c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 5 Jun 2024 18:13:12 +0300 Subject: [PATCH 33/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 88 +++++++++++++++++++----- ydb/core/kqp/runtime/kqp_write_table.cpp | 12 ++++ ydb/core/kqp/runtime/kqp_write_table.h | 2 + 3 files changed, 83 insertions(+), 19 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index d70a912c4330..1c677ddfd1b0 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -34,6 +34,7 @@ namespace { double Multiplier = 2.0; ui64 MaxWriteAttempts = 32; + ui64 MaxResolveAttempts = 5; }; const TWriteActorBackoffSettings* BackoffSettings() { @@ -109,15 +110,21 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N struct TEvPrivate { enum EEv { EvShardRequestTimeout = EventSpaceBegin(TKikimrEvents::ES_PRIVATE), + EvResolveRequestPlanned, }; struct TEvShardRequestTimeout : public TEventLocal { ui64 ShardId; + ui64 SendAttempts; - TEvShardRequestTimeout(ui64 shardId) - : ShardId(shardId) { + TEvShardRequestTimeout(ui64 shardId, ui64 sendAttempts) + : ShardId(shardId) + , SendAttempts(sendAttempts) { } }; + + struct TEvResolveRequestPlanned : public TEventLocal { + }; }; public: @@ -229,7 +236,32 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } } + void RetryResolveTable() { + if (ResolveAttempts == 0) { + ResolveTable(); + } + } + + void PlanResolveTable() { + TlsActivationContext->Schedule( + CalculateNextAttemptDelay(ResolveAttempts), + new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvResolveRequestPlanned{}, 0, 0)); + } + void ResolveTable() { + SchemeEntry.reset(); + SchemeRequest.reset(); + + if (ResolveAttempts++ >= BackoffSettings()->MaxResolveAttempts) { + const auto error = TStringBuilder() + << "Too many table resolve attempts for Sink=" << this->SelfId() << "."; + CA_LOG_E(error); + RuntimeError( + error, + NYql::NDqProto::StatusIds::SCHEME_ERROR); + return; + } + CA_LOG_D("Resolve TableId=" << TableId); TAutoPtr request(new NSchemeCache::TSchemeCacheNavigate()); NSchemeCache::TSchemeCacheNavigate::TEntry entry; @@ -238,13 +270,16 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpTable; entry.SyncVersion = false; request->ResultSet.emplace_back(entry); + + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {})); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request)); } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { if (ev->Get()->Request->ErrorCount > 0) { - RuntimeError(TStringBuilder() << "Failed to get table: " - << TableId << "'", NYql::NDqProto::StatusIds::SCHEME_ERROR); + CA_LOG_E(TStringBuilder() << "Failed to get table: " + << TableId << "'"); + PlanResolveTable(); return; } auto& resultSet = ev->Get()->Request->ResultSet; @@ -301,8 +336,9 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N auto* request = ev->Get()->Request.Get(); if (request->ErrorCount > 0) { - RuntimeError(TStringBuilder() << "Failed to get table: " - << TableId << "'", NYql::NDqProto::StatusIds::SCHEME_ERROR); + CA_LOG_E(TStringBuilder() << "Failed to get table: " + << TableId << "'"); + PlanResolveTable(); return; } @@ -336,6 +372,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N YQL_ENSURE(false); } case NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED: { + ResetShardRetries(ev->Get()->Record.GetOrigin(), ev->Cookie); ProcessWriteCompletedShard(ev); return; } @@ -356,12 +393,18 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "."); - //RuntimeError( - // TStringBuilder() << "Got INTERNAL ERROR for table `" - // << SchemeEntry->TableId.PathId.ToString() << "`.", - // NYql::NDqProto::StatusIds::INTERNAL_ERROR, - // getIssues()); - ResolveTable(); + + // TODO: Add new status for splits in datashard. This is tmp solution. + if (getIssues().ToOneLineString().Contains("in a pre/offline state assuming this is due to a finished split (wrong shard state)")) { + ResetShardRetries(ev->Get()->Record.GetOrigin(), ev->Cookie); + RetryResolveTable(); + } else { + RuntimeError( + TStringBuilder() << "Got INTERNAL ERROR for table `" + << SchemeEntry->TableId.PathId.ToString() << "`.", + NYql::NDqProto::StatusIds::INTERNAL_ERROR, + getIssues()); + } return; } case NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED: { @@ -370,7 +413,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "." << " Ignored this error."); - // TODO: more retries + // TODO: support waiting return; } case NKikimrDataEvents::TEvWriteResult::STATUS_CANCELLED: { @@ -403,7 +446,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "."); if (InconsistentTx) { - ResolveTable(); + ResetShardRetries(ev->Get()->Record.GetOrigin(), ev->Cookie); + RetryResolveTable(); } else { RuntimeError( TStringBuilder() << "Got SCHEME CHANGED for table `" @@ -539,13 +583,13 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N if (SchemeEntry->Kind != NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { TlsActivationContext->Schedule( CalculateNextAttemptDelay(metadata->SendAttempts), - new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvShardRequestTimeout(shardId), 0, metadata->Cookie)); + new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvShardRequestTimeout(shardId, metadata->SendAttempts), 0, metadata->Cookie)); } } - void RetryShard(const ui64 shardId, const std::optional ifCookieEqual) { + void RetryShard(const ui64 shardId, const std::optional ifCookieEqual, const std::optional sendAttempts) { const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); - if (!metadata || (ifCookieEqual && metadata->Cookie != ifCookieEqual)) { + if (!metadata || (ifCookieEqual && metadata->Cookie != ifCookieEqual) || (sendAttempts && metadata->SendAttempts != sendAttempts)) { return; } @@ -553,14 +597,18 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N SendDataToShard(shardId); } + void ResetShardRetries(const ui64 shardId, const ui64 cookie) { + ShardedWriteController->ResetRetries(shardId, cookie); + } + void Handle(TEvPrivate::TEvShardRequestTimeout::TPtr& ev) { CA_LOG_W("Timeout shardID=" << ev->Get()->ShardId); - RetryShard(ev->Get()->ShardId, ev->Cookie); + RetryShard(ev->Get()->ShardId, ev->Cookie, ev->Get()->SendAttempts); } void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { CA_LOG_W("TEvDeliveryProblem was received from tablet: " << ev->Get()->TabletId); - RetryShard(ev->Get()->TabletId, std::nullopt); + RetryShard(ev->Get()->TabletId, std::nullopt, std::nullopt); } void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { @@ -582,6 +630,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void Prepare() { YQL_ENSURE(SchemeEntry); + ResolveAttempts = 0; if (!ShardedWriteController) { TVector columnsMetadata; @@ -646,6 +695,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N std::optional SchemeEntry; std::optional SchemeRequest; + ui64 ResolveAttempts = 0; THashMap LocksInfo; bool Finished = false; diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index a4e1513ce4b7..5372bc3f5486 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -715,6 +715,10 @@ class TShardsInfo { ++SendAttempts; } + void ResetSendAttempts() { + SendAttempts = 0; + } + private: std::deque Batches; bool Closed = false; @@ -897,6 +901,14 @@ class TShardedWriteController : public IShardedWriteController { shardInfo.IncSendAttempts(); } + void ResetRetries(ui64 shardId, ui64 cookie) override { + auto& shardInfo = ShardsInfo.GetShard(shardId); + if (shardInfo.IsEmpty() || shardInfo.GetCookie() != cookie) { + return; + } + shardInfo.ResetSendAttempts(); + } + i64 GetMemory() const override { YQL_ENSURE(Serializer); return Serializer->GetMemory() + ShardsInfo.GetMemory(); diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index d09df441e9c7..7846cb954cc6 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -90,6 +90,8 @@ class IShardedWriteController : public TThrRefBase { virtual std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; virtual void OnMessageSent(ui64 shardId, ui64 cookie) = 0; + virtual void ResetRetries(ui64 shardId, ui64 cookie) = 0; + virtual i64 GetMemory() const = 0; virtual bool IsClosed() const = 0; From af17d86d428c01be920b7cb23b1ffb9238fb9267 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 10 Jun 2024 13:50:17 +0300 Subject: [PATCH 34/35] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 72 +++++++++++++++--------- ydb/core/kqp/runtime/kqp_write_table.cpp | 66 ++++++++++++++-------- 2 files changed, 89 insertions(+), 49 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 1c677ddfd1b0..65b2141608e9 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -28,8 +28,8 @@ namespace { constexpr i64 kMaxBatchesPerMessage = 1; struct TWriteActorBackoffSettings { - TDuration StartRetryDelay = TDuration::MilliSeconds(100); - TDuration MaxRetryDelay = TDuration::Seconds(30); + TDuration StartRetryDelay = TDuration::MilliSeconds(250); + TDuration MaxRetryDelay = TDuration::Seconds(5); double UnsertaintyRatio = 0.5; double Multiplier = 2.0; @@ -93,7 +93,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void CheckMemory() { const auto freeSpace = Writer.GetFreeSpace(); - if (freeSpace > LastFreeMemory && freeSpace >= Writer.MemoryLimit / 2) { + const auto targetMemory = Writer.MemoryLimit / 2; + if (freeSpace >= targetMemory && targetMemory > LastFreeMemory) { YQL_ENSURE(freeSpace > 0); Writer.ResumeExecution(); } @@ -115,11 +116,9 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N struct TEvShardRequestTimeout : public TEventLocal { ui64 ShardId; - ui64 SendAttempts; - TEvShardRequestTimeout(ui64 shardId, ui64 sendAttempts) - : ShardId(shardId) - , SendAttempts(sendAttempts) { + TEvShardRequestTimeout(ui64 shardId) + : ShardId(shardId) { } }; @@ -179,7 +178,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } i64 GetFreeSpace() const final { - const i64 result = ShardedWriteController + const i64 result = (ShardedWriteController && !IsResolving()) ? MemoryLimit - ShardedWriteController->GetMemory() : std::numeric_limits::min(); // Can't use zero here because compute can use overcommit! return result; @@ -203,7 +202,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Finished = finished; EgressStats.Resume(); - CA_LOG_D("New data: size=" << size << ", finished=" << finished << "."); + CA_LOG_D("New data: size=" << size << ", finished=" << finished << ", used memory=" << ShardedWriteController->GetMemory() << "."); YQL_ENSURE(ShardedWriteController); try { @@ -236,8 +235,12 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } } + bool IsResolving() const { + return ResolveAttempts > 0; + } + void RetryResolveTable() { - if (ResolveAttempts == 0) { + if (!IsResolving()) { ResolveTable(); } } @@ -270,7 +273,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpTable; entry.SyncVersion = false; request->ResultSet.emplace_back(entry); - + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {})); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request)); } @@ -306,6 +309,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void ResolveShards() { YQL_ENSURE(!SchemeRequest || InconsistentTx); YQL_ENSURE(SchemeEntry); + CA_LOG_D("Resolve shards for TableId=" << TableId); TVector columns; TVector keyColumnTypes; @@ -345,6 +349,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N YQL_ENSURE(request->ResultSet.size() == 1); SchemeRequest = std::move(request->ResultSet[0]); + CA_LOG_D("Resolved shards for TableId=" << TableId << ". PartitionsCount=" << SchemeRequest->KeyDescription->GetPartitions().size() << "."); + Prepare(); } @@ -360,7 +366,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_E("Got UNSPECIFIED for table `" << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," - << " Sink=" << this->SelfId() << "."); + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Got UNSPECIFIED for table `" << SchemeEntry->TableId.PathId.ToString() << "`.", @@ -372,7 +379,6 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N YQL_ENSURE(false); } case NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED: { - ResetShardRetries(ev->Get()->Record.GetOrigin(), ev->Cookie); ProcessWriteCompletedShard(ev); return; } @@ -380,7 +386,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_E("Got ABORTED for table `" << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," - << " Sink=" << this->SelfId() << "."); + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Got ABORTED for table `" << SchemeEntry->TableId.PathId.ToString() << "`.", @@ -392,7 +399,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_E("Got INTERNAL ERROR for table `" << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," - << " Sink=" << this->SelfId() << "."); + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); // TODO: Add new status for splits in datashard. This is tmp solution. if (getIssues().ToOneLineString().Contains("in a pre/offline state assuming this is due to a finished split (wrong shard state)")) { @@ -412,7 +420,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "." - << " Ignored this error."); + << " Ignored this error." + << getIssues().ToOneLineString()); // TODO: support waiting return; } @@ -420,7 +429,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_E("Got CANCELLED for table `" << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," - << " Sink=" << this->SelfId() << "."); + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Got CANCELLED for table `" << SchemeEntry->TableId.PathId.ToString() << "`.", @@ -432,7 +442,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_E("Got BAD REQUEST for table `" << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," - << " Sink=" << this->SelfId() << "."); + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Got BAD REQUEST for table `" << SchemeEntry->TableId.PathId.ToString() << "`.", @@ -444,7 +455,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_E("Got SCHEME CHANGED for table `" << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," - << " Sink=" << this->SelfId() << "."); + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); if (InconsistentTx) { ResetShardRetries(ev->Get()->Record.GetOrigin(), ev->Cookie); RetryResolveTable(); @@ -461,7 +473,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_E("Got LOCKS BROKEN for table `" << SchemeEntry->TableId.PathId.ToString() << "`." << " ShardID=" << ev->Get()->Record.GetOrigin() << "," - << " Sink=" << this->SelfId() << "."); + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Got LOCKS BROKEN for table `" << SchemeEntry->TableId.PathId.ToString() << "`.", @@ -583,17 +596,23 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N if (SchemeEntry->Kind != NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { TlsActivationContext->Schedule( CalculateNextAttemptDelay(metadata->SendAttempts), - new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvShardRequestTimeout(shardId, metadata->SendAttempts), 0, metadata->Cookie)); + new IEventHandle( + SelfId(), + SelfId(), + new TEvPrivate::TEvShardRequestTimeout(shardId), + 0, + metadata->Cookie)); } } - void RetryShard(const ui64 shardId, const std::optional ifCookieEqual, const std::optional sendAttempts) { + void RetryShard(const ui64 shardId, const std::optional ifCookieEqual) { const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); - if (!metadata || (ifCookieEqual && metadata->Cookie != ifCookieEqual) || (sendAttempts && metadata->SendAttempts != sendAttempts)) { + if (!metadata || (ifCookieEqual && metadata->Cookie != ifCookieEqual)) { + CA_LOG_D("Retry failed: not found ShardID=" << shardId << " with Cookie=" << ifCookieEqual.value_or(0)); return; } - CA_LOG_T("Retry ShardID=" << shardId << " with Cookie=" << ifCookieEqual.value_or(0)); + CA_LOG_D("Retry ShardID=" << shardId << " with Cookie=" << ifCookieEqual.value_or(0)); SendDataToShard(shardId); } @@ -603,12 +622,12 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void Handle(TEvPrivate::TEvShardRequestTimeout::TPtr& ev) { CA_LOG_W("Timeout shardID=" << ev->Get()->ShardId); - RetryShard(ev->Get()->ShardId, ev->Cookie, ev->Get()->SendAttempts); + RetryShard(ev->Get()->ShardId, ev->Cookie); } void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { CA_LOG_W("TEvDeliveryProblem was received from tablet: " << ev->Get()->TabletId); - RetryShard(ev->Get()->TabletId, std::nullopt, std::nullopt); + RetryShard(ev->Get()->TabletId, std::nullopt); } void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { @@ -669,6 +688,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CurrentExceptionMessage(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); } + ProcessBatches(); } void ResumeExecution() { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 5372bc3f5486..8087a19f1074 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -507,6 +507,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { YQL_ENSURE(datashardBatch); auto data = datashardBatch->Extract(); const auto rows = data.size() / Columns.size(); + YQL_ENSURE(data.size() == rows * Columns.size()); for (size_t rowIndex = 0; rowIndex < rows; ++rowIndex) { AddRow( @@ -634,8 +635,11 @@ class TShardsInfo { public: class TShardInfo { friend class TShardsInfo; - TShardInfo(i64& memory) - : Memory(memory) { + TShardInfo(i64& memory, ui64& nextCookie, bool& closed) + : Memory(memory) + , NextCookie(nextCookie) + , Cookie(NextCookie++) + , Closed(closed) { } public: @@ -655,10 +659,6 @@ class TShardsInfo { return IsClosed() && IsEmpty(); } - void Close() { - Closed = true; - } - void MakeNextBatches(i64 maxDataSize, ui64 maxCount) { YQL_ENSURE(BatchesInFlight == 0); i64 dataSize = 0; @@ -683,7 +683,7 @@ class TShardsInfo { Batches.pop_front(); } - ++Cookie; + Cookie = NextCookie++; SendAttempts = 0; BatchesInFlight = 0; @@ -721,10 +721,13 @@ class TShardsInfo { private: std::deque Batches; - bool Closed = false; i64& Memory; - ui64 Cookie = 1; + ui64& NextCookie; + ui64 Cookie; + + bool& Closed; + ui32 SendAttempts = 0; size_t BatchesInFlight = 0; }; @@ -735,7 +738,7 @@ class TShardsInfo { return it->second; } - auto [insertIt, _] = ShardsInfo.emplace(shard, TShardInfo(Memory)); + auto [insertIt, _] = ShardsInfo.emplace(shard, TShardInfo(Memory, NextCookie, Closed)); return insertIt->second; } @@ -782,40 +785,61 @@ class TShardsInfo { void Clear() { ShardsInfo = {}; Memory = 0; + Closed = false; + } + + void Close() { + Closed = true; } private: THashMap ShardsInfo; i64 Memory = 0; + ui64 NextCookie = 1; + bool Closed = false; }; class TShardedWriteController : public IShardedWriteController { public: void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { - if (Serializer) { - FlushSerializer(true); - } + BeforePartitioningChanged(); Serializer = CreateColumnShardPayloadSerializer( schemeEntry, InputColumnsMetadata, TypeEnv); - ReshardData(); - ShardsInfo.Clear(); + AfterPartitioningChanged(); } void OnPartitioningChanged( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { - if (Serializer) { - FlushSerializer(true); - } + BeforePartitioningChanged(); Serializer = CreateDataShardPayloadSerializer( schemeEntry, std::move(partitionsEntry), InputColumnsMetadata, TypeEnv); + AfterPartitioningChanged(); + } + + void BeforePartitioningChanged() { + if (Serializer) { + if (!Closed) { + Serializer->Close(); + } + FlushSerializer(true); + } + } + + void AfterPartitioningChanged() { + ShardsInfo.Close(); ReshardData(); ShardsInfo.Clear(); + if (Closed) { + Close(); + } else { + FlushSerializer(GetMemory() >= Settings.MemoryLimitTotal); + } } void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { @@ -834,9 +858,7 @@ class TShardedWriteController : public IShardedWriteController { Serializer->Close(); FlushSerializer(true); YQL_ENSURE(Serializer->IsFinished()); - for (auto& [shardId, shardInfo] : ShardsInfo.GetShards()) { - shardInfo.Close(); - } + ShardsInfo.Close(); } TVector GetPendingShards() const override { @@ -973,8 +995,6 @@ class TShardedWriteController : public IShardedWriteController { } } - TString LogPrefix = "ShardedWriteController"; - TShardedWriteControllerSettings Settings; TVector InputColumnsMetadata; const NMiniKQL::TTypeEnvironment& TypeEnv; From d5c83d58e598810c155e880e316025edc39eb5de Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 10 Jun 2024 16:33:47 +0300 Subject: [PATCH 35/35] fix --- ydb/core/kqp/common/kqp_tx.cpp | 11 ----------- .../kqp/query_compiler/kqp_query_compiler.cpp | 13 +++++++++---- ydb/core/kqp/session_actor/kqp_query_state.h | 18 ++++++++++++------ 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx.cpp b/ydb/core/kqp/common/kqp_tx.cpp index d84b7798d7d6..71c6a380df2b 100644 --- a/ydb/core/kqp/common/kqp_tx.cpp +++ b/ydb/core/kqp/common/kqp_tx.cpp @@ -140,8 +140,6 @@ bool NeedSnapshot(const TKqpTransactionContext& txCtx, const NYql::TKikimrConfig size_t readPhases = 0; bool hasEffects = false; - bool hasSourceRead = false; - bool hasSinkWrite = false; for (const auto &tx : physicalQuery.GetTransactions()) { switch (tx.GetType()) { @@ -157,11 +155,6 @@ bool NeedSnapshot(const TKqpTransactionContext& txCtx, const NYql::TKikimrConfig if (tx.GetHasEffects()) { hasEffects = true; } - - for (const auto &stage : tx.GetStages()) { - hasSourceRead |= !stage.GetSources().empty(); - hasSinkWrite |= !stage.GetSinks().empty(); - } } if (txCtx.HasUncommittedChangesRead || AppData()->FeatureFlags.GetEnableForceImmediateEffectsExecution()) { @@ -169,10 +162,6 @@ bool NeedSnapshot(const TKqpTransactionContext& txCtx, const NYql::TKikimrConfig return true; } - if (hasSourceRead && hasSinkWrite) { - return true; - } - // We don't want snapshot when there are effects at the moment, // because it hurts performance when there are multiple single-shard // reads and a single distributed commit. Taking snapshot costs diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index d556e0b9ed0b..a33ea3c888ab 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -752,7 +752,7 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { stageProto.SetOutputsCount(outputsCount); // Dq sinks - bool hasTableSink = false; + bool hasTxTableSink = false; if (auto maybeOutputsNode = stage.Outputs()) { auto outputsNode = maybeOutputsNode.Cast(); for (size_t i = 0; i < outputsNode.Size(); ++i) { @@ -764,12 +764,17 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { FillSink(sinkNode, sinkProto, tablesMap, ctx); sinkProto->SetOutputIndex(FromString(TStringBuf(sinkNode.Index()))); - // Only sinks to ydb tables can be considered as effects. - hasTableSink |= IsTableSink(sinkNode.DataSink().Cast().Category()); + if (IsTableSink(sinkNode.DataSink().Cast().Category())) { + // Only sinks with transactions to ydb tables can be considered as effects. + // Inconsistent internal sinks and external sinks (like S3) aren't effects. + auto settings = sinkNode.Settings().Maybe(); + YQL_ENSURE(settings); + hasTxTableSink |= settings.InconsistentWrite().Cast().StringValue() != "true"; + } } } - stageProto.SetIsEffectsStage(hasEffects || hasTableSink); + stageProto.SetIsEffectsStage(hasEffects || hasTxTableSink); auto paramsType = CollectParameters(stage, ctx); auto programBytecode = NDq::BuildProgram(stage.Program(), *paramsType, *KqlCompiler, TypeEnv, FuncRegistry, diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index 7cf13a2a14ee..cc43b58f04e1 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -329,8 +329,8 @@ class TKqpQueryState : public TNonCopyable { return true; } - if (HasSinkInTx(tx)) { - // At current time sinks require separate tnx with commit. + if (HasTxSinkInTx(tx)) { + // At current time transactional internal sinks require separate tnx with commit. return false; } @@ -392,7 +392,7 @@ class TKqpQueryState : public TNonCopyable { if (TxCtx->CanDeferEffects()) { // At current time sinks require separate tnx with commit. - while (tx && tx->GetHasEffects() && !HasSinkInTx(tx)) { + while (tx && tx->GetHasEffects() && !HasTxSinkInTx(tx)) { QueryData->CreateKqpValueMap(tx); bool success = TxCtx->AddDeferredEffect(tx, QueryData); YQL_ENSURE(success); @@ -409,10 +409,16 @@ class TKqpQueryState : public TNonCopyable { return tx; } - bool HasSinkInTx(const TKqpPhyTxHolder::TConstPtr& tx) const { + bool HasTxSinkInTx(const TKqpPhyTxHolder::TConstPtr& tx) const { for (const auto& stage : tx->GetStages()) { - if (!stage.GetSinks().empty()) { - return true; + for (const auto& sink : stage.GetSinks()) { + if (sink.GetTypeCase() == NKqpProto::TKqpSink::kInternalSink && sink.GetInternalSink().GetSettings().Is()) { + NKikimrKqp::TKqpTableSinkSettings settings; + YQL_ENSURE(sink.GetInternalSink().GetSettings().UnpackTo(&settings), "Failed to unpack settings"); + if (!settings.GetInconsistentTx()) { + return true; + } + } } } return false;