From b4af00e96a129b49e39d39e845220346aa172199 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 22 Apr 2024 14:58:26 +0000 Subject: [PATCH 01/21] WIP --- .../libs/checkpoint_storage/state_storage.h | 9 ++-- .../fq/libs/checkpoint_storage/ut/gc_ut.cpp | 4 +- .../fq/libs/checkpoint_storage/ut/ya.make | 2 +- .../ut/ydb_state_storage_ut.cpp | 22 ++++----- .../checkpoint_storage/ydb_state_storage.cpp | 38 +++++++++------- ydb/core/kqp/runtime/kqp_read_actor.cpp | 4 +- ydb/core/kqp/runtime/kqp_sequencer_actor.cpp | 4 +- .../kqp/runtime/kqp_stream_lookup_actor.cpp | 4 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- .../actors/compute/dq_async_compute_actor.cpp | 6 +-- .../actors/compute/dq_async_compute_actor.h | 3 +- .../dq/actors/compute/dq_checkpoints_states.h | 43 ++++++++++++++++++ .../yql/dq/actors/compute/dq_compute_actor.h | 45 +++++++++++++++++-- .../compute/dq_compute_actor_async_io.h | 16 ++++--- .../compute/dq_compute_actor_checkpoints.cpp | 44 +++++++++--------- .../compute/dq_compute_actor_checkpoints.h | 10 ++--- .../dq/actors/compute/dq_compute_actor_impl.h | 36 +++++++-------- .../compute/dq_sync_compute_actor_base.h | 17 +++---- ...dq_compute_actor_async_input_helper_ut.cpp | 4 +- .../dq_input_transform_lookup.cpp | 4 +- .../yql/dq/actors/task_runner/events.h | 8 ++-- .../task_runner/task_runner_actor_local.cpp | 12 ++--- ydb/library/yql/dq/proto/dq_checkpoint.proto | 2 +- .../clickhouse/actors/yql_ch_read_actor.cpp | 4 +- .../common/ut_helpers/dq_fake_ca.cpp | 6 +-- .../providers/common/ut_helpers/dq_fake_ca.h | 15 ++++--- .../yql/providers/dq/actors/worker_actor.cpp | 2 +- .../generic/actors/yql_generic_read_actor.cpp | 4 +- .../pq/async_io/dq_pq_read_actor.cpp | 7 +-- .../pq/async_io/dq_pq_write_actor.cpp | 9 ++-- .../providers/s3/actors/yql_s3_read_actor.cpp | 8 ++-- .../s3/actors/yql_s3_write_actor.cpp | 2 +- .../async_io/dq_solomon_write_actor.cpp | 7 +-- .../ydb/actors/yql_ydb_read_actor.cpp | 4 +- .../fq/pq_async_io/dq_pq_read_actor_ut.cpp | 14 +++--- .../fq/pq_async_io/dq_pq_write_actor_ut.cpp | 4 +- ydb/tests/fq/pq_async_io/ut_helpers.h | 6 +-- 37 files changed, 269 insertions(+), 162 deletions(-) create mode 100644 ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h diff --git a/ydb/core/fq/libs/checkpoint_storage/state_storage.h b/ydb/core/fq/libs/checkpoint_storage/state_storage.h index 35b639e10470..daa7ecdc20f5 100644 --- a/ydb/core/fq/libs/checkpoint_storage/state_storage.h +++ b/ydb/core/fq/libs/checkpoint_storage/state_storage.h @@ -2,7 +2,10 @@ #include -#include +// #include +#include + +#include #include #include @@ -15,7 +18,7 @@ namespace NFq { class IStateStorage : public virtual TThrRefBase { public: - using TGetStateResult = std::pair, NYql::TIssues>; + using TGetStateResult = std::pair, NYql::TIssues>; using TCountStatesResult = std::pair; virtual NThreading::TFuture Init() = 0; @@ -24,7 +27,7 @@ class IStateStorage : public virtual TThrRefBase { ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, - const NYql::NDqProto::TComputeActorState& state) = 0; + const NYql::NDq::TComputeActorState& state) = 0; virtual NThreading::TFuture GetState( const std::vector& taskIds, diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp index 56054de5c375..0ff053406c88 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp @@ -32,7 +32,7 @@ namespace { //////////////////////////////////////////////////////////////////////////////// -NYql::NDqProto::TComputeActorState MakeStateFromBlob(size_t blobSize, bool isIncrement = false) { +TComputeActorState MakeStateFromBlob(size_t blobSize, bool isIncrement = false) { TString blob; blob.reserve(blobSize); for (size_t i = 0; i < blobSize; ++i) { @@ -51,7 +51,7 @@ NYql::NDqProto::TComputeActorState MakeStateFromBlob(size_t blobSize, bool isInc const TStringBuf savedBuf = value.AsStringRef(); TString result; NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, savedBuf); - NYql::NDqProto::TComputeActorState state; + TComputeActorState state; state.MutableMiniKqlProgram()->MutableData()->MutableStateData()->SetBlob(result); return state; } diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/ya.make b/ydb/core/fq/libs/checkpoint_storage/ut/ya.make index b3e6265e4f46..c17ecfaf6cd0 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/ya.make +++ b/ydb/core/fq/libs/checkpoint_storage/ut/ya.make @@ -24,5 +24,5 @@ SRCS( ydb_state_storage_ut.cpp ydb_checkpoint_storage_ut.cpp ) - +REQUIREMENTS(ram:32) END() diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp index d07afda2b5c8..e93a6c0146b5 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp @@ -25,7 +25,7 @@ const TCheckpointId CheckpointId2(12, 1); const TCheckpointId CheckpointId3(12, 4); const TCheckpointId CheckpointId4(13, 2); -const size_t YdbRowSizeLimit = 500; +const size_t YdbRowSizeLimit = 16*1000*1000; //////////////////////////////////////////////////////////////////////////////// @@ -52,15 +52,15 @@ class TFixture : public NUnitTest::TBaseFixture { return storage; } - NYql::NDqProto::TComputeActorState MakeState(NYql::NUdf::TUnboxedValuePod&& value) { + TComputeActorState MakeState(NYql::NUdf::TUnboxedValuePod&& value) { TString result; NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, value.AsStringRef()); - NYql::NDqProto::TComputeActorState state; + TComputeActorState state; state.MutableMiniKqlProgram()->MutableData()->MutableStateData()->SetBlob(result); return state; } - NYql::NDqProto::TComputeActorState MakeStateFromBlob(size_t blobSize) { + TEvDqCompute::TEvComputeActorState MakeStateFromBlob(size_t blobSize) { TString blob; blob.reserve(blobSize); for (size_t i = 0; i < blobSize; ++i) { @@ -69,7 +69,7 @@ class TFixture : public NUnitTest::TBaseFixture { return MakeState(NKikimr::NMiniKQL::TOutputSerializer::MakeSimpleBlobState(blob, 0)); } - NYql::NDqProto::TComputeActorState MakeIncrementState(size_t miniKqlPStateSize) { + TEvDqCompute::TEvComputeActorState MakeIncrementState(size_t miniKqlPStateSize) { std::map map; size_t itemCount = 4; for (size_t i = 0; i < itemCount; ++i) { @@ -78,7 +78,7 @@ class TFixture : public NUnitTest::TBaseFixture { return MakeState(NKikimr::NMiniKQL::TOutputSerializer::MakeSnapshotState(map, 0)); } - NYql::NDqProto::TComputeActorState MakeIncrementState( + TEvDqCompute::TEvComputeActorState MakeIncrementState( const std::map& snapshot, const std::map& increment, const std::set& deleted) @@ -94,13 +94,13 @@ class TFixture : public NUnitTest::TBaseFixture { ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, - const NYql::NDqProto::TComputeActorState& state) + const TEvDqCompute::TEvComputeActorState& state) { auto issues = storage->SaveState(taskId, graphId, checkpointId, state).GetValueSync(); UNIT_ASSERT_C(issues.Empty(), issues.ToString()); } - NYql::NDqProto::TComputeActorState GetState( + TEvDqCompute::TEvComputeActorState GetState( TStateStoragePtr storage, const ui64 taskId, const TString& graphId, @@ -112,7 +112,7 @@ class TFixture : public NUnitTest::TBaseFixture { return states[0]; } - void ShouldSaveGetStateImpl(const char* tablePrefix, const NYql::NDqProto::TComputeActorState& state) { + void ShouldSaveGetStateImpl(const char* tablePrefix, const TEvDqCompute::TEvComputeActorState& state) { auto storage = GetStateStorage(tablePrefix); auto issues = storage->SaveState(1, "graph1", CheckpointId1, state).GetValueSync(); UNIT_ASSERT_C(issues.Empty(), issues.ToString()); @@ -147,14 +147,14 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { auto state2 = NKikimr::NMiniKQL::TOutputSerializer::MakeSimpleBlobState(TString(20, 'b'), 0); NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, state1.AsStringRef()); NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, state2.AsStringRef()); - NYql::NDqProto::TComputeActorState state; + TEvDqCompute::TEvComputeActorState state; state.MutableMiniKqlProgram()->MutableData()->MutableStateData()->SetBlob(result); ShouldSaveGetStateImpl("TStateStorageTestShouldSaveGetState", state); } Y_UNIT_TEST_F(ShouldSaveGetOldBigState, TFixture) { - ShouldSaveGetStateImpl("TStateStorageTestShouldSaveGetState", MakeStateFromBlob(YdbRowSizeLimit * 4)); + ShouldSaveGetStateImpl("TStateStorageTestShouldSaveGetState", MakeStateFromBlob(YdbRowSizeLimit * 150)); } Y_UNIT_TEST_F(ShouldSaveGetIncrementSmallState, TFixture) diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index 1c8b84569e4a..e4aea15bfc61 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -39,7 +39,7 @@ enum class EStateType { class TIncrementLogic { public: - void Apply(NYql::NDqProto::TComputeActorState& update) { + void Apply(TComputeActorState& update) { Sources = update.GetSources(); // Always snapshot - copy it; Sinks = update.GetSinks(); ui64 nodeNum = 0; @@ -90,9 +90,9 @@ class TIncrementLogic { } } - NYql::NDqProto::TComputeActorState Build() { + TComputeActorState Build() { NKikimr::NMiniKQL::TScopedAlloc alloc(__LOCATION__); - NYql::NDqProto::TComputeActorState state; + TComputeActorState state; *state.MutableSources() = Sources; *state.MutableSinks() = Sinks; TString result; @@ -115,7 +115,7 @@ class TIncrementLogic { return state; } - static EStateType GetStateType(const NYql::NDqProto::TComputeActorState& state) { + static EStateType GetStateType(const TComputeActorState& state) { if (!state.HasMiniKqlProgram()) { return EStateType::Snapshot; } @@ -141,8 +141,8 @@ class TIncrementLogic { TString SimpleBlobNodeState; }; std::map NodeStates; - google::protobuf::RepeatedPtrField< ::NYql::NDqProto::TSourceState> Sources; - google::protobuf::RepeatedPtrField Sinks; + google::protobuf::RepeatedPtrField< ::NYql::NDq::TSourceState> Sources; + google::protobuf::RepeatedPtrField Sinks; TMaybe LastVersion; }; @@ -163,7 +163,7 @@ struct TContext : public TThrRefBase { std::list Rows; EStateType Type = EStateType::Snapshot; std::list ListOfStatesForReading; // ordered by desc - std::list States; + std::list States; }; const NActors::TActorSystem* ActorSystem; @@ -299,7 +299,7 @@ class TStateStorage : public IStateStorage { ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, - const NYql::NDqProto::TComputeActorState& state) override; + const TComputeActorState& state) override; TFuture GetState( const std::vector& taskIds, @@ -333,7 +333,7 @@ class TStateStorage : public IStateStorage { const TContextPtr& context); std::list SerializeState( - const NYql::NDqProto::TComputeActorState& state); + const TComputeActorState& state); EStateType DeserializeState( TContext::TaskInfo& taskInfo); @@ -344,7 +344,7 @@ class TStateStorage : public IStateStorage { TFuture ReadRows( const TContextPtr& context); - std::vector ApplyIncrements( + std::vector ApplyIncrements( const TContextPtr& context, NYql::TIssues& issues); @@ -418,13 +418,13 @@ EStateType TStateStorage::DeserializeState(TContext::TaskInfo& taskInfo) { it = taskInfo.Rows.erase(it); } taskInfo.States.push_front({}); - NYql::NDqProto::TComputeActorState& state = taskInfo.States.front(); + TComputeActorState& state = taskInfo.States.front(); auto res = state.ParseFromString(blob); Y_ENSURE(res, "Parsing error"); return TIncrementLogic::GetStateType(state); } -std::list TStateStorage::SerializeState(const NYql::NDqProto::TComputeActorState& state) { +std::list TStateStorage::SerializeState(const TComputeActorState& state) { std::list result; TString serializedState; if (!state.SerializeToString(&serializedState)) { @@ -447,8 +447,11 @@ TFuture TStateStorage::SaveState( ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, - const NYql::NDqProto::TComputeActorState& state) { + const TComputeActorState& state) { + const size_t stateSize = state.ByteSizeLong(); + std::cerr << "SaveState, size: " << stateSize << std::endl; + std::list serializedState; EStateType type = EStateType::Snapshot; @@ -456,11 +459,16 @@ TFuture TStateStorage::SaveState( type = TIncrementLogic::GetStateType(state); serializedState = SerializeState(state); if (serializedState.empty()) { + std::cerr << "SaveState, empty: " << std::endl; return MakeFuture(NYql::TIssues{NYql::TIssue{"Failed to serialize compute actor state"}}); } } catch (...) { + //std::cerr << "exception " << std::endl; + + LOG_STREAMS_STORAGE_SERVICE_AS_DEBUG(*NActors::TActivationContext::ActorSystem(), "Exception " << stateSize); return MakeFuture(NYql::TIssues{NYql::TIssue{CurrentExceptionMessage()}}); } + // return MakeFuture(NYql::TIssues{NYql::TIssue{"XXXXXX"}}); auto context = MakeIntrusive( NActors::TActivationContext::ActorSystem(), @@ -965,12 +973,12 @@ TFuture TStateStorage::ReadRows(const TContextPtr& context) { return promise.GetFuture(); } -std::vector TStateStorage::ApplyIncrements( +std::vector TStateStorage::ApplyIncrements( const TContextPtr& context, NYql::TIssues& issues) { LOG_STORAGE_DEBUG(context, "ApplyIncrements"); - std::vector states; + std::vector states; try { for (auto& task : context->Tasks) { diff --git a/ydb/core/kqp/runtime/kqp_read_actor.cpp b/ydb/core/kqp/runtime/kqp_read_actor.cpp index 4c254ed8b6bb..a0581d3f3585 100644 --- a/ydb/core/kqp/runtime/kqp_read_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_read_actor.cpp @@ -1326,9 +1326,9 @@ class TKqpReadActor : public TActorBootstrapped, public NYql::NDq } - void SaveState(const NYql::NDqProto::TCheckpoint&, NYql::NDqProto::TSourceState&) override {} + void SaveState(const NYql::NDqProto::TCheckpoint&, NYql::NDq::TSourceState&) override {} void CommitState(const NYql::NDqProto::TCheckpoint&) override {} - void LoadState(const NYql::NDqProto::TSourceState&) override {} + void LoadState(const NYql::NDq::TSourceState&) override {} void PassAway() override { Counters->ReadActorsCount->Dec(); diff --git a/ydb/core/kqp/runtime/kqp_sequencer_actor.cpp b/ydb/core/kqp/runtime/kqp_sequencer_actor.cpp index b9b6e7acc27c..3d93b7158796 100644 --- a/ydb/core/kqp/runtime/kqp_sequencer_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_sequencer_actor.cpp @@ -144,8 +144,8 @@ class TKqpSequencerActor : public NActors::TActorBootstrapped, public NYql::N } void CommitState(const NYql::NDqProto::TCheckpoint&) final {}; - void LoadState(const NYql::NDqProto::TSinkState&) final {}; + void LoadState(const NYql::NDq::TSinkState&) final {}; ui64 GetOutputIndex() const final { return OutputIndex; diff --git a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.cpp b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.cpp index 54a4c785c6b7..cd22d16af509 100644 --- a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.cpp @@ -548,7 +548,7 @@ class TDqAsyncComputeActor : public TDqComputeActorBaseSwap(&*ProgramState); @@ -557,7 +557,7 @@ class TDqAsyncComputeActor : public TDqComputeActorBaseSaveState(checkpoint, sourceState); sourceState.SetInputIndex(inputIndex); } @@ -976,7 +976,7 @@ class TDqAsyncComputeActor : public TDqComputeActorBase> WaitingForStateResponse; bool SentStatsRequest; - mutable THolder ProgramState; + mutable THolder ProgramState; ui64 MkqlMemoryLimit = 0; TDqMemoryQuota::TProfileStats ProfileStats; bool CheckpointRequestedFromTaskRunner = false; diff --git a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h index 4d07bc6be4a5..591562f7ab5d 100644 --- a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h +++ b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h @@ -6,7 +6,8 @@ #include #include #include -#include +##include +#include #include #include diff --git a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h new file mode 100644 index 000000000000..8614f48f3bdb --- /dev/null +++ b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h @@ -0,0 +1,43 @@ +#pragma once + +#include + +namespace NYql::NDq { + +struct TStateData { + TString Blob; + ui64 Version{0}; +}; + +struct TMiniKqlProgramState { + TStateData Data; + ui64 RuntimeVersion{0}; +}; + +struct TSourceState { +// State data for source. +// Typically there is only one element with state that +// source saved. But when we are migrating states +// between tasks there can be state +// from several different tasks sources. + std::list Data; + ui64 InputIndex; +}; + +struct TSinkState { + TStateData Data; + ui64 OutputIndex; +}; + +// Checkpoint for single compute actor. +struct TComputeActorState { + TMiniKqlProgramState MiniKqlProgram; + std::list Sources; + std::list Sinks; + + void Clear() { + // TODO; + } +}; + +} // namespace NDq::NYql diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor.h index 31c153de8824..a631137962ed 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor.h @@ -4,7 +4,8 @@ #include #include #include -#include +//#include +#include #include #include #include @@ -17,6 +18,43 @@ namespace NYql { namespace NDq { + +// struct TStateData { +// TString Blob; +// ui64 Version{0}; +// }; + +// struct TMiniKqlProgramState { +// TStateData Data; +// ui64 RuntimeVersion{0}; +// }; + +// struct TSourceState { +// // State data for source. +// // Typically there is only one element with state that +// // source saved. But when we are migrating states +// // between tasks there can be state +// // from several different tasks sources. +// std::list Data; +// ui64 InputIndex; +// }; + +// struct TSinkState { +// TStateData Data; +// ui64 OutputIndex; +// }; + +// // Checkpoint for single compute actor. +// struct TComputeActorState { +// TMiniKqlProgramState MiniKqlProgram; +// std::list Sources; +// std::list Sinks; + +// void Clear() { +// // TODO; +// } +// }; + struct TEvDqCompute { struct TEvState : public NActors::TEventPB {}; struct TEvStateRequest : public NActors::TEventPB {}; @@ -106,7 +144,8 @@ struct TEvDqCompute { const TString GraphId; const ui64 TaskId; const NDqProto::TCheckpoint Checkpoint; - NDqProto::TComputeActorState State; + TComputeActorState State; + //NDqProto::TComputeActorState State; }; struct TEvSaveTaskStateResult : public NActors::TEventPB States; + std::vector States; const TIssues Issues; const ui64 Generation; }; diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h index b77a25a04312..b0b87fc20e62 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h @@ -1,5 +1,6 @@ #pragma once #include +//include #include #include #include @@ -16,11 +17,14 @@ namespace NYql::NDqProto { class TCheckpoint; class TTaskInput; -class TSourceState; class TTaskOutput; -class TSinkState; } // namespace NYql::NDqProto +namespace NYql::NDq { +struct TSourceState; +struct TSinkState; +} // namespace NYql::NDq + namespace NActors { class IActor; } // namespace NActors @@ -118,9 +122,9 @@ struct IDqComputeActorAsyncInput { i64 freeSpace) = 0; // Checkpointing. - virtual void SaveState(const NDqProto::TCheckpoint& checkpoint, NDqProto::TSourceState& state) = 0; + virtual void SaveState(const NDqProto::TCheckpoint& checkpoint, TSourceState& state) = 0; virtual void CommitState(const NDqProto::TCheckpoint& checkpoint) = 0; // Apply side effects related to this checkpoint. - virtual void LoadState(const NDqProto::TSourceState& state) = 0; + virtual void LoadState(const TSourceState& state) = 0; virtual TDuration GetCpuTime() { return TDuration::Zero(); @@ -165,7 +169,7 @@ struct IDqComputeActorAsyncOutput { virtual void OnAsyncOutputError(ui64 outputIndex, const TIssues& issues, NYql::NDqProto::StatusIds::StatusCode fatalCode) = 0; // Checkpointing - virtual void OnAsyncOutputStateSaved(NDqProto::TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) = 0; + virtual void OnAsyncOutputStateSaved(TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) = 0; // Finishing virtual void OnAsyncOutputFinished(ui64 outputIndex) = 0; // Signal that async output has successfully written its finish flag and so compute actor is ready to finish. @@ -189,7 +193,7 @@ struct IDqComputeActorAsyncOutput { // Checkpointing. virtual void CommitState(const NDqProto::TCheckpoint& checkpoint) = 0; // Apply side effects related to this checkpoint. - virtual void LoadState(const NDqProto::TSinkState& state) = 0; + virtual void LoadState(const TSinkState& state) = 0; virtual TMaybe ExtraData() { return {}; } diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp index b3a8579a3d8d..41fea4f72d96 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp @@ -75,33 +75,33 @@ std::vector TaskIdsFromLoadPlan(const NDqProto::NDqStateLoadPlan::TTaskPla return taskIds; } -const NDqProto::TSourceState& FindSourceState( +const TSourceState& FindSourceState( const NDqProto::NDqStateLoadPlan::TSourcePlan::TForeignTaskSource& foreignTaskSource, - const std::vector& states, + const std::vector& states, const std::vector& taskIds) { // Find state index const auto stateIndexIt = std::lower_bound(taskIds.begin(), taskIds.end(), foreignTaskSource.GetTaskId()); YQL_ENSURE(stateIndexIt != taskIds.end(), "Task id was not found in plan"); const size_t stateIndex = std::distance(taskIds.begin(), stateIndexIt); - const NDqProto::TComputeActorState& state = states[stateIndex]; - for (const NDqProto::TSourceState& sourceState : state.GetSources()) { - if (sourceState.GetInputIndex() == foreignTaskSource.GetInputIndex()) { + const TComputeActorState& state = states[stateIndex]; + for (const TSourceState& sourceState : state.Sources) { + if (sourceState.InputIndex == foreignTaskSource.GetInputIndex()) { return sourceState; } } YQL_ENSURE(false, "Source input index " << foreignTaskSource.GetInputIndex() << " was not found in state"); // Make compiler happy - return state.GetSources(0); + return state.Sources.front(); } -NDqProto::TComputeActorState CombineForeignState( +TComputeActorState CombineForeignState( const NDqProto::NDqStateLoadPlan::TTaskPlan& plan, - const std::vector& states, + const std::vector& states, const std::vector& taskIds) { - NDqProto::TComputeActorState state; - state.MutableMiniKqlProgram()->MutableData()->MutableStateData()->SetVersion(TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion); + TComputeActorState state; + state.MiniKqlProgram.Data.Version = TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion; YQL_ENSURE(plan.GetProgram().GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_EMPTY, "Unsupported program state type. Plan: " << plan); for (const auto& sinkPlan : plan.GetSinks()) { YQL_ENSURE(sinkPlan.GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_EMPTY, "Unsupported sink state type. Plan: " << sinkPlan); @@ -109,15 +109,16 @@ NDqProto::TComputeActorState CombineForeignState( for (const auto& sourcePlan : plan.GetSources()) { YQL_ENSURE(sourcePlan.GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_EMPTY || sourcePlan.GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_FOREIGN, "Unsupported sink state type. Plan: " << sourcePlan); if (sourcePlan.GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_FOREIGN) { - auto& sourceState = *state.AddSources(); - sourceState.SetInputIndex(sourcePlan.GetInputIndex()); + state.Sources.push_back({}); + auto& sourceState = state.Sources.back(); + sourceState.InputIndex = sourcePlan.GetInputIndex(); for (const auto& foreignTaskSource : sourcePlan.GetForeignTasksSources()) { - const NDqProto::TSourceState& srcSourceState = FindSourceState(foreignTaskSource, states, taskIds); - for (const NDqProto::TStateData& data : srcSourceState.GetData()) { - sourceState.AddData()->CopyFrom(data); + const TSourceState& srcSourceState = FindSourceState(foreignTaskSource, states, taskIds); + for (const TStateData& data : srcSourceState.Data) { + sourceState.Data.emplace_back(data); } } - YQL_ENSURE(sourceState.DataSize(), "No data was loaded to source " << sourcePlan.GetInputIndex()); + YQL_ENSURE(sourceState.Data.size(), "No data was loaded to source " << sourcePlan.GetInputIndex()); } } return state; @@ -340,7 +341,7 @@ void TDqComputeActorCheckpoints::Handle(TEvDqCompute::TEvGetTaskStateResult::TPt if (StateLoadPlan.GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_OWN) { ComputeActor->LoadState(std::move(ev->Get()->States[0])); } else if (StateLoadPlan.GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_FOREIGN) { - NDqProto::TComputeActorState state = CombineForeignState(StateLoadPlan, ev->Get()->States, taskIds); + TComputeActorState state = CombineForeignState(StateLoadPlan, ev->Get()->States, taskIds); ComputeActor->LoadState(std::move(state)); } else { Y_ABORT("Unprocessed state type %s (%d)", @@ -501,7 +502,7 @@ void TDqComputeActorCheckpoints::AbortCheckpoint() { SavingToDatabase = false; } -void TDqComputeActorCheckpoints::OnSinkStateSaved(NDqProto::TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) { +void TDqComputeActorCheckpoints::OnSinkStateSaved(TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) { Y_ABORT_UNLESS(CheckpointCoordinator); Y_ABORT_UNLESS(checkpoint.GetGeneration() <= CheckpointCoordinator->Generation); if (checkpoint.GetGeneration() < CheckpointCoordinator->Generation) { @@ -512,11 +513,12 @@ void TDqComputeActorCheckpoints::OnSinkStateSaved(NDqProto::TSinkState&& state, Y_ABORT_UNLESS(PendingCheckpoint); Y_ABORT_UNLESS(PendingCheckpoint.Checkpoint->GetId() == checkpoint.GetId(), "Expected pending checkpoint id %lu, but got %lu", PendingCheckpoint.Checkpoint->GetId(), checkpoint.GetId()); - for (const NDqProto::TSinkState& sinkState : PendingCheckpoint.ComputeActorState.GetSinks()) { - Y_ABORT_UNLESS(sinkState.GetOutputIndex() != outputIndex, "Double save sink[%lu] state", outputIndex); + for (const TSinkState& sinkState : PendingCheckpoint.ComputeActorState.Sinks) { + Y_ABORT_UNLESS(sinkState.OutputIndex != outputIndex, "Double save sink[%lu] state", outputIndex); } - NDqProto::TSinkState* sinkState = PendingCheckpoint.ComputeActorState.AddSinks(); + PendingCheckpoint.ComputeActorState.Sinks.push_back({}); + TSinkState* sinkState = PendingCheckpoint.ComputeActorState.Sinks.back(); *sinkState = std::move(state); sinkState->SetOutputIndex(outputIndex); // Set index explicitly to avoid errors ++PendingCheckpoint.SavedSinkStatesCount; diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h index e33ba0495912..21bc494fef8a 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h @@ -58,7 +58,7 @@ class TDqComputeActorCheckpoints : public NActors::TActor Checkpoint; - NDqProto::TComputeActorState ComputeActorState; + TComputeActorState ComputeActorState; size_t SavedSinkStatesCount = 0; bool SavedComputeActorState = false; }; @@ -69,7 +69,7 @@ class TDqComputeActorCheckpoints : public NActors::TActor } protected: - void OnSinkStateSaved(NDqProto::TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) override final { + void OnSinkStateSaved(TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) override final { Y_ABORT_UNLESS(Checkpoints); // If we are checkpointing, we must have already constructed "checkpoints" object. Checkpoints->OnSinkStateSaved(std::move(state), outputIndex, checkpoint); } - void OnTransformStateSaved(NDqProto::TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) override final { + void OnTransformStateSaved(TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) override final { Y_ABORT_UNLESS(Checkpoints); // If we are checkpointing, we must have already constructed "checkpoints" object. Checkpoints->OnTransformStateSaved(std::move(state), outputIndex, checkpoint); } @@ -694,34 +694,34 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped protected: virtual void DoLoadRunnerState(TString&& blob) = 0; - void LoadState(NDqProto::TComputeActorState&& state) override final { + void LoadState(TComputeActorState&& state) override final { CA_LOG_D("Load state"); TMaybe error = Nothing(); - const NDqProto::TMiniKqlProgramState& mkqlProgramState = state.GetMiniKqlProgram(); + const TMiniKqlProgramState& mkqlProgramState = state.MiniKqlProgram; auto guard = BindAllocator(); try { - const ui64 version = mkqlProgramState.GetData().GetStateData().GetVersion(); + const ui64 version = mkqlProgramState.Data.Version; YQL_ENSURE(version && version <= TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion && version != TDqComputeActorCheckpoints::ComputeActorNonProtobufStateVersion, "Unsupported state version: " << version); if (version != TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion) { ythrow yexception() << "Invalid state version " << version; } - for (const NDqProto::TSourceState& sourceState : state.GetSources()) { - TAsyncInputHelper* source = SourcesMap.FindPtr(sourceState.GetInputIndex()); - YQL_ENSURE(source, "Failed to load state. Source with input index " << sourceState.GetInputIndex() << " was not found"); - YQL_ENSURE(source->AsyncInput, "Source[" << sourceState.GetInputIndex() << "] is not created"); + for (const TSourceState& sourceState : state.Sources) { + TAsyncInputHelper* source = SourcesMap.FindPtr(sourceState.InputIndex); + YQL_ENSURE(source, "Failed to load state. Source with input index " << sourceState.InputIndex << " was not found"); + YQL_ENSURE(source->AsyncInput, "Source[" << sourceState.InputIndex << "] is not created"); source->AsyncInput->LoadState(sourceState); } - for (const NDqProto::TSinkState& sinkState : state.GetSinks()) { - TAsyncOutputInfoBase* sink = SinksMap.FindPtr(sinkState.GetOutputIndex()); - YQL_ENSURE(sink, "Failed to load state. Sink with output index " << sinkState.GetOutputIndex() << " was not found"); - YQL_ENSURE(sink->AsyncOutput, "Sink[" << sinkState.GetOutputIndex() << "] is not created"); + for (const TSinkState& sinkState : state.Sinks) { + TAsyncOutputInfoBase* sink = SinksMap.FindPtr(sinkState.OutputIndex); + YQL_ENSURE(sink, "Failed to load state. Sink with output index " << sinkState.OutputIndex << " was not found"); + YQL_ENSURE(sink->AsyncOutput, "Sink[" << sinkState.OutputIndex << "] is not created"); sink->AsyncOutput->LoadState(sinkState); } } catch (const std::exception& e) { error = e.what(); CA_LOG_E("Exception: " << error); } - TString& blob = *state.MutableMiniKqlProgram()->MutableData()->MutableStateData()->MutableBlob(); + TString& blob = state.MiniKqlProgram.Data.Blob; if (blob && !error.Defined()) { CA_LOG_D("State size: " << blob.size()); DoLoadRunnerState(std::move(blob)); diff --git a/ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h b/ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h index 2881c5f40c5b..603d5d8e1eee 100644 --- a/ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h +++ b/ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h @@ -160,19 +160,20 @@ class TDqSyncComputeActorBase: public TDqComputeActorBaseMutableStateData(); - data.SetVersion(TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion); - data.SetBlob(TaskRunner->Save()); + TMiniKqlProgramState& mkqlProgramState = state.MiniKqlProgram; + mkqlProgramState.RuntimeVersion = NDqProto::RUNTIME_VERSION_YQL_1_0; + TStateData& data = mkqlProgramState.Data; + data.Version = TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion; + data.Blob = TaskRunner->Save(); for (auto& [inputIndex, source] : this->SourcesMap) { YQL_ENSURE(source.AsyncInput, "Source[" << inputIndex << "] is not created"); - NDqProto::TSourceState& sourceState = *state.AddSources(); + state.Sources.push_back({}); + TSourceState& sourceState = state.Sources.back(); source.AsyncInput->SaveState(checkpoint, sourceState); - sourceState.SetInputIndex(inputIndex); + sourceState.InputIndex = inputIndex; } } diff --git a/ydb/library/yql/dq/actors/compute/ut/dq_compute_actor_async_input_helper_ut.cpp b/ydb/library/yql/dq/actors/compute/ut/dq_compute_actor_async_input_helper_ut.cpp index 137816bcbe57..74a6bc3421ae 100644 --- a/ydb/library/yql/dq/actors/compute/ut/dq_compute_actor_async_input_helper_ut.cpp +++ b/ydb/library/yql/dq/actors/compute/ut/dq_compute_actor_async_input_helper_ut.cpp @@ -41,14 +41,14 @@ Y_UNIT_TEST_SUITE(TComputeActorAsyncInputHelperTest) { } // Checkpointing. - void SaveState(const NDqProto::TCheckpoint& checkpoint, NDqProto::TSourceState& state) override { + void SaveState(const NDqProto::TCheckpoint& checkpoint, TSourceState& state) override { Y_UNUSED(checkpoint); Y_UNUSED(state); } void CommitState(const NDqProto::TCheckpoint& checkpoint) override { Y_UNUSED(checkpoint); } - void LoadState(const NDqProto::TSourceState& state) override { + void LoadState(const TSourceState& state) override { Y_UNUSED(state); } diff --git a/ydb/library/yql/dq/actors/input_transforms/dq_input_transform_lookup.cpp b/ydb/library/yql/dq/actors/input_transforms/dq_input_transform_lookup.cpp index 424c188ad0e2..54a1433c66f3 100644 --- a/ydb/library/yql/dq/actors/input_transforms/dq_input_transform_lookup.cpp +++ b/ydb/library/yql/dq/actors/input_transforms/dq_input_transform_lookup.cpp @@ -112,8 +112,8 @@ class TInputTransformStreamLookup } //No checkpointg required - void SaveState(const NYql::NDqProto::TCheckpoint&, NYql::NDqProto::TSourceState&) final {} - void LoadState(const NYql::NDqProto::TSourceState&) final {} + void SaveState(const NYql::NDqProto::TCheckpoint&, NYql::NDq::TSourceState&) final {} + void LoadState(const NYql::NDq::TSourceState&) final {} void CommitState(const NYql::NDqProto::TCheckpoint&) final {} void PassAway() final { diff --git a/ydb/library/yql/dq/actors/task_runner/events.h b/ydb/library/yql/dq/actors/task_runner/events.h index 264760cac53c..c2ce3ed0aedb 100644 --- a/ydb/library/yql/dq/actors/task_runner/events.h +++ b/ydb/library/yql/dq/actors/task_runner/events.h @@ -11,7 +11,9 @@ #include #include #include -#include +//#include +#include + #include #include @@ -222,7 +224,7 @@ struct TEvTaskRunFinished const TTaskRunnerActorSensors& sensors = {}, const TDqMemoryQuota::TProfileStats& profileStats = {}, ui64 mkqlMemoryLimit = 0, - THolder&& programState = nullptr, + THolder&& programState = nullptr, bool watermarkInjectedToOutputs = false, bool checkpointRequestedFromTaskRunner = false, TDuration computeTime = TDuration::Zero()) @@ -245,7 +247,7 @@ struct TEvTaskRunFinished THashMap SourcesFreeSpace; TDqMemoryQuota::TProfileStats ProfileStats; ui64 MkqlMemoryLimit = 0; - THolder ProgramState; + THolder ProgramState; bool WatermarkInjectedToOutputs = false; bool CheckpointRequestedFromTaskRunner = false; TDuration ComputeTime; diff --git a/ydb/library/yql/dq/actors/task_runner/task_runner_actor_local.cpp b/ydb/library/yql/dq/actors/task_runner/task_runner_actor_local.cpp index 49d0f18504de..d252c645c07e 100644 --- a/ydb/library/yql/dq/actors/task_runner/task_runner_actor_local.cpp +++ b/ydb/library/yql/dq/actors/task_runner/task_runner_actor_local.cpp @@ -179,7 +179,7 @@ class TLocalTaskRunnerActor } auto watermarkInjectedToOutputs = false; - THolder mkqlProgramState; + THolder mkqlProgramState; if (res == ERunStatus::PendingInput || res == ERunStatus::Finished) { if (shouldHandleWatermark) { const auto watermarkRequested = ev->Get()->WatermarkRequest->Watermark; @@ -196,12 +196,12 @@ class TLocalTaskRunnerActor } if (ev->Get()->CheckpointRequest.Defined() && ReadyToCheckpoint()) { - mkqlProgramState = MakeHolder(); + mkqlProgramState = MakeHolder(); try { - mkqlProgramState->SetRuntimeVersion(NDqProto::RUNTIME_VERSION_YQL_1_0); - NDqProto::TStateData::TData& data = *mkqlProgramState->MutableData()->MutableStateData(); - data.SetVersion(TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion); - data.SetBlob(TaskRunner->Save()); + mkqlProgramState->RuntimeVersion = NDqProto::RUNTIME_VERSION_YQL_1_0; + TStateData& data = mkqlProgramState->Data; + data.Version = TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion; + data.Blob = TaskRunner->Save(); // inject barriers // todo:(whcrc) barriers are injected even if source state save failed for (const auto& channelId : ev->Get()->CheckpointRequest->ChannelIds) { diff --git a/ydb/library/yql/dq/proto/dq_checkpoint.proto b/ydb/library/yql/dq/proto/dq_checkpoint.proto index 5f32c92c5c04..8d92630cab47 100644 --- a/ydb/library/yql/dq/proto/dq_checkpoint.proto +++ b/ydb/library/yql/dq/proto/dq_checkpoint.proto @@ -44,7 +44,7 @@ message TMiniKqlProgramState { // Checkpoint for single compute actor. message TComputeActorState { - TMiniKqlProgramState MiniKqlProgram = 1; + // TMiniKqlProgramState MiniKqlProgram = 1; repeated TSourceState Sources = 2; repeated TSinkState Sinks = 3; } diff --git a/ydb/library/yql/providers/clickhouse/actors/yql_ch_read_actor.cpp b/ydb/library/yql/providers/clickhouse/actors/yql_ch_read_actor.cpp index 7e91e4d0ed6f..6f0953da925a 100644 --- a/ydb/library/yql/providers/clickhouse/actors/yql_ch_read_actor.cpp +++ b/ydb/library/yql/providers/clickhouse/actors/yql_ch_read_actor.cpp @@ -72,8 +72,8 @@ class TClickHouseReadActor : public TActorBootstrapped, pu static constexpr char ActorName[] = "ClickHouse_READ_ACTOR"; private: - void SaveState(const NDqProto::TCheckpoint&, NDqProto::TSourceState&) final {} - void LoadState(const NDqProto::TSourceState&) final {} + void SaveState(const NDqProto::TCheckpoint&, NDq::TSourceState&) final {} + void LoadState(const NDq::TSourceState&) final {} void CommitState(const NDqProto::TCheckpoint&) final {} ui64 GetInputIndex() const final { diff --git a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp index 61e4be5668a9..639fd304ecc9 100644 --- a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp +++ b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp @@ -115,21 +115,21 @@ void TFakeCASetup::AsyncOutputWrite(const TWriteValueProducer valueProducer, TMa }); } -void TFakeCASetup::SaveSourceState(NDqProto::TCheckpoint checkpoint, NDqProto::TSourceState& state) { +void TFakeCASetup::SaveSourceState(NDqProto::TCheckpoint checkpoint, NDq::TSourceState& state) { Execute([&state, &checkpoint](TFakeActor& actor) { Y_ASSERT(actor.DqAsyncInput); actor.DqAsyncInput->SaveState(checkpoint, state); }); } -void TFakeCASetup::LoadSource(const NDqProto::TSourceState& state) { +void TFakeCASetup::LoadSource(const NDq::TSourceState& state) { Execute([&state](TFakeActor& actor) { Y_ASSERT(actor.DqAsyncInput); actor.DqAsyncInput->LoadState(state); }); } -void TFakeCASetup::LoadSink(const NDqProto::TSinkState& state) { +void TFakeCASetup::LoadSink(const TSinkState& state) { Execute([&state](TFakeActor& actor) { Y_ASSERT(actor.DqAsyncOutput); actor.DqAsyncOutput->LoadState(state); diff --git a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h index 0726b143785a..068841d8f668 100644 --- a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h +++ b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h @@ -3,7 +3,8 @@ #include #include #include -#include +//#include +#include #include #include #include @@ -63,7 +64,7 @@ struct TAsyncInputPromises { struct TAsyncOutputPromises { NThreading::TPromise ResumeExecution = NThreading::NewPromise(); NThreading::TPromise Issue = NThreading::NewPromise(); - NThreading::TPromise StateSaved = NThreading::NewPromise(); + NThreading::TPromise StateSaved = NThreading::NewPromise(); }; NYql::NDqProto::TCheckpoint CreateCheckpoint(ui64 id = 0); @@ -100,10 +101,10 @@ class TFakeActor : public NActors::TActor { Parent.AsyncOutputPromises.Issue = NThreading::NewPromise(); }; - void OnAsyncOutputStateSaved(NDqProto::TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint&) override { + void OnAsyncOutputStateSaved(TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint&) override { Y_UNUSED(outputIndex); Parent.AsyncOutputPromises.StateSaved.SetValue(state); - Parent.AsyncOutputPromises.StateSaved = NThreading::NewPromise(); + Parent.AsyncOutputPromises.StateSaved = NThreading::NewPromise(); }; void OnAsyncOutputFinished(ui64 outputIndex) override { @@ -251,10 +252,10 @@ struct TFakeCASetup { void AsyncOutputWrite(const TWriteValueProducer valueProducer, TMaybe checkpoint = Nothing(), bool finish = false); - void SaveSourceState(NDqProto::TCheckpoint checkpoint, NDqProto::TSourceState& state); + void SaveSourceState(NDqProto::TCheckpoint checkpoint, TSourceState& state); - void LoadSource(const NDqProto::TSourceState& state); - void LoadSink(const NDqProto::TSinkState& state); + void LoadSource(const TSourceState& state); + void LoadSink(const TSinkState& state); void Execute(TCallback callback); diff --git a/ydb/library/yql/providers/dq/actors/worker_actor.cpp b/ydb/library/yql/providers/dq/actors/worker_actor.cpp index 887801f59141..59e28aa94914 100644 --- a/ydb/library/yql/providers/dq/actors/worker_actor.cpp +++ b/ydb/library/yql/providers/dq/actors/worker_actor.cpp @@ -744,7 +744,7 @@ class TDqWorker: public TRichActor Y_UNUSED(outputIndex); } - void OnAsyncOutputStateSaved(NDqProto::TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) override { + void OnAsyncOutputStateSaved(TSinkState&& state, ui64 outputIndex, const NDqProto::TCheckpoint& checkpoint) override { Y_UNUSED(state); Y_UNUSED(outputIndex); Y_UNUSED(checkpoint); diff --git a/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp b/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp index c40735ae1e1a..fa58560997da 100644 --- a/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp +++ b/ydb/library/yql/providers/generic/actors/yql_generic_read_actor.cpp @@ -404,10 +404,10 @@ namespace NYql::NDq { TActorBootstrapped::PassAway(); } - void SaveState(const NDqProto::TCheckpoint&, NDqProto::TSourceState&) final { + void SaveState(const NDqProto::TCheckpoint&, TSourceState&) final { } - void LoadState(const NDqProto::TSourceState&) final { + void LoadState(const TSourceState&) final { } void CommitState(const NDqProto::TCheckpoint&) final { diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp index c22e9d833155..a76fa2dcf1af 100644 --- a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp +++ b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp @@ -6,7 +6,8 @@ #include #include #include -#include +//#include +#include #include #include @@ -137,7 +138,7 @@ class TDqPqReadActor : public NActors::TActor, public IDqCompute static constexpr char ActorName[] = "DQ_PQ_READ_ACTOR"; public: - void SaveState(const NDqProto::TCheckpoint& checkpoint, NDqProto::TSourceState& state) override { + void SaveState(const NDqProto::TCheckpoint& checkpoint, TSourceState& state) override { NPq::NProto::TDqPqTopicSourceState stateProto; NPq::NProto::TDqPqTopicSourceState::TTopicDescription* topic = stateProto.AddTopics(); @@ -169,7 +170,7 @@ class TDqPqReadActor : public NActors::TActor, public IDqCompute CurrentDeferredCommit = NYdb::NTopic::TDeferredCommit(); } - void LoadState(const NDqProto::TSourceState& state) override { + void LoadState(const TSourceState& state) override { TInstant minStartingMessageTs = state.DataSize() ? TInstant::Max() : StartingMessageTimestamp; ui64 ingressBytes = 0; for (const auto& stateData : state.GetData()) { diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp index d8f5a2e6d420..5703f68eda92 100644 --- a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp +++ b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp @@ -4,7 +4,8 @@ #include #include #include -#include +//#include +#include #include #include @@ -232,7 +233,7 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu } }; - void LoadState(const NDqProto::TSinkState& state) override { + void LoadState(const TSinkState& state) override { Y_ABORT_UNLESS(NextSeqNo == 1); const auto& data = state.GetData().GetStateData(); if (data.GetVersion() == StateVersion) { // Current version @@ -352,7 +353,7 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu return !events.empty(); } - NDqProto::TSinkState BuildState(const NDqProto::TCheckpoint& checkpoint) { + TSinkState BuildState(const NDqProto::TCheckpoint& checkpoint) { NPq::NProto::TDqPqTopicSinkState stateProto; stateProto.SetSourceId(GetSourceId()); stateProto.SetConfirmedSeqNo(ConfirmedSeqNo); @@ -360,7 +361,7 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu TString serializedState; YQL_ENSURE(stateProto.SerializeToString(&serializedState)); - NDqProto::TSinkState sinkState; + TSinkState sinkState; auto* data = sinkState.MutableData()->MutableStateData(); data->SetVersion(StateVersion); data->SetBlob(serializedState); diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp index d4752920b494..74ebbf60786d 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp @@ -1008,8 +1008,8 @@ class TS3ReadActor : public TActorBootstrapped, public IDqComputeA static constexpr char ActorName[] = "S3_READ_ACTOR"; private: - void SaveState(const NDqProto::TCheckpoint&, NDqProto::TSourceState&) final {} - void LoadState(const NDqProto::TSourceState&) final {} + void SaveState(const NDqProto::TCheckpoint&, TSourceState&) final {} + void LoadState(const TSourceState&) final {} void CommitState(const NDqProto::TCheckpoint&) final {} ui64 GetInputIndex() const final { @@ -2775,8 +2775,8 @@ class TS3StreamReadActor : public TActorBootstrapped, public size_t PathInd; }; - void SaveState(const NDqProto::TCheckpoint&, NDqProto::TSourceState&) final {} - void LoadState(const NDqProto::TSourceState&) final {} + void SaveState(const NDqProto::TCheckpoint&, TSourceState&) final {} + void LoadState(const TSourceState&) final {} void CommitState(const NDqProto::TCheckpoint&) final {} ui64 GetInputIndex() const final { diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp index 46bf335802c0..76b65478da26 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp @@ -524,7 +524,7 @@ class TS3WriteActor : public TActorBootstrapped, public IDqComput static constexpr char ActorName[] = "S3_WRITE_ACTOR"; private: void CommitState(const NDqProto::TCheckpoint&) final {}; - void LoadState(const NDqProto::TSinkState&) final {}; + void LoadState(const TSinkState&) final {}; ui64 GetOutputIndex() const final { return OutputIndex; diff --git a/ydb/library/yql/providers/solomon/async_io/dq_solomon_write_actor.cpp b/ydb/library/yql/providers/solomon/async_io/dq_solomon_write_actor.cpp index 73013df36a08..dd1f52482de1 100644 --- a/ydb/library/yql/providers/solomon/async_io/dq_solomon_write_actor.cpp +++ b/ydb/library/yql/providers/solomon/async_io/dq_solomon_write_actor.cpp @@ -3,7 +3,8 @@ #include #include -#include +//include +#include #include #include @@ -194,7 +195,7 @@ class TDqSolomonWriteActor : public NActors::TActor, publi CheckFinished(); }; - void LoadState(const NDqProto::TSinkState&) override { } + void LoadState(const TSinkState&) override { } void CommitState(const NDqProto::TCheckpoint&) override { } @@ -304,7 +305,7 @@ class TDqSolomonWriteActor : public NActors::TActor, publi } private: - NDqProto::TSinkState BuildState() { return {}; } + TSinkState BuildState() { return {}; } TString GetUrl() const { return GetSolomonUrl(WriteParams.Shard.GetEndpoint(), diff --git a/ydb/library/yql/providers/ydb/actors/yql_ydb_read_actor.cpp b/ydb/library/yql/providers/ydb/actors/yql_ydb_read_actor.cpp index 822f37b1662b..1800a2992dc8 100644 --- a/ydb/library/yql/providers/ydb/actors/yql_ydb_read_actor.cpp +++ b/ydb/library/yql/providers/ydb/actors/yql_ydb_read_actor.cpp @@ -109,8 +109,8 @@ class TYdbReadActor : public TActorBootstrapped, public IDqComput static constexpr char ActorName[] = "YQL_YDB_READ_ACTOR"; private: - void SaveState(const NDqProto::TCheckpoint&, NDqProto::TSourceState&) final {} - void LoadState(const NDqProto::TSourceState&) final {} + void SaveState(const NDqProto::TCheckpoint&, TSourceState&) final {} + void LoadState(const TSourceState&) final {} void CommitState(const NDqProto::TCheckpoint&) final {} ui64 GetInputIndex() const final { diff --git a/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp b/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp index db6b6d39f937..7d3f6fc503f8 100644 --- a/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp +++ b/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp @@ -115,7 +115,7 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { } Y_UNIT_TEST(TestSaveLoadPqRead) { - NDqProto::TSourceState state; + TSourceState state; const TString topicName = "SaveLoadPqRead"; PQCreateStream(topicName); @@ -134,7 +134,7 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { Cerr << "State saved" << Endl; } - NDqProto::TSourceState state2; + TSourceState state2; { TPqIoTestFixture setup2; setup2.InitSource(topicName); @@ -153,7 +153,7 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { PQWrite({"futherData"}, topicName); } - NDqProto::TSourceState state3; + TSourceState state3; { TPqIoTestFixture setup3; setup3.InitSource(topicName); @@ -206,7 +206,7 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { } Y_UNIT_TEST(LoadCorruptedState) { - NDqProto::TSourceState state; + TSourceState state; const TString topicName = "Invalid"; // We wouldn't read from this topic. auto checkpoint = CreateCheckpoint(); @@ -232,7 +232,7 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { const TString topicName = "LoadFromSeveralStates"; PQCreateStream(topicName); - NDqProto::TSourceState state2; + TSourceState state2; { TPqIoTestFixture setup; setup.InitSource(topicName); @@ -243,7 +243,7 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { auto result1 = setup.SourceReadDataUntil(UVParser, 1); AssertDataWithWatermarks(result1, data, {}); - NDqProto::TSourceState state1; + TSourceState state1; auto checkpoint1 = CreateCheckpoint(); setup.SaveSourceState(checkpoint1, state1); Cerr << "State saved" << Endl; @@ -313,7 +313,7 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { Y_UNIT_TEST(WatermarkCheckpointWithItemsInReadyBuffer) { const TString topicName = "WatermarkCheckpointWithItemsInReadyBuffer"; PQCreateStream(topicName); - NDqProto::TSourceState state; + TSourceState state; { TPqIoTestFixture setup; diff --git a/ydb/tests/fq/pq_async_io/dq_pq_write_actor_ut.cpp b/ydb/tests/fq/pq_async_io/dq_pq_write_actor_ut.cpp index 7c6cd84f3185..8bb3be26c27a 100644 --- a/ydb/tests/fq/pq_async_io/dq_pq_write_actor_ut.cpp +++ b/ydb/tests/fq/pq_async_io/dq_pq_write_actor_ut.cpp @@ -77,7 +77,7 @@ Y_UNIT_TEST_SUITE(TPqWriterTest) { const TString topicName = "Checkpoints"; PQCreateStream(topicName); - NDqProto::TSinkState state1; + TSinkState state1; { TPqIoTestFixture setup; setup.InitAsyncOutput(topicName); @@ -125,7 +125,7 @@ Y_UNIT_TEST_SUITE(TPqWriterTest) { const TString topicName = "CheckpointsWithEmptyBatch"; PQCreateStream(topicName); - NDqProto::TSinkState state1; + TSinkState state1; { InitAsyncOutput(topicName); diff --git a/ydb/tests/fq/pq_async_io/ut_helpers.h b/ydb/tests/fq/pq_async_io/ut_helpers.h index 260bdcec0281..96fd267ecf6a 100644 --- a/ydb/tests/fq/pq_async_io/ut_helpers.h +++ b/ydb/tests/fq/pq_async_io/ut_helpers.h @@ -74,11 +74,11 @@ struct TPqIoTestFixture : public NUnitTest::TBaseFixture { } - void SaveSourceState(NDqProto::TCheckpoint checkpoint, NDqProto::TSourceState& state) { + void SaveSourceState(NDqProto::TCheckpoint checkpoint, TSourceState& state) { CaSetup->SaveSourceState(checkpoint, state); } - void LoadSource(const NDqProto::TSourceState& state) { + void LoadSource(const TSourceState& state) { return CaSetup->LoadSource(state); } @@ -94,7 +94,7 @@ struct TPqIoTestFixture : public NUnitTest::TBaseFixture { InitAsyncOutput(BuildPqTopicSinkSettings(topic), freeSpace); } - void LoadSink(const NDqProto::TSinkState& state) { + void LoadSink(const TSinkState& state) { CaSetup->LoadSink(state); } From 11f58f12ddcf147afadeea2f009833d5989641e8 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 23 Apr 2024 09:00:00 +0000 Subject: [PATCH 02/21] fix build --- .../checkpoint_storage/ydb_state_storage.cpp | 54 +++++++++---------- .../actors/compute/dq_async_compute_actor.cpp | 9 ++-- .../actors/compute/dq_async_compute_actor.h | 2 +- .../dq/actors/compute/dq_checkpoints_states.h | 7 +++ .../compute/dq_compute_actor_checkpoints.cpp | 8 +-- .../pq/async_io/dq_pq_read_actor.cpp | 16 +++--- .../pq/async_io/dq_pq_write_actor.cpp | 20 +++---- 7 files changed, 62 insertions(+), 54 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index e4aea15bfc61..720aa39eaed2 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -39,14 +39,14 @@ enum class EStateType { class TIncrementLogic { public: - void Apply(TComputeActorState& update) { - Sources = update.GetSources(); // Always snapshot - copy it; - Sinks = update.GetSinks(); + void Apply(NYql::NDq::TComputeActorState& update) { + Sources = update.Sources; // Always snapshot - copy it; + Sinks = update.Sinks; ui64 nodeNum = 0; - if (update.HasMiniKqlProgram()) { - const TString& blob = update.MutableMiniKqlProgram()->MutableData()->MutableStateData()->GetBlob(); - ui64 version = update.MutableMiniKqlProgram()->MutableData()->MutableStateData()->GetVersion(); + if (true) {//update.HasMiniKqlProgram()) { + const TString& blob = update.MiniKqlProgram.Data.Blob; + ui64 version = update.MiniKqlProgram.Data.Version; if (LastVersion) { Y_ENSURE(*LastVersion == version, "Version is different: " << *LastVersion << ", " << version); } @@ -90,11 +90,11 @@ class TIncrementLogic { } } - TComputeActorState Build() { + NYql::NDq::TComputeActorState Build() { NKikimr::NMiniKQL::TScopedAlloc alloc(__LOCATION__); - TComputeActorState state; - *state.MutableSources() = Sources; - *state.MutableSinks() = Sinks; + NYql::NDq::TComputeActorState state; + state.Sources = Sources; + state.Sinks = Sinks; TString result; for (const auto& [nodeNum, nodeState] : NodeStates) { @@ -108,18 +108,18 @@ class TIncrementLogic { result.AppendNoAlias(savedBuf.Data(), savedBuf.Size()); } } - const auto& stateData = state.MutableMiniKqlProgram()->MutableData()->MutableStateData(); - stateData->SetBlob(result); + auto& stateData = state.MiniKqlProgram.Data; + stateData.Blob = result; Y_ENSURE(LastVersion, "LastVersion is empty"); - stateData->SetVersion(*LastVersion); + stateData.Version = *LastVersion; return state; } - static EStateType GetStateType(const TComputeActorState& state) { - if (!state.HasMiniKqlProgram()) { + static EStateType GetStateType(const NYql::NDq::TComputeActorState& state) { + if (false) {// !state.MiniKqlProgram) { return EStateType::Snapshot; } - const TString& blob = state.GetMiniKqlProgram().GetData().GetStateData().GetBlob(); + const TString& blob = state.MiniKqlProgram.Data.Blob; TStringBuf buf(blob); while (!buf.empty()) { auto nodeStateSize = NKikimr::NMiniKQL::ReadUi32(buf); @@ -141,8 +141,8 @@ class TIncrementLogic { TString SimpleBlobNodeState; }; std::map NodeStates; - google::protobuf::RepeatedPtrField< ::NYql::NDq::TSourceState> Sources; - google::protobuf::RepeatedPtrField Sinks; + std::list Sources; + std::list Sinks; TMaybe LastVersion; }; @@ -163,7 +163,7 @@ struct TContext : public TThrRefBase { std::list Rows; EStateType Type = EStateType::Snapshot; std::list ListOfStatesForReading; // ordered by desc - std::list States; + std::list States; }; const NActors::TActorSystem* ActorSystem; @@ -299,7 +299,7 @@ class TStateStorage : public IStateStorage { ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, - const TComputeActorState& state) override; + const NYql::NDq::TComputeActorState& state) override; TFuture GetState( const std::vector& taskIds, @@ -333,7 +333,7 @@ class TStateStorage : public IStateStorage { const TContextPtr& context); std::list SerializeState( - const TComputeActorState& state); + const NYql::NDq::TComputeActorState& state); EStateType DeserializeState( TContext::TaskInfo& taskInfo); @@ -344,7 +344,7 @@ class TStateStorage : public IStateStorage { TFuture ReadRows( const TContextPtr& context); - std::vector ApplyIncrements( + std::vector ApplyIncrements( const TContextPtr& context, NYql::TIssues& issues); @@ -418,13 +418,13 @@ EStateType TStateStorage::DeserializeState(TContext::TaskInfo& taskInfo) { it = taskInfo.Rows.erase(it); } taskInfo.States.push_front({}); - TComputeActorState& state = taskInfo.States.front(); + NYql::NDq::TComputeActorState& state = taskInfo.States.front(); auto res = state.ParseFromString(blob); Y_ENSURE(res, "Parsing error"); return TIncrementLogic::GetStateType(state); } -std::list TStateStorage::SerializeState(const TComputeActorState& state) { +std::list TStateStorage::SerializeState(const NYql::NDq::TComputeActorState& state) { std::list result; TString serializedState; if (!state.SerializeToString(&serializedState)) { @@ -447,7 +447,7 @@ TFuture TStateStorage::SaveState( ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, - const TComputeActorState& state) { + const NYql::NDq::TComputeActorState& state) { const size_t stateSize = state.ByteSizeLong(); std::cerr << "SaveState, size: " << stateSize << std::endl; @@ -973,12 +973,12 @@ TFuture TStateStorage::ReadRows(const TContextPtr& context) { return promise.GetFuture(); } -std::vector TStateStorage::ApplyIncrements( +std::vector TStateStorage::ApplyIncrements( const TContextPtr& context, NYql::TIssues& issues) { LOG_STORAGE_DEBUG(context, "ApplyIncrements"); - std::vector states; + std::vector states; try { for (auto& task : context->Tasks) { diff --git a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.cpp b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.cpp index cd22d16af509..2e200ea02716 100644 --- a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.cpp @@ -548,18 +548,19 @@ class TDqAsyncComputeActor : public TDqComputeActorBaseSwap(&*ProgramState); + state.MiniKqlProgram = std::move(*ProgramState); ProgramState.Destroy(); // TODO:(whcrc) maybe save Sources before Program? for (auto& [inputIndex, source] : SourcesMap) { YQL_ENSURE(source.AsyncInput, "Source[" << inputIndex << "] is not created"); - TSourceState& sourceState = *state.AddSources(); + state.Sources.push_back({}); + TSourceState& sourceState = state.Sources.back(); source.AsyncInput->SaveState(checkpoint, sourceState); - sourceState.SetInputIndex(inputIndex); + sourceState.InputIndex = inputIndex; } } diff --git a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h index 591562f7ab5d..06396724eae0 100644 --- a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h +++ b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h @@ -6,7 +6,7 @@ #include #include #include -##include +//include #include #include #include diff --git a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h index 8614f48f3bdb..cfe9e2eaab3a 100644 --- a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h +++ b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace NYql::NDq { @@ -22,6 +23,8 @@ struct TSourceState { // from several different tasks sources. std::list Data; ui64 InputIndex; + + size_t DataSize() const {return 1;} }; struct TSinkState { @@ -38,6 +41,10 @@ struct TComputeActorState { void Clear() { // TODO; } + + bool ParseFromString(const TString& /*in*/) { return true;} + bool SerializeToString(TString* /*out*/) const { return true;} + size_t ByteSizeLong() const {return 1;} }; } // namespace NDq::NYql diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp index 41fea4f72d96..777e2f286d68 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp @@ -518,9 +518,9 @@ void TDqComputeActorCheckpoints::OnSinkStateSaved(TSinkState&& state, ui64 outpu } PendingCheckpoint.ComputeActorState.Sinks.push_back({}); - TSinkState* sinkState = PendingCheckpoint.ComputeActorState.Sinks.back(); - *sinkState = std::move(state); - sinkState->SetOutputIndex(outputIndex); // Set index explicitly to avoid errors + TSinkState& sinkState = PendingCheckpoint.ComputeActorState.Sinks.back(); + sinkState = std::move(state); + sinkState.OutputIndex = outputIndex; // Set index explicitly to avoid errors ++PendingCheckpoint.SavedSinkStatesCount; LOG_T("Sink[" << outputIndex << "] state saved"); @@ -531,7 +531,7 @@ void TDqComputeActorCheckpoints::TryToSavePendingCheckpoint() { Y_ABORT_UNLESS(PendingCheckpoint); if (PendingCheckpoint.IsReady()) { auto saveTaskStateRequest = MakeHolder(GraphId, Task.GetId(), *PendingCheckpoint.Checkpoint); - saveTaskStateRequest->State.Swap(&PendingCheckpoint.ComputeActorState); + saveTaskStateRequest->State = std::move(PendingCheckpoint.ComputeActorState); Send(CheckpointStorage, std::move(saveTaskStateRequest)); LOG_PCP_D("Task checkpoint is done. Send to storage"); diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp index a76fa2dcf1af..87e7cbdb9994 100644 --- a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp +++ b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp @@ -162,9 +162,10 @@ class TDqPqReadActor : public NActors::TActor, public IDqCompute TString stateBlob; YQL_ENSURE(stateProto.SerializeToString(&stateBlob)); - auto* data = state.AddData()->MutableStateData(); - data->SetVersion(StateVersion); - data->SetBlob(stateBlob); + state.Data.push_back({}); + auto& data = state.Data.back(); + data.Version = StateVersion; + data.Blob = stateBlob; DeferredCommits.emplace(checkpoint.GetId(), std::move(CurrentDeferredCommit)); CurrentDeferredCommit = NYdb::NTopic::TDeferredCommit(); @@ -173,11 +174,10 @@ class TDqPqReadActor : public NActors::TActor, public IDqCompute void LoadState(const TSourceState& state) override { TInstant minStartingMessageTs = state.DataSize() ? TInstant::Max() : StartingMessageTimestamp; ui64 ingressBytes = 0; - for (const auto& stateData : state.GetData()) { - const auto& data = stateData.GetStateData(); - if (data.GetVersion() == StateVersion) { // Current version + for (const auto& data : state.Data) { + if (data.Version == StateVersion) { // Current version NPq::NProto::TDqPqTopicSourceState stateProto; - YQL_ENSURE(stateProto.ParseFromString(data.GetBlob()), "Serialized state is corrupted"); + YQL_ENSURE(stateProto.ParseFromString(data.Blob), "Serialized state is corrupted"); YQL_ENSURE(stateProto.TopicsSize() == 1, "One topic per source is expected"); PartitionToOffset.reserve(PartitionToOffset.size() + stateProto.PartitionsSize()); for (const NPq::NProto::TDqPqTopicSourceState::TPartitionReadState& partitionProto : stateProto.GetPartitions()) { @@ -191,7 +191,7 @@ class TDqPqReadActor : public NActors::TActor, public IDqCompute minStartingMessageTs = Min(minStartingMessageTs, TInstant::MilliSeconds(stateProto.GetStartingMessageTimestampMs())); ingressBytes += stateProto.GetIngressBytes(); } else { - ythrow yexception() << "Invalid state version " << data.GetVersion(); + ythrow yexception() << "Invalid state version " << data.Version; } } for (const auto& [key, value] : PartitionToOffset) { diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp index 5703f68eda92..1277f0c9b04f 100644 --- a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp +++ b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp @@ -235,18 +235,18 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu void LoadState(const TSinkState& state) override { Y_ABORT_UNLESS(NextSeqNo == 1); - const auto& data = state.GetData().GetStateData(); - if (data.GetVersion() == StateVersion) { // Current version + const auto& data = state.Data; + if (data.Version == StateVersion) { // Current version NPq::NProto::TDqPqTopicSinkState stateProto; - YQL_ENSURE(stateProto.ParseFromString(data.GetBlob()), "Serialized state is corrupted"); - SINK_LOG_D("Load state: " << stateProto); + YQL_ENSURE(stateProto.ParseFromString(data.Blob), "Serialized state is corrupted"); + //SINK_LOG_D("Load state: " << stateProto); SourceId = stateProto.GetSourceId(); ConfirmedSeqNo = stateProto.GetConfirmedSeqNo(); NextSeqNo = ConfirmedSeqNo + 1; EgressStats.Bytes = stateProto.GetEgressBytes(); return; } - ythrow yexception() << "Invalid state version " << data.GetVersion(); + ythrow yexception() << "Invalid state version " << data.Version; } void CommitState(const NDqProto::TCheckpoint& checkpoint) override { @@ -353,7 +353,7 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu return !events.empty(); } - TSinkState BuildState(const NDqProto::TCheckpoint& checkpoint) { + TSinkState BuildState(const NDqProto::TCheckpoint& /*checkpoint*/) { NPq::NProto::TDqPqTopicSinkState stateProto; stateProto.SetSourceId(GetSourceId()); stateProto.SetConfirmedSeqNo(ConfirmedSeqNo); @@ -362,10 +362,10 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu YQL_ENSURE(stateProto.SerializeToString(&serializedState)); TSinkState sinkState; - auto* data = sinkState.MutableData()->MutableStateData(); - data->SetVersion(StateVersion); - data->SetBlob(serializedState); - SINK_LOG_T("Save checkpoint " << checkpoint << " state: " << stateProto << ". Sink state: " << sinkState); + auto& data = sinkState.Data; + data.Version = StateVersion; + data.Blob = serializedState; + //SINK_LOG_T("Save checkpoint " << checkpoint << " state: " << stateProto << ". Sink state: " << sinkState); return sinkState; } From ab6e747a5c8c66f8e95887bb88f460781676fe44 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 23 Apr 2024 11:17:52 +0000 Subject: [PATCH 03/21] fix all build --- .../libs/checkpoint_storage/state_storage.h | 1 - .../fq/libs/checkpoint_storage/ut/gc_ut.cpp | 6 +- .../ut/storage_service_ydb_ut.cpp | 4 +- .../ut/ydb_state_storage_ut.cpp | 59 +++++++++++-------- .../dq/actors/compute/dq_checkpoints_states.h | 37 ++++++++++-- .../yql/dq/actors/compute/dq_compute_actor.h | 37 ------------ .../yql/dq/actors/task_runner/events.h | 1 - .../providers/common/ut_helpers/dq_fake_ca.h | 1 - .../pq/async_io/dq_pq_read_actor.cpp | 1 - .../pq/async_io/dq_pq_write_actor.cpp | 1 - 10 files changed, 71 insertions(+), 77 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/state_storage.h b/ydb/core/fq/libs/checkpoint_storage/state_storage.h index daa7ecdc20f5..3ef72f184287 100644 --- a/ydb/core/fq/libs/checkpoint_storage/state_storage.h +++ b/ydb/core/fq/libs/checkpoint_storage/state_storage.h @@ -2,7 +2,6 @@ #include -// #include #include #include diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp index 0ff053406c88..640a522d58a9 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp @@ -32,7 +32,7 @@ namespace { //////////////////////////////////////////////////////////////////////////////// -TComputeActorState MakeStateFromBlob(size_t blobSize, bool isIncrement = false) { +NYql::NDq::TComputeActorState MakeStateFromBlob(size_t blobSize, bool isIncrement = false) { TString blob; blob.reserve(blobSize); for (size_t i = 0; i < blobSize; ++i) { @@ -51,8 +51,8 @@ TComputeActorState MakeStateFromBlob(size_t blobSize, bool isIncrement = false) const TStringBuf savedBuf = value.AsStringRef(); TString result; NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, savedBuf); - TComputeActorState state; - state.MutableMiniKqlProgram()->MutableData()->MutableStateData()->SetBlob(result); + NYql::NDq::TComputeActorState state; + state.MiniKqlProgram.Data.Blob = result; return state; } diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/storage_service_ydb_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/storage_service_ydb_ut.cpp index 742881e2fcf7..309c9873a604 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/storage_service_ydb_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/storage_service_ydb_ut.cpp @@ -217,7 +217,7 @@ void SaveState( checkpoint.SetGeneration(checkpointId.CoordinatorGeneration); checkpoint.SetId(checkpointId.SeqNo); auto request = std::make_unique(GraphId, taskId, checkpoint); - request->State.MutableMiniKqlProgram()->MutableData()->MutableStateData()->SetBlob(blob); + request->State.MiniKqlProgram.Data.Blob = blob; runtime->Send(new IEventHandle(NYql::NDq::MakeCheckpointStorageID(), sender, request.release())); TAutoPtr handle; @@ -247,7 +247,7 @@ TString GetState( UNIT_ASSERT(event->Issues.Empty()); UNIT_ASSERT(!event->States.empty()); - return event->States[0].GetMiniKqlProgram().GetData().GetStateData().GetBlob(); + return event->States[0].MiniKqlProgram.Data.Blob; } void CreateCompletedCheckpoint( diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp index e93a6c0146b5..fc691c20d2a1 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp @@ -25,7 +25,7 @@ const TCheckpointId CheckpointId2(12, 1); const TCheckpointId CheckpointId3(12, 4); const TCheckpointId CheckpointId4(13, 2); -const size_t YdbRowSizeLimit = 16*1000*1000; +const size_t YdbRowSizeLimit = 500; //////////////////////////////////////////////////////////////////////////////// @@ -52,15 +52,15 @@ class TFixture : public NUnitTest::TBaseFixture { return storage; } - TComputeActorState MakeState(NYql::NUdf::TUnboxedValuePod&& value) { + NYql::NDq::TComputeActorState MakeState(NYql::NUdf::TUnboxedValuePod&& value) { TString result; NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, value.AsStringRef()); - TComputeActorState state; - state.MutableMiniKqlProgram()->MutableData()->MutableStateData()->SetBlob(result); + NYql::NDq::TComputeActorState state; + state.MiniKqlProgram.Data.Blob = result; return state; } - TEvDqCompute::TEvComputeActorState MakeStateFromBlob(size_t blobSize) { + NYql::NDq::TComputeActorState MakeStateFromBlob(size_t blobSize) { TString blob; blob.reserve(blobSize); for (size_t i = 0; i < blobSize; ++i) { @@ -69,7 +69,7 @@ class TFixture : public NUnitTest::TBaseFixture { return MakeState(NKikimr::NMiniKQL::TOutputSerializer::MakeSimpleBlobState(blob, 0)); } - TEvDqCompute::TEvComputeActorState MakeIncrementState(size_t miniKqlPStateSize) { + NYql::NDq::TComputeActorState MakeIncrementState(size_t miniKqlPStateSize) { std::map map; size_t itemCount = 4; for (size_t i = 0; i < itemCount; ++i) { @@ -78,7 +78,7 @@ class TFixture : public NUnitTest::TBaseFixture { return MakeState(NKikimr::NMiniKQL::TOutputSerializer::MakeSnapshotState(map, 0)); } - TEvDqCompute::TEvComputeActorState MakeIncrementState( + NYql::NDq::TComputeActorState MakeIncrementState( const std::map& snapshot, const std::map& increment, const std::set& deleted) @@ -94,13 +94,13 @@ class TFixture : public NUnitTest::TBaseFixture { ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, - const TEvDqCompute::TEvComputeActorState& state) + const NYql::NDq::TComputeActorState& state) { auto issues = storage->SaveState(taskId, graphId, checkpointId, state).GetValueSync(); UNIT_ASSERT_C(issues.Empty(), issues.ToString()); } - TEvDqCompute::TEvComputeActorState GetState( + NYql::NDq::TComputeActorState GetState( TStateStoragePtr storage, const ui64 taskId, const TString& graphId, @@ -112,7 +112,7 @@ class TFixture : public NUnitTest::TBaseFixture { return states[0]; } - void ShouldSaveGetStateImpl(const char* tablePrefix, const TEvDqCompute::TEvComputeActorState& state) { + void ShouldSaveGetStateImpl(const char* tablePrefix, const NYql::NDq::TComputeActorState& state) { auto storage = GetStateStorage(tablePrefix); auto issues = storage->SaveState(1, "graph1", CheckpointId1, state).GetValueSync(); UNIT_ASSERT_C(issues.Empty(), issues.ToString()); @@ -120,8 +120,19 @@ class TFixture : public NUnitTest::TBaseFixture { auto [states, getIssues] = storage->GetState({1}, "graph1", CheckpointId1).GetValueSync(); UNIT_ASSERT_C(getIssues.Empty(), getIssues.ToString()); UNIT_ASSERT(!states.empty()); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state, states[0])); + UNIT_ASSERT(Equals(state, states[0])); } + + bool Equals(const NYql::NDq::TComputeActorState& state1, const NYql::NDq::TComputeActorState& state2) { + return state1.MiniKqlProgram.Data.Blob == state2.MiniKqlProgram.Data.Blob + && state1.MiniKqlProgram.Data.Version == state2.MiniKqlProgram.Data.Version + && state1.MiniKqlProgram.RuntimeVersion == state2.MiniKqlProgram.RuntimeVersion + && state1.Sources.size() == state2.Sources.size() + && state1.Sinks.size() == state2.Sinks.size(); + + return true; + } + private: NKikimr::NMiniKQL::TScopedAlloc Alloc; NKikimr::TActorSystemStub ActorSystemStub; @@ -147,14 +158,14 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { auto state2 = NKikimr::NMiniKQL::TOutputSerializer::MakeSimpleBlobState(TString(20, 'b'), 0); NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, state1.AsStringRef()); NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, state2.AsStringRef()); - TEvDqCompute::TEvComputeActorState state; - state.MutableMiniKqlProgram()->MutableData()->MutableStateData()->SetBlob(result); + NYql::NDq::TComputeActorState state; + state.MiniKqlProgram.Data.Blob = result; ShouldSaveGetStateImpl("TStateStorageTestShouldSaveGetState", state); } Y_UNIT_TEST_F(ShouldSaveGetOldBigState, TFixture) { - ShouldSaveGetStateImpl("TStateStorageTestShouldSaveGetState", MakeStateFromBlob(YdbRowSizeLimit * 150)); + ShouldSaveGetStateImpl("TStateStorageTestShouldSaveGetState", MakeStateFromBlob(YdbRowSizeLimit * 4)); } Y_UNIT_TEST_F(ShouldSaveGetIncrementSmallState, TFixture) @@ -334,19 +345,19 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { UNIT_ASSERT_C(getIssues.Empty(), getIssues.ToString()); UNIT_ASSERT_VALUES_EQUAL(states.size(), 4); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state1, states[0])); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state2, states[1])); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state3, states[2])); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state4, states[3])); + UNIT_ASSERT(Equals(state1, states[0])); + UNIT_ASSERT(Equals(state2, states[1])); + UNIT_ASSERT(Equals(state3, states[2])); + UNIT_ASSERT(Equals(state4, states[3])); // in different order auto [states2, getIssues2] = storage->GetState({42, 1, 13, 7}, "graph1", CheckpointId1).GetValueSync(); UNIT_ASSERT(getIssues2.Empty()); UNIT_ASSERT_VALUES_EQUAL(states2.size(), 4); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state2, states2[0])); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state1, states2[1])); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state4, states2[2])); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state3, states2[3])); + UNIT_ASSERT(Equals(state2, states2[0])); + UNIT_ASSERT(Equals(state1, states2[1])); + UNIT_ASSERT(Equals(state4, states2[2])); + UNIT_ASSERT(Equals(state3, states2[3])); } Y_UNIT_TEST_F(ShouldLoadLastSnapshot, TFixture) @@ -360,7 +371,7 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { SaveState(storage, 1, "graph1", CheckpointId2, state2); auto state = GetState(storage, 1, "graph1", CheckpointId2); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(state, state2)); + UNIT_ASSERT(Equals(state, state2)); } Y_UNIT_TEST_F(ShouldNotGetNonExistendSnaphotState, TFixture) @@ -392,7 +403,7 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { auto expected = MakeIncrementState({{"key1", "value1-new"}, {"key3", "value3"}, {"key4", value4}}, {}, {}); auto actual = GetState(storage, 1, "graph1", CheckpointId3); - UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(expected, actual)); + UNIT_ASSERT(Equals(expected, actual)); } }; diff --git a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h index cfe9e2eaab3a..6d898983cdf4 100644 --- a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h +++ b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h @@ -8,11 +8,19 @@ namespace NYql::NDq { struct TStateData { TString Blob; ui64 Version{0}; + + Y_SAVELOAD_DEFINE(Blob, Version); }; struct TMiniKqlProgramState { TStateData Data; ui64 RuntimeVersion{0}; + + void Clear() { + Data.Blob.clear(); + } + + Y_SAVELOAD_DEFINE(Data, RuntimeVersion); }; struct TSourceState { @@ -22,14 +30,18 @@ struct TSourceState { // between tasks there can be state // from several different tasks sources. std::list Data; - ui64 InputIndex; + ui64 InputIndex{0}; size_t DataSize() const {return 1;} + + Y_SAVELOAD_DEFINE(Data, InputIndex); }; struct TSinkState { TStateData Data; - ui64 OutputIndex; + ui64 OutputIndex{0}; + + Y_SAVELOAD_DEFINE(Data, OutputIndex); }; // Checkpoint for single compute actor. @@ -39,12 +51,25 @@ struct TComputeActorState { std::list Sinks; void Clear() { - // TODO; + MiniKqlProgram.Clear(); + Sources.clear(); + Sinks.clear(); + } + + bool ParseFromString(const TString& in) { + TStringStream str(in); + Load(&str); + return true; + } + bool SerializeToString(TString* out) const { + TStringStream result; + Save(&result); + *out = result.Str(); + return true; } + size_t ByteSizeLong() const {return 1;} - bool ParseFromString(const TString& /*in*/) { return true;} - bool SerializeToString(TString* /*out*/) const { return true;} - size_t ByteSizeLong() const {return 1;} + Y_SAVELOAD_DEFINE(MiniKqlProgram, Sources, Sinks); }; } // namespace NDq::NYql diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor.h index a631137962ed..ac6ee7e6aae8 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor.h @@ -4,7 +4,6 @@ #include #include #include -//#include #include #include #include @@ -19,42 +18,6 @@ namespace NYql { namespace NDq { -// struct TStateData { -// TString Blob; -// ui64 Version{0}; -// }; - -// struct TMiniKqlProgramState { -// TStateData Data; -// ui64 RuntimeVersion{0}; -// }; - -// struct TSourceState { -// // State data for source. -// // Typically there is only one element with state that -// // source saved. But when we are migrating states -// // between tasks there can be state -// // from several different tasks sources. -// std::list Data; -// ui64 InputIndex; -// }; - -// struct TSinkState { -// TStateData Data; -// ui64 OutputIndex; -// }; - -// // Checkpoint for single compute actor. -// struct TComputeActorState { -// TMiniKqlProgramState MiniKqlProgram; -// std::list Sources; -// std::list Sinks; - -// void Clear() { -// // TODO; -// } -// }; - struct TEvDqCompute { struct TEvState : public NActors::TEventPB {}; struct TEvStateRequest : public NActors::TEventPB {}; diff --git a/ydb/library/yql/dq/actors/task_runner/events.h b/ydb/library/yql/dq/actors/task_runner/events.h index c2ce3ed0aedb..4555a8119482 100644 --- a/ydb/library/yql/dq/actors/task_runner/events.h +++ b/ydb/library/yql/dq/actors/task_runner/events.h @@ -11,7 +11,6 @@ #include #include #include -//#include #include #include diff --git a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h index 068841d8f668..8d50cff70b66 100644 --- a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h +++ b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h @@ -3,7 +3,6 @@ #include #include #include -//#include #include #include #include diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp index 87e7cbdb9994..782f6a1b0148 100644 --- a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp +++ b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp @@ -6,7 +6,6 @@ #include #include #include -//#include #include #include diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp index 1277f0c9b04f..3c527e93cfae 100644 --- a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp +++ b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp @@ -4,7 +4,6 @@ #include #include #include -//#include #include #include From d1c5f3700af20f311fd4ee5ae79d59774c53ad1f Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 23 Apr 2024 11:43:23 +0000 Subject: [PATCH 04/21] noch einmal --- ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp b/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp index 7d3f6fc503f8..e5d149bfbf83 100644 --- a/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp +++ b/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp @@ -217,9 +217,9 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { } // Corrupt state. - TString corruptedBlob = state.GetData(0).GetStateData().GetBlob(); + TString corruptedBlob = state.Data.front().Blob; corruptedBlob.append('a'); - state.MutableData(0)->MutableStateData()->SetBlob(corruptedBlob); + state.Data.front().Blob = corruptedBlob; { TPqIoTestFixture setup2; @@ -259,7 +259,8 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { Cerr << "State 2 saved" << Endl; // Add state1 to state2 - *state2.AddData() = state1.GetData(0); + state2.Data.push_back(state1.Data.front()); + //*state2.AddData() = state1.Data.front(); } TPqIoTestFixture setup2; From 3adc560e9fe455846911e72aad843444c57cdc04 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 23 Apr 2024 13:04:58 +0000 Subject: [PATCH 05/21] add optional --- ydb/core/fq/libs/checkpoint_storage/state_storage.h | 1 - ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp | 2 +- .../checkpoint_storage/ut/storage_service_ydb_ut.cpp | 4 ++-- ydb/core/fq/libs/checkpoint_storage/ut/ya.make | 2 +- .../checkpoint_storage/ut/ydb_state_storage_ut.cpp | 4 ++-- .../fq/libs/checkpoint_storage/ydb_state_storage.cpp | 12 ++++++------ .../yql/dq/actors/compute/dq_checkpoints_states.h | 8 +++++++- .../actors/compute/dq_compute_actor_checkpoints.cpp | 10 +++++----- .../yql/dq/actors/compute/dq_compute_actor_impl.h | 4 ++-- .../dq/actors/compute/dq_sync_compute_actor_base.h | 2 +- .../yql/providers/pq/async_io/dq_pq_read_actor.cpp | 5 +---- 11 files changed, 28 insertions(+), 26 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/state_storage.h b/ydb/core/fq/libs/checkpoint_storage/state_storage.h index 3ef72f184287..8ac5e9f040dc 100644 --- a/ydb/core/fq/libs/checkpoint_storage/state_storage.h +++ b/ydb/core/fq/libs/checkpoint_storage/state_storage.h @@ -4,7 +4,6 @@ #include -#include #include #include diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp index 640a522d58a9..1e9acd671d79 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/gc_ut.cpp @@ -52,7 +52,7 @@ NYql::NDq::TComputeActorState MakeStateFromBlob(size_t blobSize, bool isIncremen TString result; NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, savedBuf); NYql::NDq::TComputeActorState state; - state.MiniKqlProgram.Data.Blob = result; + state.MiniKqlProgram.ConstructInPlace().Data.Blob = result; return state; } diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/storage_service_ydb_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/storage_service_ydb_ut.cpp index 309c9873a604..7b88ebc26e8e 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/storage_service_ydb_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/storage_service_ydb_ut.cpp @@ -217,7 +217,7 @@ void SaveState( checkpoint.SetGeneration(checkpointId.CoordinatorGeneration); checkpoint.SetId(checkpointId.SeqNo); auto request = std::make_unique(GraphId, taskId, checkpoint); - request->State.MiniKqlProgram.Data.Blob = blob; + request->State.MiniKqlProgram.ConstructInPlace().Data.Blob = blob; runtime->Send(new IEventHandle(NYql::NDq::MakeCheckpointStorageID(), sender, request.release())); TAutoPtr handle; @@ -247,7 +247,7 @@ TString GetState( UNIT_ASSERT(event->Issues.Empty()); UNIT_ASSERT(!event->States.empty()); - return event->States[0].MiniKqlProgram.Data.Blob; + return event->States[0].MiniKqlProgram->Data.Blob; } void CreateCompletedCheckpoint( diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/ya.make b/ydb/core/fq/libs/checkpoint_storage/ut/ya.make index c17ecfaf6cd0..b3e6265e4f46 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/ya.make +++ b/ydb/core/fq/libs/checkpoint_storage/ut/ya.make @@ -24,5 +24,5 @@ SRCS( ydb_state_storage_ut.cpp ydb_checkpoint_storage_ut.cpp ) -REQUIREMENTS(ram:32) + END() diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp index fc691c20d2a1..cd82670a2603 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp @@ -56,7 +56,7 @@ class TFixture : public NUnitTest::TBaseFixture { TString result; NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, value.AsStringRef()); NYql::NDq::TComputeActorState state; - state.MiniKqlProgram.Data.Blob = result; + state.MiniKqlProgram.Data.ConstructInPlace().Blob = result; return state; } @@ -159,7 +159,7 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, state1.AsStringRef()); NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, state2.AsStringRef()); NYql::NDq::TComputeActorState state; - state.MiniKqlProgram.Data.Blob = result; + state.MiniKqlProgram.ConstructInPlace().Data.Blob = result; ShouldSaveGetStateImpl("TStateStorageTestShouldSaveGetState", state); } diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index 720aa39eaed2..6faa28e88f95 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -44,9 +44,9 @@ class TIncrementLogic { Sinks = update.Sinks; ui64 nodeNum = 0; - if (true) {//update.HasMiniKqlProgram()) { - const TString& blob = update.MiniKqlProgram.Data.Blob; - ui64 version = update.MiniKqlProgram.Data.Version; + if (update.MiniKqlProgram) { + const TString& blob = update.MiniKqlProgram->Data.Blob; + ui64 version = update.MiniKqlProgram->Data.Version; if (LastVersion) { Y_ENSURE(*LastVersion == version, "Version is different: " << *LastVersion << ", " << version); } @@ -108,7 +108,7 @@ class TIncrementLogic { result.AppendNoAlias(savedBuf.Data(), savedBuf.Size()); } } - auto& stateData = state.MiniKqlProgram.Data; + auto& stateData = state.MiniKqlProgram.ConstructInPlace().Data; stateData.Blob = result; Y_ENSURE(LastVersion, "LastVersion is empty"); stateData.Version = *LastVersion; @@ -116,10 +116,10 @@ class TIncrementLogic { } static EStateType GetStateType(const NYql::NDq::TComputeActorState& state) { - if (false) {// !state.MiniKqlProgram) { + if (!state.MiniKqlProgram) { return EStateType::Snapshot; } - const TString& blob = state.MiniKqlProgram.Data.Blob; + const TString& blob = state.MiniKqlProgram->Data.Blob; TStringBuf buf(blob); while (!buf.empty()) { auto nodeStateSize = NKikimr::NMiniKQL::ReadUi32(buf); diff --git a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h index 6d898983cdf4..0bec9067eeaf 100644 --- a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h +++ b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h @@ -6,6 +6,12 @@ namespace NYql::NDq { struct TStateData { + TStateData() {} + + TStateData(const TString& blob, ui64 version) + : Blob(blob) + , Version(version) {} + TString Blob; ui64 Version{0}; @@ -46,7 +52,7 @@ struct TSinkState { // Checkpoint for single compute actor. struct TComputeActorState { - TMiniKqlProgramState MiniKqlProgram; + TMaybe MiniKqlProgram; std::list Sources; std::list Sinks; diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp index 777e2f286d68..e9c89bc78a1f 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp @@ -101,7 +101,7 @@ TComputeActorState CombineForeignState( const std::vector& taskIds) { TComputeActorState state; - state.MiniKqlProgram.Data.Version = TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion; + state.MiniKqlProgram.ConstructInPlace().Data.Version = TDqComputeActorCheckpoints::ComputeActorCurrentStateVersion; YQL_ENSURE(plan.GetProgram().GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_EMPTY, "Unsupported program state type. Plan: " << plan); for (const auto& sinkPlan : plan.GetSinks()) { YQL_ENSURE(sinkPlan.GetStateType() == NDqProto::NDqStateLoadPlan::STATE_TYPE_EMPTY, "Unsupported sink state type. Plan: " << sinkPlan); @@ -517,10 +517,10 @@ void TDqComputeActorCheckpoints::OnSinkStateSaved(TSinkState&& state, ui64 outpu Y_ABORT_UNLESS(sinkState.OutputIndex != outputIndex, "Double save sink[%lu] state", outputIndex); } - PendingCheckpoint.ComputeActorState.Sinks.push_back({}); - TSinkState& sinkState = PendingCheckpoint.ComputeActorState.Sinks.back(); - sinkState = std::move(state); - sinkState.OutputIndex = outputIndex; // Set index explicitly to avoid errors + state.OutputIndex = outputIndex; // Set index explicitly to avoid errors + PendingCheckpoint.ComputeActorState.Sinks.emplace_back(std::move(state)); + //TSinkState& sinkState = PendingCheckpoint.ComputeActorState.Sinks.back(); + // sinkState = std::move(state); ++PendingCheckpoint.SavedSinkStatesCount; LOG_T("Sink[" << outputIndex << "] state saved"); diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h index 6344af069b68..0deb48366f36 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h @@ -697,7 +697,7 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped void LoadState(TComputeActorState&& state) override final { CA_LOG_D("Load state"); TMaybe error = Nothing(); - const TMiniKqlProgramState& mkqlProgramState = state.MiniKqlProgram; + const TMiniKqlProgramState& mkqlProgramState = *state.MiniKqlProgram; auto guard = BindAllocator(); try { const ui64 version = mkqlProgramState.Data.Version; @@ -721,7 +721,7 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped error = e.what(); CA_LOG_E("Exception: " << error); } - TString& blob = state.MiniKqlProgram.Data.Blob; + TString& blob = state.MiniKqlProgram->Data.Blob; if (blob && !error.Defined()) { CA_LOG_D("State size: " << blob.size()); DoLoadRunnerState(std::move(blob)); diff --git a/ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h b/ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h index 603d5d8e1eee..e31df5ce9f6d 100644 --- a/ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h +++ b/ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h @@ -162,7 +162,7 @@ class TDqSyncComputeActorBase: public TDqComputeActorBase, public IDqCompute TString stateBlob; YQL_ENSURE(stateProto.SerializeToString(&stateBlob)); - state.Data.push_back({}); - auto& data = state.Data.back(); - data.Version = StateVersion; - data.Blob = stateBlob; + state.Data.emplace_back(stateBlob, StateVersion); DeferredCommits.emplace(checkpoint.GetId(), std::move(CurrentDeferredCommit)); CurrentDeferredCommit = NYdb::NTopic::TDeferredCommit(); From ee349d26860c06503e97bb5f920469bd221e395f Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 23 Apr 2024 13:16:13 +0000 Subject: [PATCH 06/21] remove comments --- .../checkpoint_storage/ydb_state_storage.cpp | 4 -- .../actors/compute/dq_async_compute_actor.h | 1 - .../dq/actors/compute/dq_checkpoints_states.h | 2 +- ydb/library/yql/dq/proto/dq_checkpoint.proto | 50 ------------------- ydb/library/yql/dq/proto/ya.make | 1 - .../async_io/dq_solomon_write_actor.cpp | 1 - 6 files changed, 1 insertion(+), 58 deletions(-) delete mode 100644 ydb/library/yql/dq/proto/dq_checkpoint.proto diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index 6faa28e88f95..d5f7979dd2c9 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -459,16 +459,12 @@ TFuture TStateStorage::SaveState( type = TIncrementLogic::GetStateType(state); serializedState = SerializeState(state); if (serializedState.empty()) { - std::cerr << "SaveState, empty: " << std::endl; return MakeFuture(NYql::TIssues{NYql::TIssue{"Failed to serialize compute actor state"}}); } } catch (...) { - //std::cerr << "exception " << std::endl; - LOG_STREAMS_STORAGE_SERVICE_AS_DEBUG(*NActors::TActivationContext::ActorSystem(), "Exception " << stateSize); return MakeFuture(NYql::TIssues{NYql::TIssue{CurrentExceptionMessage()}}); } - // return MakeFuture(NYql::TIssues{NYql::TIssue{"XXXXXX"}}); auto context = MakeIntrusive( NActors::TActivationContext::ActorSystem(), diff --git a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h index 06396724eae0..4492c2a65600 100644 --- a/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h +++ b/ydb/library/yql/dq/actors/compute/dq_async_compute_actor.h @@ -6,7 +6,6 @@ #include #include #include -//include #include #include #include diff --git a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h index 0bec9067eeaf..5cd106024076 100644 --- a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h +++ b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h @@ -78,4 +78,4 @@ struct TComputeActorState { Y_SAVELOAD_DEFINE(MiniKqlProgram, Sources, Sinks); }; -} // namespace NDq::NYql +} // namespace NYql::NDq diff --git a/ydb/library/yql/dq/proto/dq_checkpoint.proto b/ydb/library/yql/dq/proto/dq_checkpoint.proto deleted file mode 100644 index 8d92630cab47..000000000000 --- a/ydb/library/yql/dq/proto/dq_checkpoint.proto +++ /dev/null @@ -1,50 +0,0 @@ -syntax = "proto3"; -option cc_enable_arenas = true; - -package NYql.NDqProto; - -message TStateData { - oneof State { - TExtStateId StateId = 1; // Unique (for TComputeActorCheckpoint) state id for big blobs. // TODO: implement - TData StateData = 2; // In-place blob data for small blobs - } - - message TExtStateId { - uint32 StateId = 1; - } - - message TData { - bytes Blob = 1; - uint64 Version = 2; - // TODO: codec - // TODO: structured data (map, array) - // TODO: minikql node explicit id (HOP) - } -} - -message TSourceState { - // State data for source. - // Typically there is only one element with state that - // source saved. But when we are migrating states - // between tasks there can be state - // from several different tasks sources. - repeated TStateData Data = 1; - uint64 InputIndex = 2; -} - -message TSinkState { - TStateData Data = 1; - uint64 OutputIndex = 2; -} - -message TMiniKqlProgramState { - TStateData Data = 1; - uint64 RuntimeVersion = 2; -} - -// Checkpoint for single compute actor. -message TComputeActorState { - // TMiniKqlProgramState MiniKqlProgram = 1; - repeated TSourceState Sources = 2; - repeated TSinkState Sinks = 3; -} diff --git a/ydb/library/yql/dq/proto/ya.make b/ydb/library/yql/dq/proto/ya.make index abd67f766c0f..cd5b47808895 100644 --- a/ydb/library/yql/dq/proto/ya.make +++ b/ydb/library/yql/dq/proto/ya.make @@ -5,7 +5,6 @@ PEERDIR( ) SRCS( - dq_checkpoint.proto dq_state_load_plan.proto dq_tasks.proto dq_transport.proto diff --git a/ydb/library/yql/providers/solomon/async_io/dq_solomon_write_actor.cpp b/ydb/library/yql/providers/solomon/async_io/dq_solomon_write_actor.cpp index dd1f52482de1..6c088f0219be 100644 --- a/ydb/library/yql/providers/solomon/async_io/dq_solomon_write_actor.cpp +++ b/ydb/library/yql/providers/solomon/async_io/dq_solomon_write_actor.cpp @@ -3,7 +3,6 @@ #include #include -//include #include #include From e73eeee96e828c0f1a0fb600e989d1241011b871 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 23 Apr 2024 13:55:34 +0000 Subject: [PATCH 07/21] fix equal() --- .../ut/ydb_state_storage_ut.cpp | 55 ++++++++++++------- .../dq/actors/compute/dq_checkpoints_states.h | 2 +- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp index cd82670a2603..ab1f7e96e2bb 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp @@ -56,7 +56,7 @@ class TFixture : public NUnitTest::TBaseFixture { TString result; NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, value.AsStringRef()); NYql::NDq::TComputeActorState state; - state.MiniKqlProgram.Data.ConstructInPlace().Blob = result; + state.MiniKqlProgram.ConstructInPlace().Data.Blob = result; return state; } @@ -120,17 +120,32 @@ class TFixture : public NUnitTest::TBaseFixture { auto [states, getIssues] = storage->GetState({1}, "graph1", CheckpointId1).GetValueSync(); UNIT_ASSERT_C(getIssues.Empty(), getIssues.ToString()); UNIT_ASSERT(!states.empty()); - UNIT_ASSERT(Equals(state, states[0])); + CheckEquals(state, states[0]); } - bool Equals(const NYql::NDq::TComputeActorState& state1, const NYql::NDq::TComputeActorState& state2) { - return state1.MiniKqlProgram.Data.Blob == state2.MiniKqlProgram.Data.Blob - && state1.MiniKqlProgram.Data.Version == state2.MiniKqlProgram.Data.Version - && state1.MiniKqlProgram.RuntimeVersion == state2.MiniKqlProgram.RuntimeVersion - && state1.Sources.size() == state2.Sources.size() - && state1.Sinks.size() == state2.Sinks.size(); - - return true; + void CheckEquals(const NYql::NDq::TComputeActorState& state1, const NYql::NDq::TComputeActorState& state2) { + UNIT_ASSERT_VALUES_EQUAL(state1.MiniKqlProgram.Empty(), state2.MiniKqlProgram.Empty()); + if (state1.MiniKqlProgram) { + UNIT_ASSERT_VALUES_EQUAL(state1.MiniKqlProgram->Data.Blob, state2.MiniKqlProgram->Data.Blob); + UNIT_ASSERT_VALUES_EQUAL(state1.MiniKqlProgram->Data.Version, state2.MiniKqlProgram->Data.Version); + UNIT_ASSERT_VALUES_EQUAL(state1.MiniKqlProgram->RuntimeVersion, state2.MiniKqlProgram->RuntimeVersion); + } + UNIT_ASSERT_VALUES_EQUAL(state1.Sources.size(), state2.Sources.size()); + UNIT_ASSERT(std::equal(std::begin(state1.Sources), std::end(state1.Sources), std::begin(state2.Sources), std::end(state2.Sources), + [](const NYql::NDq::TSourceState& state1, const NYql::NDq::TSourceState& state2) { + UNIT_ASSERT_VALUES_EQUAL(state1.InputIndex, state2.InputIndex); + UNIT_ASSERT_VALUES_EQUAL(state1.Data.size(), state2.Data.size()); + return true; + })); + + UNIT_ASSERT_VALUES_EQUAL(state1.Sinks.size(), state2.Sinks.size()); + UNIT_ASSERT(std::equal(std::begin(state1.Sinks), std::end(state1.Sinks), std::begin(state2.Sinks), std::end(state2.Sinks), + [](const NYql::NDq::TSinkState& state1, const NYql::NDq::TSinkState& state2) { + UNIT_ASSERT_VALUES_EQUAL(state1.OutputIndex, state2.OutputIndex); + UNIT_ASSERT_VALUES_EQUAL(state1.Data.Blob, state2.Data.Blob); + UNIT_ASSERT_VALUES_EQUAL(state1.Data.Version, state2.Data.Version); + return true; + })); } private: @@ -345,19 +360,19 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { UNIT_ASSERT_C(getIssues.Empty(), getIssues.ToString()); UNIT_ASSERT_VALUES_EQUAL(states.size(), 4); - UNIT_ASSERT(Equals(state1, states[0])); - UNIT_ASSERT(Equals(state2, states[1])); - UNIT_ASSERT(Equals(state3, states[2])); - UNIT_ASSERT(Equals(state4, states[3])); + CheckEquals(state1, states[0]); + CheckEquals(state2, states[1]); + CheckEquals(state3, states[2]); + CheckEquals(state4, states[3]); // in different order auto [states2, getIssues2] = storage->GetState({42, 1, 13, 7}, "graph1", CheckpointId1).GetValueSync(); UNIT_ASSERT(getIssues2.Empty()); UNIT_ASSERT_VALUES_EQUAL(states2.size(), 4); - UNIT_ASSERT(Equals(state2, states2[0])); - UNIT_ASSERT(Equals(state1, states2[1])); - UNIT_ASSERT(Equals(state4, states2[2])); - UNIT_ASSERT(Equals(state3, states2[3])); + CheckEquals(state2, states2[0]); + CheckEquals(state1, states2[1]); + CheckEquals(state4, states2[2]); + CheckEquals(state3, states2[3]); } Y_UNIT_TEST_F(ShouldLoadLastSnapshot, TFixture) @@ -371,7 +386,7 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { SaveState(storage, 1, "graph1", CheckpointId2, state2); auto state = GetState(storage, 1, "graph1", CheckpointId2); - UNIT_ASSERT(Equals(state, state2)); + CheckEquals(state, state2); } Y_UNIT_TEST_F(ShouldNotGetNonExistendSnaphotState, TFixture) @@ -403,7 +418,7 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { auto expected = MakeIncrementState({{"key1", "value1-new"}, {"key3", "value3"}, {"key4", value4}}, {}, {}); auto actual = GetState(storage, 1, "graph1", CheckpointId3); - UNIT_ASSERT(Equals(expected, actual)); + CheckEquals(expected, actual); } }; diff --git a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h index 5cd106024076..1443663a8ecf 100644 --- a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h +++ b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h @@ -38,7 +38,7 @@ struct TSourceState { std::list Data; ui64 InputIndex{0}; - size_t DataSize() const {return 1;} + size_t DataSize() const { return Data.size(); } Y_SAVELOAD_DEFINE(Data, InputIndex); }; From c223da9151a8ba2f666662633b9229eced26f5d6 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 23 Apr 2024 21:28:30 +0000 Subject: [PATCH 08/21] remove comments / style fix --- ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp | 4 ---- ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h | 2 +- ydb/library/yql/dq/actors/compute/dq_compute_actor.h | 2 -- ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h | 1 - .../yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp | 4 +--- ydb/library/yql/dq/actors/task_runner/events.h | 1 - ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp | 4 ++-- ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp | 2 +- ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp | 1 - 9 files changed, 5 insertions(+), 16 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index d5f7979dd2c9..782afb893363 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -448,9 +448,6 @@ TFuture TStateStorage::SaveState( const TString& graphId, const TCheckpointId& checkpointId, const NYql::NDq::TComputeActorState& state) { - - const size_t stateSize = state.ByteSizeLong(); - std::cerr << "SaveState, size: " << stateSize << std::endl; std::list serializedState; EStateType type = EStateType::Snapshot; @@ -462,7 +459,6 @@ TFuture TStateStorage::SaveState( return MakeFuture(NYql::TIssues{NYql::TIssue{"Failed to serialize compute actor state"}}); } } catch (...) { - LOG_STREAMS_STORAGE_SERVICE_AS_DEBUG(*NActors::TActivationContext::ActorSystem(), "Exception " << stateSize); return MakeFuture(NYql::TIssues{NYql::TIssue{CurrentExceptionMessage()}}); } diff --git a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h index 1443663a8ecf..eb00205f321a 100644 --- a/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h +++ b/ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h @@ -73,7 +73,7 @@ struct TComputeActorState { *out = result.Str(); return true; } - size_t ByteSizeLong() const {return 1;} + size_t ByteSizeLong() const {return MiniKqlProgram ? MiniKqlProgram->Data.Blob.Size() : 0; } Y_SAVELOAD_DEFINE(MiniKqlProgram, Sources, Sinks); }; diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor.h index ac6ee7e6aae8..f69a60d30988 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor.h +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor.h @@ -17,7 +17,6 @@ namespace NYql { namespace NDq { - struct TEvDqCompute { struct TEvState : public NActors::TEventPB {}; struct TEvStateRequest : public NActors::TEventPB {}; @@ -108,7 +107,6 @@ struct TEvDqCompute { const ui64 TaskId; const NDqProto::TCheckpoint Checkpoint; TComputeActorState State; - //NDqProto::TComputeActorState State; }; struct TEvSaveTaskStateResult : public NActors::TEventPB -//include #include #include #include diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp index e9c89bc78a1f..bc9b665e961d 100644 --- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp +++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.cpp @@ -118,7 +118,7 @@ TComputeActorState CombineForeignState( sourceState.Data.emplace_back(data); } } - YQL_ENSURE(sourceState.Data.size(), "No data was loaded to source " << sourcePlan.GetInputIndex()); + YQL_ENSURE(sourceState.DataSize(), "No data was loaded to source " << sourcePlan.GetInputIndex()); } } return state; @@ -519,8 +519,6 @@ void TDqComputeActorCheckpoints::OnSinkStateSaved(TSinkState&& state, ui64 outpu state.OutputIndex = outputIndex; // Set index explicitly to avoid errors PendingCheckpoint.ComputeActorState.Sinks.emplace_back(std::move(state)); - //TSinkState& sinkState = PendingCheckpoint.ComputeActorState.Sinks.back(); - // sinkState = std::move(state); ++PendingCheckpoint.SavedSinkStatesCount; LOG_T("Sink[" << outputIndex << "] state saved"); diff --git a/ydb/library/yql/dq/actors/task_runner/events.h b/ydb/library/yql/dq/actors/task_runner/events.h index 4555a8119482..bf5d38965a0c 100644 --- a/ydb/library/yql/dq/actors/task_runner/events.h +++ b/ydb/library/yql/dq/actors/task_runner/events.h @@ -12,7 +12,6 @@ #include #include #include - #include #include diff --git a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp index 639fd304ecc9..8356bfd4dfdd 100644 --- a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp +++ b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.cpp @@ -115,14 +115,14 @@ void TFakeCASetup::AsyncOutputWrite(const TWriteValueProducer valueProducer, TMa }); } -void TFakeCASetup::SaveSourceState(NDqProto::TCheckpoint checkpoint, NDq::TSourceState& state) { +void TFakeCASetup::SaveSourceState(NDqProto::TCheckpoint checkpoint, TSourceState& state) { Execute([&state, &checkpoint](TFakeActor& actor) { Y_ASSERT(actor.DqAsyncInput); actor.DqAsyncInput->SaveState(checkpoint, state); }); } -void TFakeCASetup::LoadSource(const NDq::TSourceState& state) { +void TFakeCASetup::LoadSource(const TSourceState& state) { Execute([&state](TFakeActor& actor) { Y_ASSERT(actor.DqAsyncInput); actor.DqAsyncInput->LoadState(state); diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp index 3c527e93cfae..d6511a2efba6 100644 --- a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp +++ b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp @@ -238,7 +238,7 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu if (data.Version == StateVersion) { // Current version NPq::NProto::TDqPqTopicSinkState stateProto; YQL_ENSURE(stateProto.ParseFromString(data.Blob), "Serialized state is corrupted"); - //SINK_LOG_D("Load state: " << stateProto); + SINK_LOG_D("Load state: " << stateProto); SourceId = stateProto.GetSourceId(); ConfirmedSeqNo = stateProto.GetConfirmedSeqNo(); NextSeqNo = ConfirmedSeqNo + 1; diff --git a/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp b/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp index e5d149bfbf83..fbc1cfd6104f 100644 --- a/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp +++ b/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp @@ -260,7 +260,6 @@ Y_UNIT_TEST_SUITE(TDqPqReadActorTest) { // Add state1 to state2 state2.Data.push_back(state1.Data.front()); - //*state2.AddData() = state1.Data.front(); } TPqIoTestFixture setup2; From 911fafd139e0dc8f27141d69d20d6ce41323ca81 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 24 Apr 2024 07:49:52 +0000 Subject: [PATCH 09/21] fix state size --- .../libs/checkpoint_storage/state_storage.h | 3 +- .../libs/checkpoint_storage/storage_proxy.cpp | 7 ++-- .../ut/ydb_state_storage_ut.cpp | 18 +++++---- .../checkpoint_storage/ydb_state_storage.cpp | 39 +++++++++++-------- 4 files changed, 38 insertions(+), 29 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/state_storage.h b/ydb/core/fq/libs/checkpoint_storage/state_storage.h index 8ac5e9f040dc..ee39f21f4ea1 100644 --- a/ydb/core/fq/libs/checkpoint_storage/state_storage.h +++ b/ydb/core/fq/libs/checkpoint_storage/state_storage.h @@ -17,11 +17,12 @@ namespace NFq { class IStateStorage : public virtual TThrRefBase { public: using TGetStateResult = std::pair, NYql::TIssues>; + using TSaveStateResult = std::pair; using TCountStatesResult = std::pair; virtual NThreading::TFuture Init() = 0; - virtual NThreading::TFuture SaveState( + virtual NThreading::TFuture SaveState( ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, diff --git a/ydb/core/fq/libs/checkpoint_storage/storage_proxy.cpp b/ydb/core/fq/libs/checkpoint_storage/storage_proxy.cpp index 6bb24aa690a7..7037b2734ea0 100644 --- a/ydb/core/fq/libs/checkpoint_storage/storage_proxy.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/storage_proxy.cpp @@ -347,14 +347,13 @@ void TStorageProxy::Handle(NYql::NDq::TEvDqCompute::TEvSaveTaskState::TPtr& ev) taskId = event->TaskId, cookie = ev->Cookie, sender = ev->Sender, - stateSize = stateSize, - actorSystem = TActivationContext::ActorSystem()](const NThreading::TFuture& futureResult) { + actorSystem = TActivationContext::ActorSystem()](const NThreading::TFuture& futureResult) { LOG_STREAMS_STORAGE_SERVICE_AS_DEBUG(*actorSystem, "[" << graphId << "] [" << checkpointId << "] TEvSaveTaskState Apply: task: " << taskId) - const auto& issues = futureResult.GetValue(); + const auto& issues = futureResult.GetValue().second; auto response = std::make_unique(); response->Record.MutableCheckpoint()->SetGeneration(checkpointId.CoordinatorGeneration); response->Record.MutableCheckpoint()->SetId(checkpointId.SeqNo); - response->Record.SetStateSizeBytes(stateSize); + response->Record.SetStateSizeBytes(futureResult.GetValue().first); response->Record.SetTaskId(taskId); if (issues) { diff --git a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp index ab1f7e96e2bb..3fd22dd17433 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ut/ydb_state_storage_ut.cpp @@ -96,8 +96,9 @@ class TFixture : public NUnitTest::TBaseFixture { const TCheckpointId& checkpointId, const NYql::NDq::TComputeActorState& state) { - auto issues = storage->SaveState(taskId, graphId, checkpointId, state).GetValueSync(); + auto [size, issues] = storage->SaveState(taskId, graphId, checkpointId, state).GetValueSync(); UNIT_ASSERT_C(issues.Empty(), issues.ToString()); + UNIT_ASSERT(size > 0); } NYql::NDq::TComputeActorState GetState( @@ -114,8 +115,9 @@ class TFixture : public NUnitTest::TBaseFixture { void ShouldSaveGetStateImpl(const char* tablePrefix, const NYql::NDq::TComputeActorState& state) { auto storage = GetStateStorage(tablePrefix); - auto issues = storage->SaveState(1, "graph1", CheckpointId1, state).GetValueSync(); + auto [size, issues] = storage->SaveState(1, "graph1", CheckpointId1, state).GetValueSync(); UNIT_ASSERT_C(issues.Empty(), issues.ToString()); + UNIT_ASSERT(size > 0); auto [states, getIssues] = storage->GetState({1}, "graph1", CheckpointId1).GetValueSync(); UNIT_ASSERT_C(getIssues.Empty(), getIssues.ToString()); @@ -326,8 +328,9 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { Y_UNIT_TEST_F(ShouldIssueErrorOnNonExistentState, TFixture) { auto storage = GetStateStorage("TStateStorageTestShouldIssueErrorOnNonExistentState"); - auto issues = storage->SaveState(1, "graph1", CheckpointId1, MakeStateFromBlob(4)).GetValueSync(); + auto [size, issues] = storage->SaveState(1, "graph1", CheckpointId1, MakeStateFromBlob(4)).GetValueSync(); UNIT_ASSERT(issues.Empty()); + UNIT_ASSERT(size > 0); auto getResult = storage->GetState({1}, "graph1", CheckpointId1).GetValueSync(); UNIT_ASSERT(getResult.second.Empty()); @@ -347,13 +350,14 @@ Y_UNIT_TEST_SUITE(TStateStorageTest) { auto state3 = MakeStateFromBlob(YdbRowSizeLimit * 6); auto state4 = MakeIncrementState(YdbRowSizeLimit * 3); - auto issues = storage->SaveState(1, "graph1", CheckpointId1, state1).GetValueSync(); + auto [size, issues] = storage->SaveState(1, "graph1", CheckpointId1, state1).GetValueSync(); UNIT_ASSERT(issues.Empty()); - issues = storage->SaveState(42, "graph1", CheckpointId1, state2).GetValueSync(); + UNIT_ASSERT(size > 0); + issues = storage->SaveState(42, "graph1", CheckpointId1, state2).GetValueSync().second; UNIT_ASSERT(issues.Empty()); - issues = storage->SaveState(7, "graph1", CheckpointId1, state3).GetValueSync(); + issues = storage->SaveState(7, "graph1", CheckpointId1, state3).GetValueSync().second; UNIT_ASSERT(issues.Empty()); - issues = storage->SaveState(13, "graph1", CheckpointId1, state4).GetValueSync(); + issues = storage->SaveState(13, "graph1", CheckpointId1, state4).GetValueSync().second; UNIT_ASSERT(issues.Empty()); auto [states, getIssues] = storage->GetState({1, 42, 7, 13}, "graph1", CheckpointId1).GetValueSync(); diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index 782afb893363..6fb55d3e8967 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -295,7 +295,7 @@ class TStateStorage : public IStateStorage { TFuture Init() override; - TFuture SaveState( + TFuture SaveState( ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, @@ -332,8 +332,9 @@ class TStateStorage : public IStateStorage { TFuture ListStates( const TContextPtr& context); - std::list SerializeState( - const NYql::NDq::TComputeActorState& state); + size_t SerializeState( + const NYql::NDq::TComputeActorState& state, + std::list& outSerializedState); EStateType DeserializeState( TContext::TaskInfo& taskInfo); @@ -424,26 +425,30 @@ EStateType TStateStorage::DeserializeState(TContext::TaskInfo& taskInfo) { return TIncrementLogic::GetStateType(state); } -std::list TStateStorage::SerializeState(const NYql::NDq::TComputeActorState& state) { - std::list result; +size_t TStateStorage::SerializeState( + const NYql::NDq::TComputeActorState& state, + std::list& outSerializedState) { + outSerializedState.clear(); + TString serializedState; if (!state.SerializeToString(&serializedState)) { - return result; + return 0; } auto size = serializedState.size(); + size_t result = size; size_t rowLimit = Config.GetStateStorageLimits().GetMaxRowSizeBytes(); size_t offset = 0; while (size) { size_t chunkSize = (rowLimit && (size > rowLimit)) ? rowLimit : size; - result.push_back(serializedState.substr(offset, chunkSize)); + outSerializedState.push_back(serializedState.substr(offset, chunkSize)); offset += chunkSize; size -= chunkSize; } return result; } -TFuture TStateStorage::SaveState( +TFuture TStateStorage::SaveState( ui64 taskId, const TString& graphId, const TCheckpointId& checkpointId, @@ -451,15 +456,16 @@ TFuture TStateStorage::SaveState( std::list serializedState; EStateType type = EStateType::Snapshot; + size_t size = 0; try { type = TIncrementLogic::GetStateType(state); - serializedState = SerializeState(state); - if (serializedState.empty()) { - return MakeFuture(NYql::TIssues{NYql::TIssue{"Failed to serialize compute actor state"}}); + size = SerializeState(state, serializedState); + if (!size || serializedState.empty()) { + return MakeFuture(TSaveStateResult(0, NYql::TIssues{NYql::TIssue{"Failed to serialize compute actor state"}})); } } catch (...) { - return MakeFuture(NYql::TIssues{NYql::TIssue{CurrentExceptionMessage()}}); + return MakeFuture(TSaveStateResult(0, NYql::TIssues{NYql::TIssue{CurrentExceptionMessage()}})); } auto context = MakeIntrusive( @@ -472,15 +478,14 @@ TFuture TStateStorage::SaveState( serializedState, type); - auto promise = NewPromise(); + auto promise = NewPromise(); auto future = UpsertRow(context); - context->Callback = [promise, context, thisPtr = TIntrusivePtr(this)] (TFuture upsertRowStatus) mutable { - + context->Callback = [promise, context, size, thisPtr = TIntrusivePtr(this)] (TFuture upsertRowStatus) mutable { TStatus status = upsertRowStatus.GetValue(); if (!status.IsSuccess()) { context->Callback = nullptr; - promise.SetValue(StatusToIssues(status)); + promise.SetValue(TSaveStateResult(0, StatusToIssues(status))); return; } auto& taskInfo = context->Tasks[context->CurrentProcessingTaskIndex]; @@ -493,7 +498,7 @@ TFuture TStateStorage::SaveState( return; } context->Callback = nullptr; - promise.SetValue(StatusToIssues(status)); + promise.SetValue(TSaveStateResult(size, StatusToIssues(status))); }; future.Subscribe(context->Callback); return promise.GetFuture(); From 634801f1aacfe36455738e890ed9f69052fa2dc3 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Sun, 12 May 2024 19:08:14 +0000 Subject: [PATCH 10/21] fix ui32 len --- .../checkpoint_storage/ydb_state_storage.cpp | 38 +++++++++++-------- .../yql/minikql/comp_nodes/mkql_saveload.h | 7 ++-- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index 6fb55d3e8967..64816d7c06fc 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -26,6 +26,7 @@ namespace { //////////////////////////////////////////////////////////////////////////////// const char* StatesTable = "states"; +const ui64 DeleteStateTimeoutMultiplier = 10; //////////////////////////////////////////////////////////////////////////////// @@ -324,7 +325,7 @@ class TStateStorage : public IStateStorage { TFuture UpsertRow( const TContextPtr& context); - TExecDataQuerySettings DefaultExecDataQuerySettings(); + TExecDataQuerySettings GetExecDataQuerySettings(ui64 multiplier = 1); TFuture SelectRowState( const TContextPtr& context); @@ -337,6 +338,7 @@ class TStateStorage : public IStateStorage { std::list& outSerializedState); EStateType DeserializeState( + const TContextPtr& context, TContext::TaskInfo& taskInfo); TFuture SkipStatesInFuture( @@ -412,7 +414,7 @@ TFuture TStateStorage::Init() { return MakeFuture(std::move(issues)); } -EStateType TStateStorage::DeserializeState(TContext::TaskInfo& taskInfo) { +EStateType TStateStorage::DeserializeState(const TContextPtr& context, TContext::TaskInfo& taskInfo) { TString blob; for (auto it = taskInfo.Rows.begin(); it != taskInfo.Rows.end();) { blob += *it; @@ -420,6 +422,9 @@ EStateType TStateStorage::DeserializeState(TContext::TaskInfo& taskInfo) { } taskInfo.States.push_front({}); NYql::NDq::TComputeActorState& state = taskInfo.States.front(); + + LOG_STORAGE_DEBUG(context, "DeserializeState, task id " << taskInfo.TaskId << ", blob size " << blob.size()); + auto res = state.ParseFromString(blob); Y_ENSURE(res, "Parsing error"); return TIncrementLogic::GetStateType(state); @@ -476,7 +481,7 @@ TFuture TStateStorage::SaveState( checkpointId, TMaybe(), serializedState, - type); + type); auto promise = NewPromise(); auto future = UpsertRow(context); @@ -588,7 +593,7 @@ TFuture TStateStorage::CountStates( query, TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx(), params, - thisPtr->DefaultExecDataQuerySettings()); + thisPtr->GetExecDataQuerySettings()); return future.Apply( [context] (const TFuture& future) { @@ -653,7 +658,7 @@ TFuture TStateStorage::ListStates(const TContextPtr& context) { query, TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx(), params, - thisPtr->DefaultExecDataQuerySettings()); + thisPtr->GetExecDataQuerySettings()); return future.Apply( [context] (const TFuture& future) { @@ -694,12 +699,12 @@ TFuture TStateStorage::ListStates(const TContextPtr& context) { }); } -TExecDataQuerySettings TStateStorage::DefaultExecDataQuerySettings() { +TExecDataQuerySettings TStateStorage::GetExecDataQuerySettings(ui64 multiplier) { return TExecDataQuerySettings() .KeepInQueryCache(true) - .ClientTimeout(TDuration::Seconds(StorageConfig.GetClientTimeoutSec())) - .OperationTimeout(TDuration::Seconds(StorageConfig.GetOperationTimeoutSec())) - .CancelAfter(TDuration::Seconds(StorageConfig.GetCancelAfterSec())); + .ClientTimeout(TDuration::Seconds(StorageConfig.GetClientTimeoutSec() * multiplier)) + .OperationTimeout(TDuration::Seconds(StorageConfig.GetOperationTimeoutSec() * multiplier)) + .CancelAfter(TDuration::Seconds(StorageConfig.GetCancelAfterSec() * multiplier)); } TFuture TStateStorage::DeleteGraph(const TString& graphId) { @@ -727,7 +732,7 @@ TFuture TStateStorage::DeleteGraph(const TString& graphId) { query, TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx(), params, - thisPtr->DefaultExecDataQuerySettings()); + thisPtr->GetExecDataQuerySettings(DeleteStateTimeoutMultiplier)); return future.Apply( [] (const TFuture& future) { @@ -772,7 +777,7 @@ TFuture TStateStorage::DeleteCheckpoints( query, TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx(), params, - thisPtr->DefaultExecDataQuerySettings()); + thisPtr->GetExecDataQuerySettings(DeleteStateTimeoutMultiplier)); return future.Apply( [] (const TFuture& future) { @@ -840,7 +845,7 @@ TFuture TStateStorage::SelectState(const TContextPtr& context) query, TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx(), params, - DefaultExecDataQuerySettings()); + GetExecDataQuerySettings()); } TFuture TStateStorage::UpsertRow(const TContextPtr& context) { @@ -885,7 +890,7 @@ TFuture TStateStorage::UpsertRow(const TContextPtr& context) { query, TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx(), params, - thisPtr->DefaultExecDataQuerySettings()); + thisPtr->GetExecDataQuerySettings()); return future.Apply( [] (const TFuture& future) { @@ -896,8 +901,7 @@ TFuture TStateStorage::UpsertRow(const TContextPtr& context) { } TFuture TStateStorage::SkipStatesInFuture(const TContextPtr& context) { - LOG_STORAGE_DEBUG(context, "SkipStatesInFuture"); - + size_t eraseCount = 0; for (auto& taskInfo : context->Tasks) { auto it = taskInfo.ListOfStatesForReading.begin(); while (it != taskInfo.ListOfStatesForReading.end()) @@ -910,11 +914,13 @@ TFuture TStateStorage::SkipStatesInFuture(const TContextPtr& context) { return MakeFuture(TStatus{EStatus::INTERNAL_ERROR, NYql::TIssues{NYql::TIssue{"Checkpoint is not found"}}}); } it = taskInfo.ListOfStatesForReading.erase(it); + ++eraseCount; } if (taskInfo.ListOfStatesForReading.empty()) { return MakeFuture(TStatus{EStatus::INTERNAL_ERROR, NYql::TIssues{NYql::TIssue{"Checkpoint is not found"}}}); } } + LOG_STORAGE_DEBUG(context, "SkipStatesInFuture, skip " << eraseCount << " checkpoints"); return MakeFuture(TStatus{EStatus::SUCCESS, NYql::TIssues{}}); } @@ -934,7 +940,7 @@ TFuture TStateStorage::ReadRows(const TContextPtr& context) { ++taskInfo.CurrentProcessingRow; if (taskInfo.CurrentProcessingRow == taskInfo.ListOfStatesForReading.front().StateRowsCount) { - auto type = thisPtr->DeserializeState(taskInfo); + auto type = thisPtr->DeserializeState(context, taskInfo); if (type != EStateType::Snapshot) { taskInfo.ListOfStatesForReading.pop_front(); taskInfo.CurrentProcessingRow = 0; diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h index 324c915d2876..bd9538e3dd72 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h @@ -179,8 +179,9 @@ struct TOutputSerializer { } NUdf::TUnboxedValue MakeState() { - auto strRef = NUdf::TStringRef(Buf.data(), Buf.size()); - return NMiniKQL::MakeString(strRef); + NUdf::TStringValue str(Buf.size()); + std::memcpy(str.Data(), Buf.Data(), Buf.Size()); + return NUdf::TUnboxedValuePod(std::move(str)); } protected: TString Buf; @@ -235,7 +236,7 @@ struct TInputSerializer { Y_FORCE_INLINE NUdf::TUnboxedValue ReadUnboxedValue(const TValuePacker& packer, TComputationContext& ctx) { auto size = Read(); - MKQL_ENSURE(size <= Buf.size(), "Serialized state is corrupted"); + MKQL_ENSURE_S(size <= Buf.size(), "Serialized state is corrupted, size " << size << ", Buf.size " << Buf.size()); auto value = packer.Unpack(TStringBuf(Buf.data(), Buf.data() + size), ctx.HolderFactory); Buf.Skip(size); return value; From d9d61ba85744c934360028f0922224a15fd9ed28 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 14 May 2024 12:43:54 +0000 Subject: [PATCH 11/21] Use list to get value from task --- .../yql/minikql/comp_nodes/mkql_hopping.cpp | 2 +- .../comp_nodes/mkql_match_recognize.cpp | 8 ++- .../mkql_match_recognize_save_load.h | 4 +- .../minikql/comp_nodes/mkql_multihopping.cpp | 2 +- .../yql/minikql/comp_nodes/mkql_saveload.h | 58 +++++++++++++++++-- .../minikql/comp_nodes/mkql_squeeze_state.cpp | 2 +- .../comp_nodes/mkql_time_order_recover.cpp | 2 +- .../mkql_computation_node_graph.cpp | 14 ++++- 8 files changed, 77 insertions(+), 15 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_hopping.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_hopping.cpp index c2804a303431..b5643af7e932 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_hopping.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_hopping.cpp @@ -56,7 +56,7 @@ class THoppingCoreWrapper : public TMutableComputationNode NUdf::TUnboxedValue Save() const override { MKQL_ENSURE(Ready.empty(), "Inconsistent state to save, not all elements are fetched"); - TOutputSerializer out(EMkqlStateType::SIMPLE_BLOB, StateVersion); + TOutputSerializer out(EMkqlStateType::SIMPLE_BLOB, StateVersion, Ctx); out.Write(Buckets.size()); for (const auto& bucket : Buckets) { diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp index 3d718b1afdaf..ad831eec016a 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp @@ -269,10 +269,11 @@ class TStateForNonInterleavedPartitions , Cache(cache) , Terminating(false) , SerializerContext(ctx, rowType, rowPacker) + , Ctx(ctx) {} NUdf::TUnboxedValue Save() const override { - TMrOutputSerializer out(SerializerContext, EMkqlStateType::SIMPLE_BLOB, StateVersion); + TMrOutputSerializer out(SerializerContext, EMkqlStateType::SIMPLE_BLOB, StateVersion, Ctx); out.Write(CurPartitionPackedKey); bool isValid = static_cast(PartitionHandler); out.Write(isValid); @@ -386,6 +387,7 @@ class TStateForNonInterleavedPartitions NUdf::TUnboxedValue DelayedRow; bool Terminating; TSerializerContext SerializerContext; + TComputationContext& Ctx; }; class TStateForInterleavedPartitions @@ -413,10 +415,11 @@ class TStateForInterleavedPartitions , NfaTransitionGraph(TNfaTransitionGraphBuilder::Create(parameters.Pattern, parameters.VarNamesLookup)) , Cache(cache) , SerializerContext(ctx, rowType, rowPacker) + , Ctx(ctx) {} NUdf::TUnboxedValue Save() const override { - TMrOutputSerializer serializer(SerializerContext, EMkqlStateType::SIMPLE_BLOB, StateVersion); + TMrOutputSerializer serializer(SerializerContext, EMkqlStateType::SIMPLE_BLOB, StateVersion, Ctx); serializer.Write(Partitions.size()); for (const auto& [key, state] : Partitions) { @@ -529,6 +532,7 @@ class TStateForInterleavedPartitions const TNfaTransitionGraph::TPtr NfaTransitionGraph; const TContainerCacheOnContext& Cache; TSerializerContext SerializerContext; + TComputationContext& Ctx; }; template diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_save_load.h b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_save_load.h index 9b6cce17b365..309a8a68715c 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_save_load.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_save_load.h @@ -34,8 +34,8 @@ struct TMrOutputSerializer : TOutputSerializer { }; public: - TMrOutputSerializer(const TSerializerContext& context, EMkqlStateType stateType, ui32 stateVersion) - : TOutputSerializer(stateType, stateVersion) + TMrOutputSerializer(const TSerializerContext& context, EMkqlStateType stateType, ui32 stateVersion, TComputationContext& ctx) + : TOutputSerializer(stateType, stateVersion, ctx) , Context(context) {} diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp index e9d155e5647e..99d5ec848d2f 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp @@ -101,7 +101,7 @@ class TMultiHoppingCoreWrapper : public TStatefulSourceComputationNode(StatesMap.size()); for (const auto& [key, state] : StatesMap) { diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h index bd9538e3dd72..3d2c216adf23 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h @@ -127,7 +127,8 @@ struct TOutputSerializer { } public: - TOutputSerializer(EMkqlStateType stateType, ui32 stateVersion) { + TOutputSerializer(EMkqlStateType stateType, ui32 stateVersion, TComputationContext& ctx) + : Ctx(ctx) { Write(static_cast(stateType)); Write(stateVersion); } @@ -178,13 +179,62 @@ struct TOutputSerializer { Buf.AppendNoAlias(state.data(), state.size()); } + class TRangeList: public TComputationValue { + const size_t MaxValueLen = 10000; + + class TIterator : public TComputationValue { + const size_t MaxValueLen = 10000; + + public: + TIterator(TMemoryUsageInfo* memInfo, const TString& buf) + : TComputationValue(memInfo) + , Buf(buf) + , Index(0) + {} + + private: + bool Next(NUdf::TUnboxedValue& value) override { + if (Buf.size() == Index) { + return false; + } + size_t nextSize = std::min(Buf.size() - Index, MaxValueLen); + + NUdf::TStringValue str(nextSize); + std::memcpy(str.Data(), Buf.Data() + Index, nextSize); + Index += nextSize; + value = NUdf::TUnboxedValuePod(std::move(str)); + return true; + } + const TString& Buf; + size_t Index; + }; + + public: + + TRangeList(TMemoryUsageInfo* memInfo, TComputationContext& ctx, TString&& buf) + : TComputationValue(memInfo) + , Ctx(ctx) + , Buf(buf) + {} + + ui64 GetListLength() const override { + return Buf.size() / MaxValueLen + ((Buf.size() % MaxValueLen) ? 1 : 0); + } + + NUdf::TUnboxedValue GetListIterator() const override { + return Ctx.HolderFactory.Create(Buf); + } + private: + TComputationContext& Ctx; + TString Buf; + }; + NUdf::TUnboxedValue MakeState() { - NUdf::TStringValue str(Buf.size()); - std::memcpy(str.Data(), Buf.Data(), Buf.Size()); - return NUdf::TUnboxedValuePod(std::move(str)); + return Ctx.HolderFactory.Create(Ctx, std::move(Buf)); } protected: TString Buf; + TComputationContext& Ctx; }; diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_squeeze_state.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_squeeze_state.cpp index 7ab819bef9ca..040bf5ae52f4 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_squeeze_state.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_squeeze_state.cpp @@ -48,7 +48,7 @@ TSqueezeState::TSqueezeState(const TSqueezeState& state) {} NUdf::TUnboxedValue TSqueezeState::Save(TComputationContext& ctx) const { - TOutputSerializer out(EMkqlStateType::SIMPLE_BLOB, StateVersion); + TOutputSerializer out(EMkqlStateType::SIMPLE_BLOB, StateVersion, ctx); out.Write(static_cast(Stage)); if (ESqueezeState::Work == Stage) { InSave->SetValue(ctx, State->GetValue(ctx)); diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp index 69ea328c6449..e5229aeb3fb7 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp @@ -119,7 +119,7 @@ class TTimeOrderRecover : public TStatefulFlowComputationNode(Heap.size()); for (const TEntry& entry : Heap) { diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp index 45dd61a55f9b..83451995b331 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp @@ -729,9 +729,17 @@ class TComputationGraph final : public IComputationGraph { if (mutableValue.IsInvalid()) { WriteUi32(result, std::numeric_limits::max()); // -1. } else if (mutableValue.IsBoxed()) { - NUdf::TUnboxedValue saved = mutableValue.Save(); - const TStringBuf savedBuf = saved.AsStringRef(); - NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, savedBuf); + NUdf::TUnboxedValue list = mutableValue.Save(); + + TString taskState; + auto listIt = list.GetListIterator(); + NUdf::TUnboxedValue str; + while (listIt.Next(str)) { + const TStringBuf savedBuf = str.AsStringRef(); + taskState.AppendNoAlias(savedBuf.Data(), savedBuf.Size()); + } + + NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, taskState); } else { // No load was done during previous runs (if any). MKQL_ENSURE(mutableValue.HasValue() && (mutableValue.IsString() || mutableValue.IsEmbedded()), "State is expected to have data or invalid value"); const NUdf::TStringRef savedRef = mutableValue.AsStringRef(); From 3b2273ec4304f8370c3c686e06c29e3e88c10d38 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 14 May 2024 13:07:31 +0000 Subject: [PATCH 12/21] Use 64 size --- ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp | 6 +++--- ydb/library/yql/minikql/comp_nodes/mkql_saveload.h | 2 +- .../yql/minikql/computation/mkql_computation_node_graph.cpp | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index 64816d7c06fc..5b4e9c4f3703 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -55,7 +55,7 @@ class TIncrementLogic { TStringBuf buf(blob); while (!buf.empty()) { - auto nodeStateSize = NKikimr::NMiniKQL::ReadUi32(buf); + auto nodeStateSize = NKikimr::NMiniKQL::ReadUi64(buf); Y_ENSURE(buf.Size() >= nodeStateSize, "State/buf is corrupted"); TStringBuf nodeStateBuf(buf.Data(), nodeStateSize); buf.Skip(nodeStateSize); @@ -105,7 +105,7 @@ class TIncrementLogic { NYql::NUdf::TUnboxedValue saved = NKikimr::NMiniKQL::TOutputSerializer::MakeSnapshotState(nodeState.Items, 0); const TStringBuf savedBuf = saved.AsStringRef(); - NKikimr::NMiniKQL::WriteUi32(result, savedBuf.Size()); + NKikimr::NMiniKQL::WriteUi64(result, savedBuf.Size()); result.AppendNoAlias(savedBuf.Data(), savedBuf.Size()); } } @@ -123,7 +123,7 @@ class TIncrementLogic { const TString& blob = state.MiniKqlProgram->Data.Blob; TStringBuf buf(blob); while (!buf.empty()) { - auto nodeStateSize = NKikimr::NMiniKQL::ReadUi32(buf); + auto nodeStateSize = NKikimr::NMiniKQL::ReadUi64(buf); Y_ENSURE(buf.Size() >= nodeStateSize, "State/buf is corrupted"); TStringBuf nodeStateBuf(buf.Data(), nodeStateSize); if (NKikimr::NMiniKQL::TInputSerializer(nodeStateBuf).GetType() == NKikimr::NMiniKQL::EMkqlStateType::INCREMENT) { diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h index 3d2c216adf23..512cbf94f3f2 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h @@ -347,7 +347,7 @@ struct TInputSerializer { class TNodeStateHelper { public: static void AddNodeState(TString& result, const TStringBuf& state) { - WriteUi32(result, state.Size()); + WriteUi64(result, state.Size()); result.AppendNoAlias(state.Data(), state.Size()); } }; diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp index 83451995b331..837e6f953a08 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp @@ -727,7 +727,7 @@ class TComputationGraph final : public IComputationGraph { for (ui32 i : PatternNodes->GetMutables().SerializableValues) { const NUdf::TUnboxedValuePod& mutableValue = Ctx->MutableValues[i]; if (mutableValue.IsInvalid()) { - WriteUi32(result, std::numeric_limits::max()); // -1. + WriteUi64(result, std::numeric_limits::max()); // -1. } else if (mutableValue.IsBoxed()) { NUdf::TUnboxedValue list = mutableValue.Save(); @@ -743,7 +743,7 @@ class TComputationGraph final : public IComputationGraph { } else { // No load was done during previous runs (if any). MKQL_ENSURE(mutableValue.HasValue() && (mutableValue.IsString() || mutableValue.IsEmbedded()), "State is expected to have data or invalid value"); const NUdf::TStringRef savedRef = mutableValue.AsStringRef(); - WriteUi32(result, savedRef.Size()); + WriteUi64(result, savedRef.Size()); result.AppendNoAlias(savedRef.Data(), savedRef.Size()); } } @@ -754,7 +754,7 @@ class TComputationGraph final : public IComputationGraph { Prepare(); for (ui32 i : PatternNodes->GetMutables().SerializableValues) { - if (const ui32 size = ReadUi32(state); size != std::numeric_limits::max()) { + if (const ui64 size = ReadUi64(state); size != std::numeric_limits::max()) { MKQL_ENSURE(state.Size() >= size, "Serialized state is corrupted - buffer is too short (" << state.Size() << ") for specified size: " << size); const NUdf::TStringRef savedRef(state.Data(), size); Ctx->MutableValues[i] = MakeString(savedRef); From 967c1778ee8b1452d6c41eb3ecaf1f021cb5298b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 15 May 2024 07:10:32 +0000 Subject: [PATCH 13/21] iterate list --- .../yql/minikql/comp_nodes/mkql_saveload.h | 9 +++--- .../mkql_computation_node_graph.cpp | 28 ++++++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h index 512cbf94f3f2..28001224fa95 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h @@ -180,7 +180,6 @@ struct TOutputSerializer { } class TRangeList: public TComputationValue { - const size_t MaxValueLen = 10000; class TIterator : public TComputationValue { const size_t MaxValueLen = 10000; @@ -209,8 +208,7 @@ struct TOutputSerializer { size_t Index; }; - public: - + public: TRangeList(TMemoryUsageInfo* memInfo, TComputationContext& ctx, TString&& buf) : TComputationValue(memInfo) , Ctx(ctx) @@ -218,13 +216,14 @@ struct TOutputSerializer { {} ui64 GetListLength() const override { - return Buf.size() / MaxValueLen + ((Buf.size() % MaxValueLen) ? 1 : 0); + ThrowNotSupported(__func__); + return 0; } NUdf::TUnboxedValue GetListIterator() const override { return Ctx.HolderFactory.Create(Buf); } - private: + private: TComputationContext& Ctx; TString Buf; }; diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp index 837e6f953a08..5b87827f86a9 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp @@ -729,17 +729,25 @@ class TComputationGraph final : public IComputationGraph { if (mutableValue.IsInvalid()) { WriteUi64(result, std::numeric_limits::max()); // -1. } else if (mutableValue.IsBoxed()) { - NUdf::TUnboxedValue list = mutableValue.Save(); - - TString taskState; - auto listIt = list.GetListIterator(); - NUdf::TUnboxedValue str; - while (listIt.Next(str)) { - const TStringBuf savedBuf = str.AsStringRef(); - taskState.AppendNoAlias(savedBuf.Data(), savedBuf.Size()); + TList taskState; + size_t taskStateSize = 0; + { + NUdf::TUnboxedValue list = mutableValue.Save(); + auto listIt = list.GetListIterator(); + NUdf::TUnboxedValue str; + while (listIt.Next(str)) { + const TStringBuf strRef = str.AsStringRef(); + taskStateSize += strRef.Size(); + taskState.push_back({}); + taskState.back().AppendNoAlias(strRef.Data(), strRef.Size()); + } } - - NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, taskState); + WriteUi64(result, taskStateSize); + for (auto it = taskState.begin(); it != taskState.end();) { + result.AppendNoAlias(it->Data(), it->Size()); + it = taskState.erase(it); + } + //NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, taskState); } else { // No load was done during previous runs (if any). MKQL_ENSURE(mutableValue.HasValue() && (mutableValue.IsString() || mutableValue.IsEmbedded()), "State is expected to have data or invalid value"); const NUdf::TStringRef savedRef = mutableValue.AsStringRef(); From a213069774aa1fdaef58d25d4e6ad97c09825adf Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 15 May 2024 13:15:15 +0000 Subject: [PATCH 14/21] iterate list --- .../minikql/comp_nodes/mkql_multihopping.cpp | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp index 99d5ec848d2f..218b43434b68 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp @@ -124,8 +124,23 @@ class TMultiHoppingCoreWrapper : public TStatefulSourceComputationNode Date: Thu, 16 May 2024 05:50:13 +0000 Subject: [PATCH 15/21] Add load state --- .../comp_nodes/mkql_match_recognize.cpp | 44 ++++-- .../mkql_match_recognize_save_load.h | 2 +- .../minikql/comp_nodes/mkql_multihopping.cpp | 26 +--- .../yql/minikql/comp_nodes/mkql_saveload.h | 138 +++++++++++------- .../comp_nodes/mkql_time_order_recover.cpp | 32 ++-- .../ut/mkql_multihopping_saveload_ut.cpp | 2 +- .../mkql_computation_node_codegen.h.txt | 2 +- .../mkql_computation_node_graph.cpp | 6 +- ...kql_computation_node_graph_saveload_ut.cpp | 2 +- .../computation/mkql_computation_node_impl.h | 5 + ydb/library/yql/public/udf/udf_value.h | 85 ++++++++++- ydb/library/yql/public/udf/udf_value_inl.h | 14 ++ ydb/library/yql/public/udf/udf_version.h | 2 +- 13 files changed, 250 insertions(+), 110 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp index ad831eec016a..6d7ffd9bac02 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp @@ -289,7 +289,7 @@ class TStateForNonInterleavedPartitions return out.MakeState(); } - void Load(const NUdf::TStringRef& state) override { + void LoadState(NUdf::TUnboxedValue& state) override { TMrInputSerializer in(SerializerContext, state); const auto loadStateVersion = in.GetStateVersion(); @@ -319,6 +319,10 @@ class TStateForNonInterleavedPartitions MKQL_ENSURE(in.Empty(), "State is corrupted"); } + bool HasListItems() const override { + return false; + } + bool ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) { MKQL_ENSURE(not DelayedRow, "Internal logic error"); //we're finalizing previous partition InputRowArg->SetValue(ctx, NUdf::TUnboxedValue(row)); @@ -432,7 +436,7 @@ class TStateForInterleavedPartitions return serializer.MakeState(); } - void Load(const NUdf::TStringRef& state) override { + void LoadState(NUdf::TUnboxedValue& state) override { TMrInputSerializer in(SerializerContext, state); const auto loadStateVersion = in.GetStateVersion(); @@ -469,6 +473,10 @@ class TStateForInterleavedPartitions MKQL_ENSURE(in.Empty(), "State is corrupted"); } + bool HasListItems() const override { + return false; + } + bool ProcessInputRow(NUdf::TUnboxedValue&& row, TComputationContext& ctx) { auto partition = GetPartitionHandler(row, ctx); if (partition->second->ProcessInputRow(std::move(row), ctx)) { @@ -569,20 +577,24 @@ class TMatchRecognizeWrapper : public TStatefulFlowComputationNode( - InputRowArg, - PartitionKey, - PartitionKeyType, - Parameters, - Cache, - ctx, - RowType, - RowPacker - ); - state.Load(stateValue.AsStringRef()); - stateValue = state; + } else if (stateValue.HasValue()) { + MKQL_ENSURE(stateValue.IsBoxed(), "Expected boxed value"); + bool isStateToLoad = stateValue.HasListItems(); + if (isStateToLoad) { + // Load from saved state. + NUdf::TUnboxedValue state = ctx.HolderFactory.Create( + InputRowArg, + PartitionKey, + PartitionKeyType, + Parameters, + Cache, + ctx, + RowType, + RowPacker + ); + state.LoadState(stateValue); + stateValue = state; + } } auto state = static_cast(stateValue.AsBoxed().Get()); while (true) { diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_save_load.h b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_save_load.h index 309a8a68715c..3c424fdc1620 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_save_load.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize_save_load.h @@ -83,7 +83,7 @@ struct TMrInputSerializer : TInputSerializer { }; public: - TMrInputSerializer(TSerializerContext& context, const NUdf::TStringRef& state) + TMrInputSerializer(TSerializerContext& context, NUdf::TUnboxedValue& state) : TInputSerializer(state, EMkqlStateType::SIMPLE_BLOB) , Context(context) { } diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp index 218b43434b68..1cc569145a36 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp @@ -124,23 +124,8 @@ class TMultiHoppingCoreWrapper : public TStatefulSourceComputationNode { + + class TIterator : public TComputationValue { + const size_t MaxValueLen = 10000; + + public: + TIterator(TMemoryUsageInfo* memInfo, const TString& buf) + : TComputationValue(memInfo) + , Buf(buf) + , Index(0) + {} + + private: + bool Next(NUdf::TUnboxedValue& value) override { + if (Buf.size() == Index) { + return false; + } + size_t nextSize = std::min(Buf.size() - Index, MaxValueLen); + NUdf::TStringValue str(nextSize); + std::memcpy(str.Data(), Buf.Data() + Index, nextSize); + Index += nextSize; + value = NUdf::TUnboxedValuePod(std::move(str)); + return true; + } + const TString& Buf; + size_t Index; + }; + +public: + TSaveLoadList(TMemoryUsageInfo* memInfo, TComputationContext& ctx, TString&& buf) + : TComputationValue(memInfo) + , Ctx(ctx) + , Buf(buf) + {} + + TSaveLoadList(TMemoryUsageInfo* memInfo, TComputationContext& ctx, const TStringBuf& buf) + : TComputationValue(memInfo) + , Ctx(ctx) + , Buf(buf) + {} + + ui64 GetListLength() const override { + ThrowNotSupported(__func__); + return 0; + } + + bool HasListItems() const override { + return !Buf.empty(); + } + + NUdf::TUnboxedValue GetListIterator() const override { + return Ctx.HolderFactory.Create(Buf); + } +private: + TComputationContext& Ctx; + TString Buf; +}; + struct TOutputSerializer { public: static NUdf::TUnboxedValue MakeSimpleBlobState(const TString& blob, ui32 stateVersion) { @@ -179,68 +237,29 @@ struct TOutputSerializer { Buf.AppendNoAlias(state.data(), state.size()); } - class TRangeList: public TComputationValue { - - class TIterator : public TComputationValue { - const size_t MaxValueLen = 10000; - - public: - TIterator(TMemoryUsageInfo* memInfo, const TString& buf) - : TComputationValue(memInfo) - , Buf(buf) - , Index(0) - {} - - private: - bool Next(NUdf::TUnboxedValue& value) override { - if (Buf.size() == Index) { - return false; - } - size_t nextSize = std::min(Buf.size() - Index, MaxValueLen); - - NUdf::TStringValue str(nextSize); - std::memcpy(str.Data(), Buf.Data() + Index, nextSize); - Index += nextSize; - value = NUdf::TUnboxedValuePod(std::move(str)); - return true; - } - const TString& Buf; - size_t Index; - }; - - public: - TRangeList(TMemoryUsageInfo* memInfo, TComputationContext& ctx, TString&& buf) - : TComputationValue(memInfo) - , Ctx(ctx) - , Buf(buf) - {} - - ui64 GetListLength() const override { - ThrowNotSupported(__func__); - return 0; - } - - NUdf::TUnboxedValue GetListIterator() const override { - return Ctx.HolderFactory.Create(Buf); - } - private: - TComputationContext& Ctx; - TString Buf; - }; - NUdf::TUnboxedValue MakeState() { - return Ctx.HolderFactory.Create(Ctx, std::move(Buf)); + return Ctx.HolderFactory.Create(Ctx, std::move(Buf)); } protected: TString Buf; TComputationContext& Ctx; }; - struct TInputSerializer { public: - TInputSerializer(const NUdf::TStringRef& state, TMaybe expectedType = Nothing()) - : Buf(state) { + TInputSerializer(const TStringBuf& state, TMaybe expectedType = Nothing()) + : Buf(state) { + Type = static_cast(Read()); + Read(StateVersion); + if (expectedType) { + MKQL_ENSURE(Type == *expectedType, "state type is not expected"); + } + } + + TInputSerializer(NUdf::TUnboxedValue& state, TMaybe expectedType = Nothing()) + : State(StateToString(state)) + , Buf(State) { + state.Clear(); Type = static_cast(Read()); Read(StateVersion); if (expectedType) { @@ -336,7 +355,20 @@ struct TInputSerializer { return Buf.empty(); } +private: + TString StateToString(NUdf::TUnboxedValue& state) { + TString result; + auto listIt = state.GetListIterator(); + NUdf::TUnboxedValue str; + while (listIt.Next(str)) { + const TStringBuf strRef = str.AsStringRef(); + result.AppendNoAlias(strRef.Data(), strRef.Size()); + } + return result; + } + protected: + TString State; TStringBuf Buf; EMkqlStateType Type{EMkqlStateType::SIMPLE_BLOB}; ui32 StateVersion{0}; diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp index e5229aeb3fb7..ab54df818699 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp @@ -100,7 +100,12 @@ class TTimeOrderRecover : public TStatefulFlowComputationNodeGetValue(ctx).Get(), RowLimit->GetValue(ctx).Get(), ctx); - } else if (stateValue.HasValue() && !stateValue.IsBoxed()) { - // Load from saved state. - NUdf::TUnboxedValue state = ctx.HolderFactory.Create( - this, - Delay->GetValue(ctx).Get(), - Ahead->GetValue(ctx).Get(), - RowLimit->GetValue(ctx).Get(), - ctx); - state.Load(stateValue.AsStringRef()); - stateValue = state; + } else if (stateValue.HasValue()) { + MKQL_ENSURE(stateValue.IsBoxed(), "Expected boxed value"); + bool isStateToLoad = stateValue.HasListItems(); + + if (isStateToLoad) { + // Load from saved state. + NUdf::TUnboxedValue state = ctx.HolderFactory.Create( + this, + Delay->GetValue(ctx).Get(), + Ahead->GetValue(ctx).Get(), + RowLimit->GetValue(ctx).Get(), + ctx); + state.LoadState(stateValue); + stateValue = state; + } } auto& state = *static_cast(stateValue.AsBoxed().Get()); while (true) { diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_multihopping_saveload_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_multihopping_saveload_ut.cpp index 9100c5d590e7..9aee3587feca 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_multihopping_saveload_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_multihopping_saveload_ut.cpp @@ -47,7 +47,7 @@ namespace { return NUdf::TUnboxedValue::Zero(); } - void Load(const NUdf::TStringRef& state) override { + void LoadState(NUdf::TUnboxedValue& state) override { Y_UNUSED(state); } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h.txt b/ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h.txt index cb4df89f5953..029289071956 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h.txt +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h.txt @@ -1229,7 +1229,7 @@ protected: return NUdf::TUnboxedValue::Zero(); } - void Load(const NUdf::TStringRef& state) override { + void LoadState(NUdf::TUnboxedValue& state) override { Y_UNUSED(state); } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp index 5b87827f86a9..ed55133d9877 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp @@ -747,8 +747,8 @@ class TComputationGraph final : public IComputationGraph { result.AppendNoAlias(it->Data(), it->Size()); it = taskState.erase(it); } - //NKikimr::NMiniKQL::TNodeStateHelper::AddNodeState(result, taskState); } else { // No load was done during previous runs (if any). + // TODO MKQL_ENSURE(mutableValue.HasValue() && (mutableValue.IsString() || mutableValue.IsEmbedded()), "State is expected to have data or invalid value"); const NUdf::TStringRef savedRef = mutableValue.AsStringRef(); WriteUi64(result, savedRef.Size()); @@ -764,8 +764,8 @@ class TComputationGraph final : public IComputationGraph { for (ui32 i : PatternNodes->GetMutables().SerializableValues) { if (const ui64 size = ReadUi64(state); size != std::numeric_limits::max()) { MKQL_ENSURE(state.Size() >= size, "Serialized state is corrupted - buffer is too short (" << state.Size() << ") for specified size: " << size); - const NUdf::TStringRef savedRef(state.Data(), size); - Ctx->MutableValues[i] = MakeString(savedRef); + TStringBuf savedRef(state.Data(), size); + Ctx->MutableValues[i] = HolderFactory->Create(*Ctx, savedRef); state.Skip(size); } // else leave it Invalid() } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload_ut.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload_ut.cpp index 5453f8b59095..3471b9dd4a7d 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload_ut.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload_ut.cpp @@ -86,7 +86,7 @@ namespace { return NUdf::TUnboxedValue::Zero(); } - void Load(const NUdf::TStringRef& state) override { + void LoadState(NUdf::TUnboxedValue& state) override { Y_UNUSED(state); } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_impl.h b/ydb/library/yql/minikql/computation/mkql_computation_node_impl.h index 645084171c57..0fc59899a990 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_impl.h +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_impl.h @@ -1013,6 +1013,11 @@ class TComputationValueBase: public TBaseExt ThrowNotSupported(__func__); } + void LoadState(NUdf::TUnboxedValue& state) override { + Y_UNUSED(state); + ThrowNotSupported(__func__); + } + void Push(const NUdf::TUnboxedValuePod& value) override { Y_UNUSED(value); ThrowNotSupported(__func__); diff --git a/ydb/library/yql/public/udf/udf_value.h b/ydb/library/yql/public/udf/udf_value.h index c9ab0a106455..2180760f465b 100644 --- a/ydb/library/yql/public/udf/udf_value.h +++ b/ydb/library/yql/public/udf/udf_value.h @@ -181,7 +181,20 @@ friend struct TBoxedValueAccessor; }; #endif -#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 30) +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) +class IBoxedValue7 : public IBoxedValue6 { +friend struct TBoxedValueAccessor; +private: + virtual void LoadState(TUnboxedValue& state) = 0; +}; +#endif + +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) +class IBoxedValue : public IBoxedValue7 { +protected: + IBoxedValue(); +}; +#elif UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 30) class IBoxedValue : public IBoxedValue6 { protected: IBoxedValue(); @@ -222,7 +235,7 @@ UDF_ASSERT_TYPE_SIZE(IBoxedValuePtr, 8); /////////////////////////////////////////////////////////////////////////////// struct TBoxedValueAccessor { -#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 30) +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) #define METHOD_MAP(xx) \ xx(HasFastListLength) \ @@ -266,7 +279,54 @@ struct TBoxedValueAccessor xx(Unused4) \ xx(Unused5) \ xx(Unused6) \ - xx(WideFetch) + xx(WideFetch) \ + xx(LoadState) + +#elif UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 30) + +#define METHOD_MAP(xx) \ + xx(HasFastListLength) \ + xx(GetListLength) \ + xx(GetEstimatedListLength) \ + xx(GetListIterator) \ + xx(GetListRepresentation) \ + xx(ReverseListImpl) \ + xx(SkipListImpl) \ + xx(TakeListImpl) \ + xx(ToIndexDictImpl) \ + xx(GetDictLength) \ + xx(GetDictIterator) \ + xx(GetKeysIterator) \ + xx(GetPayloadsIterator) \ + xx(Contains) \ + xx(Lookup) \ + xx(GetElement) \ + xx(GetElements) \ + xx(Run) \ + xx(GetResourceTag) \ + xx(GetResource) \ + xx(HasListItems) \ + xx(HasDictItems) \ + xx(GetVariantIndex) \ + xx(GetVariantItem) \ + xx(Fetch) \ + xx(Skip) \ + xx(Next) \ + xx(NextPair) \ + xx(Apply) \ + xx(GetTraverseCount) \ + xx(GetTraverseItem) \ + xx(Save) \ + xx(Load) \ + xx(Push) \ + xx(IsSortedDict) \ + xx(Unused1) \ + xx(Unused2) \ + xx(Unused3) \ + xx(Unused4) \ + xx(Unused5) \ + xx(Unused6) \ + xx(WideFetch) \ #elif UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 19) @@ -555,6 +615,10 @@ struct TBoxedValueAccessor #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 30) static inline EFetchStatus WideFetch(IBoxedValue& value, TUnboxedValue* result, ui32 width); #endif + +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) + static inline void LoadState(IBoxedValue& value, TUnboxedValue& data); +#endif }; #define MAP_HANDLER(xx) template<> inline uintptr_t TBoxedValueAccessor::GetMethodPtr() { return GetMethodPtr(&IBoxedValue::xx); } @@ -643,6 +707,10 @@ class TBoxedValueBase: public IBoxedValue { #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 30) EFetchStatus WideFetch(TUnboxedValue* result, ui32 width) override; #endif + +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) + void LoadState(TUnboxedValue& value) override; +#endif }; class TBoxedValueLink: public TBoxedValueBase @@ -841,6 +909,10 @@ friend class TUnboxedValue; inline EFetchStatus WideFetch(TUnboxedValue *result, ui32 width) const; #endif +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) + inline void LoadState(TUnboxedValue& value); +#endif + inline bool TryMakeVariant(ui32 index); inline void SetTimezoneId(ui16 id); @@ -1232,6 +1304,13 @@ inline EFetchStatus TBoxedValueBase::WideFetch(TUnboxedValue *result, ui32 width } #endif +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) +inline void TBoxedValueBase::LoadState(TUnboxedValue& value) { + Y_UNUSED(value); + Y_ABORT("Not implemented"); +} +#endif + inline void TUnboxedValuePod::Dump(IOutputStream& out) const { switch (Raw.GetMarkers()) { case EMarkers::Empty: diff --git a/ydb/library/yql/public/udf/udf_value_inl.h b/ydb/library/yql/public/udf/udf_value_inl.h index d862ede39ccb..789d4232d08d 100644 --- a/ydb/library/yql/public/udf/udf_value_inl.h +++ b/ydb/library/yql/public/udf/udf_value_inl.h @@ -283,6 +283,13 @@ inline EFetchStatus TBoxedValueAccessor::WideFetch(IBoxedValue& value, TUnboxedV } #endif +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) +inline void TBoxedValueAccessor::LoadState(IBoxedValue& value, TUnboxedValue& data) { + Y_DEBUG_ABORT_UNLESS(value.IsCompatibleTo(MakeAbiCompatibilityVersion(2, 36))); + value.LoadState(data); +} +#endif + ////////////////////////////////////////////////////////////////////////////// // TUnboxedValue ////////////////////////////////////////////////////////////////////////////// @@ -615,6 +622,13 @@ inline EFetchStatus TUnboxedValuePod::WideFetch(TUnboxedValue* result, ui32 widt } #endif +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) +inline void TUnboxedValuePod::LoadState(TUnboxedValue& value) { + UDF_VERIFY(IsBoxed(), "Value is not boxed"); + TBoxedValueAccessor::LoadState(*Raw.Boxed.Value, value); +} +#endif + Y_FORCE_INLINE void TUnboxedValuePod::Ref() const noexcept { switch (Raw.GetMarkers()) { diff --git a/ydb/library/yql/public/udf/udf_version.h b/ydb/library/yql/public/udf/udf_version.h index b9ef0e3cada1..964f1eba1759 100644 --- a/ydb/library/yql/public/udf/udf_version.h +++ b/ydb/library/yql/public/udf/udf_version.h @@ -7,7 +7,7 @@ namespace NYql { namespace NUdf { #define CURRENT_UDF_ABI_VERSION_MAJOR 2 -#define CURRENT_UDF_ABI_VERSION_MINOR 35 +#define CURRENT_UDF_ABI_VERSION_MINOR 36 #define CURRENT_UDF_ABI_VERSION_PATCH 0 #ifdef USE_CURRENT_UDF_ABI_VERSION From b6911805ab7548b5d5561178e49818ded1176aa8 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 16 May 2024 08:39:06 +0000 Subject: [PATCH 16/21] try to fix mqkl tests --- .../minikql/comp_nodes/mkql_multihopping.cpp | 8 ++++++ .../mkql_computation_node_graph_saveload.cpp | 25 +++++++++++++++---- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp index 1cc569145a36..a7d06f1a60bb 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp @@ -124,9 +124,17 @@ class TMultiHoppingCoreWrapper : public TStatefulSourceComputationNode Date: Fri, 17 May 2024 07:58:01 +0000 Subject: [PATCH 17/21] Style fix --- .../checkpoint_storage/ydb_state_storage.cpp | 2 +- .../comp_nodes/mkql_time_order_recover.cpp | 1 - .../mkql_computation_node_graph.cpp | 20 +++++++++++-------- .../mkql_computation_node_graph_saveload.cpp | 6 +++--- .../pq/async_io/dq_pq_write_actor.cpp | 4 ++-- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp index 5b4e9c4f3703..dd09709b0402 100644 --- a/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp +++ b/ydb/core/fq/libs/checkpoint_storage/ydb_state_storage.cpp @@ -481,7 +481,7 @@ TFuture TStateStorage::SaveState( checkpointId, TMaybe(), serializedState, - type); + type); auto promise = NewPromise(); auto future = UpsertRow(context); diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp index ab54df818699..0c595a3fc08f 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp @@ -191,7 +191,6 @@ class TTimeOrderRecover : public TStatefulFlowComputationNode( diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp index ed55133d9877..af62898c3d23 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp @@ -731,8 +731,8 @@ class TComputationGraph final : public IComputationGraph { } else if (mutableValue.IsBoxed()) { TList taskState; size_t taskStateSize = 0; - { - NUdf::TUnboxedValue list = mutableValue.Save(); + + auto saveList = [&](auto& list) { auto listIt = list.GetListIterator(); NUdf::TUnboxedValue str; while (listIt.Next(str)) { @@ -741,18 +741,22 @@ class TComputationGraph final : public IComputationGraph { taskState.push_back({}); taskState.back().AppendNoAlias(strRef.Data(), strRef.Size()); } + }; + bool isList= mutableValue.HasListItems(); + NUdf::TUnboxedValue list; + if (isList) { // No load was done during previous runs + saveList(mutableValue); + } else { + NUdf::TUnboxedValue list = mutableValue.Save(); + saveList(list); } WriteUi64(result, taskStateSize); for (auto it = taskState.begin(); it != taskState.end();) { result.AppendNoAlias(it->Data(), it->Size()); it = taskState.erase(it); } - } else { // No load was done during previous runs (if any). - // TODO - MKQL_ENSURE(mutableValue.HasValue() && (mutableValue.IsString() || mutableValue.IsEmbedded()), "State is expected to have data or invalid value"); - const NUdf::TStringRef savedRef = mutableValue.AsStringRef(); - WriteUi64(result, savedRef.Size()); - result.AppendNoAlias(savedRef.Data(), savedRef.Size()); + } else { + MKQL_ENSURE(false, "State is expected to have data or invalid value"); } } return result; diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload.cpp index 0ede5ca872f7..9a010b221db9 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload.cpp @@ -59,7 +59,7 @@ void SaveGraphState(const NUdf::TUnboxedValue* roots, ui32 rootCount, ui64 hash, if (state.IsString() || state.IsEmbedded()) { auto strRef = state.AsStringRef(); auto size = strRef.Size(); - WriteUi32(out, size); + WriteUi64(out, size); if (size) { out.AppendNoAlias(strRef.Data(), size); } @@ -72,7 +72,7 @@ void SaveGraphState(const NUdf::TUnboxedValue* roots, ui32 rootCount, ui64 hash, const TStringBuf strRef = str.AsStringRef(); taskState.AppendNoAlias(strRef.Data(), strRef.Size()); } - WriteUi32(out, taskState.size()); + WriteUi64(out, taskState.size()); if (!taskState.empty()) { out.AppendNoAlias(taskState.Data(), taskState.size()); } @@ -93,7 +93,7 @@ void LoadGraphState(const NUdf::TUnboxedValue* roots, ui32 rootCount, ui64 hash, TraverseGraph(roots, rootCount, values); for (ui32 i = 0; i < values.size(); ++i) { - auto size = ReadUi32(state); + auto size = ReadUi64(state); if (size) { MKQL_ENSURE(size <= state.size(), "Serialized state is corrupted"); values[i].Load(NUdf::TStringRef(state.data(), size)); diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp index e2bec4375c08..d104d05542f0 100644 --- a/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp +++ b/ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.cpp @@ -354,7 +354,7 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu return !events.empty(); } - TSinkState BuildState(const NDqProto::TCheckpoint& /*checkpoint*/) { + TSinkState BuildState(const NDqProto::TCheckpoint& checkpoint) { NPq::NProto::TDqPqTopicSinkState stateProto; stateProto.SetSourceId(GetSourceId()); stateProto.SetConfirmedSeqNo(ConfirmedSeqNo); @@ -366,7 +366,7 @@ class TDqPqWriteActor : public NActors::TActor, public IDqCompu auto& data = sinkState.Data; data.Version = StateVersion; data.Blob = serializedState; - //SINK_LOG_T("Save checkpoint " << checkpoint << " state: " << stateProto << ". Sink state: " << sinkState); + SINK_LOG_T("Save checkpoint " << checkpoint << " state: " << stateProto); return sinkState; } From 1824381080d51f3d98c47faa92cbe653c980f7ac Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 21 May 2024 08:20:43 +0000 Subject: [PATCH 18/21] Use array --- .../comp_nodes/mkql_match_recognize.cpp | 6 +- .../minikql/comp_nodes/mkql_multihopping.cpp | 6 +- .../yql/minikql/comp_nodes/mkql_saveload.h | 78 +++++-------------- .../comp_nodes/mkql_time_order_recover.cpp | 4 +- .../ut/mkql_multihopping_saveload_ut.cpp | 2 +- .../mkql_computation_node_codegen.h.txt | 2 +- .../mkql_computation_node_graph.cpp | 24 ++++-- ...kql_computation_node_graph_saveload_ut.cpp | 2 +- .../computation/mkql_computation_node_impl.h | 2 +- ydb/library/yql/public/udf/udf_value.h | 12 +-- ydb/library/yql/public/udf/udf_value_inl.h | 8 +- 11 files changed, 58 insertions(+), 88 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp index 6d7ffd9bac02..d64b09e21190 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp @@ -289,7 +289,7 @@ class TStateForNonInterleavedPartitions return out.MakeState(); } - void LoadState(NUdf::TUnboxedValue& state) override { + void Load2(NUdf::TUnboxedValue& state) override { TMrInputSerializer in(SerializerContext, state); const auto loadStateVersion = in.GetStateVersion(); @@ -436,7 +436,7 @@ class TStateForInterleavedPartitions return serializer.MakeState(); } - void LoadState(NUdf::TUnboxedValue& state) override { + void Load2(NUdf::TUnboxedValue& state) override { TMrInputSerializer in(SerializerContext, state); const auto loadStateVersion = in.GetStateVersion(); @@ -592,7 +592,7 @@ class TMatchRecognizeWrapper : public TStatefulFlowComputationNode { - - class TIterator : public TComputationValue { - const size_t MaxValueLen = 10000; - - public: - TIterator(TMemoryUsageInfo* memInfo, const TString& buf) - : TComputationValue(memInfo) - , Buf(buf) - , Index(0) - {} - - private: - bool Next(NUdf::TUnboxedValue& value) override { - if (Buf.size() == Index) { - return false; - } - size_t nextSize = std::min(Buf.size() - Index, MaxValueLen); - NUdf::TStringValue str(nextSize); - std::memcpy(str.Data(), Buf.Data() + Index, nextSize); - Index += nextSize; - value = NUdf::TUnboxedValuePod(std::move(str)); - return true; - } - const TString& Buf; - size_t Index; - }; - -public: - TSaveLoadList(TMemoryUsageInfo* memInfo, TComputationContext& ctx, TString&& buf) - : TComputationValue(memInfo) - , Ctx(ctx) - , Buf(buf) - {} - - TSaveLoadList(TMemoryUsageInfo* memInfo, TComputationContext& ctx, const TStringBuf& buf) - : TComputationValue(memInfo) - , Ctx(ctx) - , Buf(buf) - {} - - ui64 GetListLength() const override { - ThrowNotSupported(__func__); - return 0; - } - - bool HasListItems() const override { - return !Buf.empty(); - } - - NUdf::TUnboxedValue GetListIterator() const override { - return Ctx.HolderFactory.Create(Buf); - } -private: - TComputationContext& Ctx; - TString Buf; -}; - struct TOutputSerializer { public: static NUdf::TUnboxedValue MakeSimpleBlobState(const TString& blob, ui32 stateVersion) { @@ -237,8 +179,26 @@ struct TOutputSerializer { Buf.AppendNoAlias(state.data(), state.size()); } + static NUdf::TUnboxedValue MakeArray(TComputationContext& ctx, const TStringBuf& buf) { + const size_t MaxItemLen = 1048576; + + size_t count = buf.size() / MaxItemLen + (buf.size() % MaxItemLen ? 1 : 0); + NUdf::TUnboxedValue *items = nullptr; + auto array = ctx.HolderFactory.CreateDirectArrayHolder(count, items); + + size_t pos = 0; + for (size_t index = 0; index < count; ++index) { + size_t itemSize = std::min(buf.size() - pos, MaxItemLen); + NUdf::TStringValue str(itemSize); + std::memcpy(str.Data(), buf.Data() + pos, itemSize); + items[index] = NUdf::TUnboxedValuePod(std::move(str));; + pos += itemSize; + } + return array; + } + NUdf::TUnboxedValue MakeState() { - return Ctx.HolderFactory.Create(Ctx, std::move(Buf)); + return MakeArray(Ctx, Buf); } protected: TString Buf; diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp index 0c595a3fc08f..cedb2bd4dfe5 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_time_order_recover.cpp @@ -105,7 +105,7 @@ class TTimeOrderRecover : public TStatefulFlowComputationNodeGetValue(ctx).Get(), RowLimit->GetValue(ctx).Get(), ctx); - state.LoadState(stateValue); + state.Load2(stateValue); stateValue = state; } } diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_multihopping_saveload_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_multihopping_saveload_ut.cpp index 9aee3587feca..515752a73a35 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_multihopping_saveload_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_multihopping_saveload_ut.cpp @@ -47,7 +47,7 @@ namespace { return NUdf::TUnboxedValue::Zero(); } - void LoadState(NUdf::TUnboxedValue& state) override { + void Load2(NUdf::TUnboxedValue& state) override { Y_UNUSED(state); } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h.txt b/ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h.txt index 029289071956..669386838628 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h.txt +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_codegen.h.txt @@ -1229,7 +1229,7 @@ protected: return NUdf::TUnboxedValue::Zero(); } - void LoadState(NUdf::TUnboxedValue& state) override { + void Load2(NUdf::TUnboxedValue& state) override { Y_UNUSED(state); } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp index af62898c3d23..08066458ca2a 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp @@ -742,21 +742,31 @@ class TComputationGraph final : public IComputationGraph { taskState.back().AppendNoAlias(strRef.Data(), strRef.Size()); } }; - bool isList= mutableValue.HasListItems(); + bool isList = mutableValue.HasListItems(); NUdf::TUnboxedValue list; - if (isList) { // No load was done during previous runs + if (isList) { // No load was done during previous runs. saveList(mutableValue); } else { - NUdf::TUnboxedValue list = mutableValue.Save(); - saveList(list); + NUdf::TUnboxedValue saved = mutableValue.Save(); + if (saved.IsString() || saved.IsEmbedded()) { // Old version. + const TStringBuf savedBuf = saved.AsStringRef(); + taskState.push_back({}); + taskState.back().AppendNoAlias(savedBuf.Data(), savedBuf.Size()); + taskStateSize = savedBuf.Size(); + } else { + saveList(saved); + } } WriteUi64(result, taskStateSize); for (auto it = taskState.begin(); it != taskState.end();) { result.AppendNoAlias(it->Data(), it->Size()); it = taskState.erase(it); } - } else { - MKQL_ENSURE(false, "State is expected to have data or invalid value"); + } else { // No load was done during previous runs (if any). + MKQL_ENSURE(mutableValue.HasValue() && (mutableValue.IsString() || mutableValue.IsEmbedded()), "State is expected to have data or invalid value"); + const NUdf::TStringRef savedRef = mutableValue.AsStringRef(); + WriteUi64(result, savedRef.Size()); + result.AppendNoAlias(savedRef.Data(), savedRef.Size()); } } return result; @@ -769,7 +779,7 @@ class TComputationGraph final : public IComputationGraph { if (const ui64 size = ReadUi64(state); size != std::numeric_limits::max()) { MKQL_ENSURE(state.Size() >= size, "Serialized state is corrupted - buffer is too short (" << state.Size() << ") for specified size: " << size); TStringBuf savedRef(state.Data(), size); - Ctx->MutableValues[i] = HolderFactory->Create(*Ctx, savedRef); + Ctx->MutableValues[i] = NKikimr::NMiniKQL::TOutputSerializer::MakeArray(*Ctx, savedRef); state.Skip(size); } // else leave it Invalid() } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload_ut.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload_ut.cpp index 3471b9dd4a7d..f1eb49daacb0 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload_ut.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph_saveload_ut.cpp @@ -86,7 +86,7 @@ namespace { return NUdf::TUnboxedValue::Zero(); } - void LoadState(NUdf::TUnboxedValue& state) override { + void Load2(NUdf::TUnboxedValue& state) override { Y_UNUSED(state); } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_impl.h b/ydb/library/yql/minikql/computation/mkql_computation_node_impl.h index 0fc59899a990..080fefd90d35 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_impl.h +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_impl.h @@ -1013,7 +1013,7 @@ class TComputationValueBase: public TBaseExt ThrowNotSupported(__func__); } - void LoadState(NUdf::TUnboxedValue& state) override { + void Load2(NUdf::TUnboxedValue& state) override { Y_UNUSED(state); ThrowNotSupported(__func__); } diff --git a/ydb/library/yql/public/udf/udf_value.h b/ydb/library/yql/public/udf/udf_value.h index 2180760f465b..80708e36a8ca 100644 --- a/ydb/library/yql/public/udf/udf_value.h +++ b/ydb/library/yql/public/udf/udf_value.h @@ -185,7 +185,7 @@ friend struct TBoxedValueAccessor; class IBoxedValue7 : public IBoxedValue6 { friend struct TBoxedValueAccessor; private: - virtual void LoadState(TUnboxedValue& state) = 0; + virtual void Load2(TUnboxedValue& state) = 0; }; #endif @@ -280,7 +280,7 @@ struct TBoxedValueAccessor xx(Unused5) \ xx(Unused6) \ xx(WideFetch) \ - xx(LoadState) + xx(Load2) #elif UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 30) @@ -617,7 +617,7 @@ struct TBoxedValueAccessor #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) - static inline void LoadState(IBoxedValue& value, TUnboxedValue& data); + static inline void Load2(IBoxedValue& value, TUnboxedValue& data); #endif }; @@ -709,7 +709,7 @@ class TBoxedValueBase: public IBoxedValue { #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) - void LoadState(TUnboxedValue& value) override; + void Load2(TUnboxedValue& value) override; #endif }; @@ -910,7 +910,7 @@ friend class TUnboxedValue; #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) - inline void LoadState(TUnboxedValue& value); + inline void Load2(TUnboxedValue& value); #endif inline bool TryMakeVariant(ui32 index); @@ -1305,7 +1305,7 @@ inline EFetchStatus TBoxedValueBase::WideFetch(TUnboxedValue *result, ui32 width #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) -inline void TBoxedValueBase::LoadState(TUnboxedValue& value) { +inline void TBoxedValueBase::Load2(TUnboxedValue& value) { Y_UNUSED(value); Y_ABORT("Not implemented"); } diff --git a/ydb/library/yql/public/udf/udf_value_inl.h b/ydb/library/yql/public/udf/udf_value_inl.h index 789d4232d08d..938466b01b0f 100644 --- a/ydb/library/yql/public/udf/udf_value_inl.h +++ b/ydb/library/yql/public/udf/udf_value_inl.h @@ -284,9 +284,9 @@ inline EFetchStatus TBoxedValueAccessor::WideFetch(IBoxedValue& value, TUnboxedV #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) -inline void TBoxedValueAccessor::LoadState(IBoxedValue& value, TUnboxedValue& data) { +inline void TBoxedValueAccessor::Load2(IBoxedValue& value, TUnboxedValue& data) { Y_DEBUG_ABORT_UNLESS(value.IsCompatibleTo(MakeAbiCompatibilityVersion(2, 36))); - value.LoadState(data); + value.Load2(data); } #endif @@ -623,9 +623,9 @@ inline EFetchStatus TUnboxedValuePod::WideFetch(TUnboxedValue* result, ui32 widt #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) -inline void TUnboxedValuePod::LoadState(TUnboxedValue& value) { +inline void TUnboxedValuePod::Load2(TUnboxedValue& value) { UDF_VERIFY(IsBoxed(), "Value is not boxed"); - TBoxedValueAccessor::LoadState(*Raw.Boxed.Value, value); + TBoxedValueAccessor::Load2(*Raw.Boxed.Value, value); } #endif From 5fbdd625c647f32326d19774b9aabdc389af0222 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 21 May 2024 12:04:09 +0000 Subject: [PATCH 19/21] remove ; --- ydb/library/yql/minikql/comp_nodes/mkql_saveload.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h index a1eebf31f332..40075b70efe6 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h @@ -191,7 +191,7 @@ struct TOutputSerializer { size_t itemSize = std::min(buf.size() - pos, MaxItemLen); NUdf::TStringValue str(itemSize); std::memcpy(str.Data(), buf.Data() + pos, itemSize); - items[index] = NUdf::TUnboxedValuePod(std::move(str));; + items[index] = NUdf::TUnboxedValuePod(std::move(str)); pos += itemSize; } return array; From ead7c5735234187171ff8e2464aa9350c2cd285a Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 21 May 2024 13:40:17 +0000 Subject: [PATCH 20/21] Add Load2() return value --- .../yql/minikql/comp_nodes/mkql_match_recognize.cpp | 6 ++++-- .../yql/minikql/comp_nodes/mkql_multihopping.cpp | 3 ++- .../minikql/comp_nodes/mkql_time_order_recover.cpp | 3 ++- .../comp_nodes/ut/mkql_multihopping_saveload_ut.cpp | 3 ++- .../computation/mkql_computation_node_codegen.h.txt | 3 ++- .../mkql_computation_node_graph_saveload_ut.cpp | 3 ++- .../minikql/computation/mkql_computation_node_impl.h | 3 ++- ydb/library/yql/public/udf/udf_value.h | 10 +++++----- ydb/library/yql/public/udf/udf_value_inl.h | 12 ++++++++---- 9 files changed, 29 insertions(+), 17 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp index d64b09e21190..42dd21e64be4 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_match_recognize.cpp @@ -289,7 +289,7 @@ class TStateForNonInterleavedPartitions return out.MakeState(); } - void Load2(NUdf::TUnboxedValue& state) override { + bool Load2(NUdf::TUnboxedValue& state) override { TMrInputSerializer in(SerializerContext, state); const auto loadStateVersion = in.GetStateVersion(); @@ -317,6 +317,7 @@ class TStateForNonInterleavedPartitions restoredRowPatternConfiguration->Load(in); MKQL_ENSURE(*restoredRowPatternConfiguration == *RowPatternConfiguration, "Restored and current RowPatternConfiguration is different"); MKQL_ENSURE(in.Empty(), "State is corrupted"); + return true; } bool HasListItems() const override { @@ -436,7 +437,7 @@ class TStateForInterleavedPartitions return serializer.MakeState(); } - void Load2(NUdf::TUnboxedValue& state) override { + bool Load2(NUdf::TUnboxedValue& state) override { TMrInputSerializer in(SerializerContext, state); const auto loadStateVersion = in.GetStateVersion(); @@ -471,6 +472,7 @@ class TStateForInterleavedPartitions MKQL_ENSURE(NfaTransitionGraph, "Empty NfaTransitionGraph"); MKQL_ENSURE(*restoredTransitionGraph == *NfaTransitionGraph, "Restored and current NfaTransitionGraph is different"); MKQL_ENSURE(in.Empty(), "State is corrupted"); + return true; } bool HasListItems() const override { diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp index 2c3a5e87b20e..eed929be5f18 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_multihopping.cpp @@ -129,9 +129,10 @@ class TMultiHoppingCoreWrapper : public TStatefulSourceComputationNode= UDF_ABI_COMPATIBILITY_VERSION(2, 36) - static inline void Load2(IBoxedValue& value, TUnboxedValue& data); + static inline bool Load2(IBoxedValue& value, TUnboxedValue& data); #endif }; @@ -709,7 +709,7 @@ class TBoxedValueBase: public IBoxedValue { #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) - void Load2(TUnboxedValue& value) override; + bool Load2(TUnboxedValue& value) override; #endif }; @@ -910,7 +910,7 @@ friend class TUnboxedValue; #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) - inline void Load2(TUnboxedValue& value); + inline bool Load2(TUnboxedValue& value); #endif inline bool TryMakeVariant(ui32 index); @@ -1305,7 +1305,7 @@ inline EFetchStatus TBoxedValueBase::WideFetch(TUnboxedValue *result, ui32 width #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) -inline void TBoxedValueBase::Load2(TUnboxedValue& value) { +inline bool TBoxedValueBase::Load2(TUnboxedValue& value) { Y_UNUSED(value); Y_ABORT("Not implemented"); } diff --git a/ydb/library/yql/public/udf/udf_value_inl.h b/ydb/library/yql/public/udf/udf_value_inl.h index 938466b01b0f..bbc96e5c0c8a 100644 --- a/ydb/library/yql/public/udf/udf_value_inl.h +++ b/ydb/library/yql/public/udf/udf_value_inl.h @@ -284,9 +284,13 @@ inline EFetchStatus TBoxedValueAccessor::WideFetch(IBoxedValue& value, TUnboxedV #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) -inline void TBoxedValueAccessor::Load2(IBoxedValue& value, TUnboxedValue& data) { +inline bool TBoxedValueAccessor::Load2(IBoxedValue& value, TUnboxedValue& data) { + if (!value.IsCompatibleTo(MakeAbiCompatibilityVersion(2, 36))) { + return false; + } + Y_DEBUG_ABORT_UNLESS(value.IsCompatibleTo(MakeAbiCompatibilityVersion(2, 36))); - value.Load2(data); + return value.Load2(data); } #endif @@ -623,9 +627,9 @@ inline EFetchStatus TUnboxedValuePod::WideFetch(TUnboxedValue* result, ui32 widt #endif #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 36) -inline void TUnboxedValuePod::Load2(TUnboxedValue& value) { +inline bool TUnboxedValuePod::Load2(TUnboxedValue& value) { UDF_VERIFY(IsBoxed(), "Value is not boxed"); - TBoxedValueAccessor::Load2(*Raw.Boxed.Value, value); + return TBoxedValueAccessor::Load2(*Raw.Boxed.Value, value); } #endif From 7a291e97f15214c344658a8c4ea9d3a799693333 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 22 May 2024 07:54:18 +0000 Subject: [PATCH 21/21] remove assert --- ydb/library/yql/minikql/comp_nodes/mkql_saveload.h | 2 +- ydb/library/yql/public/udf/udf_value_inl.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h index 40075b70efe6..9f53addbcc54 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_saveload.h @@ -323,7 +323,7 @@ struct TInputSerializer { while (listIt.Next(str)) { const TStringBuf strRef = str.AsStringRef(); result.AppendNoAlias(strRef.Data(), strRef.Size()); - } + } return result; } diff --git a/ydb/library/yql/public/udf/udf_value_inl.h b/ydb/library/yql/public/udf/udf_value_inl.h index bbc96e5c0c8a..3454fd1724c6 100644 --- a/ydb/library/yql/public/udf/udf_value_inl.h +++ b/ydb/library/yql/public/udf/udf_value_inl.h @@ -289,7 +289,6 @@ inline bool TBoxedValueAccessor::Load2(IBoxedValue& value, TUnboxedValue& data) return false; } - Y_DEBUG_ABORT_UNLESS(value.IsCompatibleTo(MakeAbiCompatibilityVersion(2, 36))); return value.Load2(data); } #endif