diff --git a/ydb/core/formats/arrow/program.h b/ydb/core/formats/arrow/program.h
index dbfa46f0f816..31254144fa93 100644
--- a/ydb/core/formats/arrow/program.h
+++ b/ydb/core/formats/arrow/program.h
@@ -62,6 +62,11 @@ class TColumnInfo {
     }
 
 public:
+    // Debug rendering: the column name, prefixed with "G:" when GeneratedFlag is set.
+    TString DebugString() const {
+        return TStringBuilder() << (GeneratedFlag ? "G:" : "") << ColumnName;
+    }
+
     static TColumnInfo Generated(const ui32 columnId, const std::string& columnName) {
         return TColumnInfo(columnId, columnName, true);
     }
@@ -233,6 +238,13 @@ class TAssign {
     const arrow::compute::FunctionOptions* GetOptions() const { return FuncOpts.get(); }
 
     IStepFunction::TPtr GetFunction(arrow::compute::ExecContext* ctx) const;
+    // Debug rendering of op and column, plus the constant and kernel name; an unset one is printed as "<key>=NO;".
+    TString DebugString() const {
+        return TStringBuilder() <<
+            "{op=" << Operation << ";column=" << Column.DebugString() << ";" << (Constant ? "const=" + Constant->ToString() + ";" : "const=NO;")
+            << (KernelFunction ? ("kernel=" + KernelFunction->name() + ";") : "kernel=NO;")
+            << "}";
+    }
 private:
     const TColumnInfo Column;
     EOperation Operation{EOperation::Unspecified};
@@ -312,6 +324,28 @@ class TProgramStep {
 public:
     using TDatumBatch = TDatumBatch;
 
+    // Debug rendering: every assignment, the GroupBy / GroupByKeys sizes, and every projection.
+    TString DebugString() const {
+        TStringBuilder sb;
+        sb << "{";
+        sb << "assignes=[";
+        for (auto&& i : Assignes) {
+            sb << i.DebugString() << ";";
+        }
+        sb << "];";
+        sb << "group_by_count=" << GroupBy.size() << ";";
+        sb << "group_by_keys_count=" << GroupByKeys.size() << ";";
+
+        sb << "projections=[";
+        for (auto&& i : Projection) {
+            sb << i.DebugString() << ";";
+        }
+        sb << "];";
+
+        sb << "}";
+        return sb;
+    }
+
     std::set GetColumnsInUsage() const;
     const std::set& GetFilterOriginalColumnIds() const;
 
@@ -391,6 +425,16 @@ struct TProgram {
     std::set GetEarlyFilterColumns() const;
     std::set GetProcessingColumns() const;
     std::shared_ptr ApplyEarlyFilter(std::shared_ptr& batch, const bool useFilter) const;
+    // Debug rendering: each step's DebugString, ';'-terminated, inside square brackets.
+    TString DebugString() const {
+        TStringBuilder sb;
+        sb << "[";
+        for (auto&& i : Steps) {
+            sb << i->DebugString() << ";";
+        }
+        sb << "]";
+        return sb;
+    }
 };
 
 inline arrow::Status ApplyProgram(
diff --git a/ydb/core/tx/columnshard/splitter/stats.h b/ydb/core/tx/columnshard/splitter/stats.h
index 6c7c5e154d6f..695a7ab32dd7 100644
--- a/ydb/core/tx/columnshard/splitter/stats.h
+++ b/ydb/core/tx/columnshard/splitter/stats.h
@@ -30,6 +30,15 @@ class TSimpleSerializationStat {
         Y_ABORT_UNLESS(RawBytes);
     }
 
+    // Debug rendering of the three counters: SerializedBytes, RecordsCount, RawBytes.
+    TString DebugString() const {
+        return TStringBuilder() << "{"
+            << "serialized_bytes=" << SerializedBytes << ";"
+            << "records=" << RecordsCount << ";"
+            << "raw_bytes=" << RawBytes << ";"
+            << "}";
+    }
+
     double GetSerializedBytesPerRecord() const {
         AFL_VERIFY(RecordsCount);
         return 1.0 * SerializedBytes / RecordsCount;
@@ -77,6 +86,11 @@ class TBatchSerializationStat {
         RawBytesPerRecord = 1.0 * rawBytes / recordsCount;
     }
 
+    // Debug rendering: sbpr = SerializedBytesPerRecord, rbpr = RawBytesPerRecord.
+    TString DebugString() const {
+        return TStringBuilder() << "{sbpr=" << SerializedBytesPerRecord << ";rbpr=" << RawBytesPerRecord << "}";
+    }
+
     TBatchSerializationStat(const TSimpleSerializationStat& simple) {
         SerializedBytesPerRecord = simple.GetSerializedBytesPerRecord();
         RawBytesPerRecord = simple.GetRawBytesPerRecord();
@@ -114,6 +128,7 @@ class TBatchSerializationStat {
 
 class TColumnSerializationStat: public TSimpleSerializationStat {
 private:
+    using TBase = TSimpleSerializationStat;
     YDB_READONLY(ui32, ColumnId, 0);
     YDB_READONLY_DEF(std::string, ColumnName);
 public:
@@ -133,6 +148,11 @@ class TColumnSerializationStat: public TSimpleSerializationStat {
         return result;
     }
 
+    // Debug rendering: column id and name, with the base-class counters under "details=".
+    TString DebugString() const {
+        return TStringBuilder() << "{id=" << ColumnId << ";name=" << ColumnName << ";details=" << TBase::DebugString() << "}";
+    }
+
     void Merge(const TSimpleSerializationStat& item) {
         SerializedBytes += item.GetSerializedBytes();
         RawBytes += item.GetRawBytes();
@@ -146,6 +166,17 @@ class TSerializationStats {
     std::map StatsByColumnId;
     std::map StatsByColumnName;
 public:
+    // Debug rendering: every ColumnStat entry, ';'-terminated, as a bracketed list.
+    TString DebugString() const {
+        TStringBuilder sb;
+        sb << "{columns=[";
+        for (auto&& i : ColumnStat) {
+            sb << i.DebugString() << ";";
+        }
+        sb << "];}";
+        return sb;
+    }
+
     void Merge(const TSerializationStats& item) {
         for (auto&& i : item.ColumnStat) {
             AddStat(i);
diff --git a/ydb/core/tx/program/program.cpp b/ydb/core/tx/program/program.cpp
index 49967d75d4a5..6ee25067bdd4 100644
--- a/ydb/core/tx/program/program.cpp
+++ b/ydb/core/tx/program/program.cpp
@@ -475,6 +475,8 @@ bool TProgramContainer::Init(const IColumnResolver& columnResolver, NKikimrSchem
         return false;
     }
 
+    // Keep a copy of the source proto so ProtoDebugString() can render it later.
+    ProgramProto = programProto;
     if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) {
         TString out;
         ::google::protobuf::TextFormat::PrintToString(programProto, &out);
diff --git a/ydb/core/tx/program/program.h b/ydb/core/tx/program/program.h
index 67cac1845ae1..b595fc1f6100 100644
--- a/ydb/core/tx/program/program.h
+++ b/ydb/core/tx/program/program.h
@@ -21,12 +21,24 @@ class IColumnResolver {
 
 class TProgramContainer {
 private:
+    // Copy of the proto passed to Init(); backs ProtoDebugString().
+    NKikimrSSA::TProgram ProgramProto;
     std::shared_ptr Program;
     std::shared_ptr ProgramParameters; // TODO
     TKernelsRegistry KernelsRegistry;
     std::optional> OverrideProcessingColumnsSet;
     std::optional> OverrideProcessingColumnsVector;
 public:
+    // Text dump of the stored source proto.
+    TString ProtoDebugString() const {
+        return ProgramProto.DebugString();
+    }
+
+    // Dump of the parsed program, or "NO_PROGRAM" when Program is null.
+    TString DebugString() const {
+        return Program ? Program->DebugString() : "NO_PROGRAM";
+    }
+
     bool HasOverridenProcessingColumnIds() const {
         return !!OverrideProcessingColumnsVector;
     }
@@ -75,8 +87,6 @@ class TProgramContainer {
 
     std::set GetEarlyFilterColumns() const;
     std::set GetProcessingColumns() const;
-
-    bool HasEarlyFilterOnly() const;
 private:
     bool ParseProgram(const IColumnResolver& columnResolver, const NKikimrSSA::TProgram& program, TString& error);
 };