From a33d1e2cc69b50409f7c99c9786f1097ffe7f669 Mon Sep 17 00:00:00 2001 From: Yuriy Kaminskiy Date: Tue, 28 May 2024 15:44:49 +0300 Subject: [PATCH] Fix YQL-18475 ProcessSpilledData -> NextJoinedData -> HasMoreTuples -> iterates bucketId (refrerencing JoinTable1->CurrIterBucket) out of un-spilled data -> SIGSEGV in GetTupleData on access to empty/spilled TableBuckets[bucketNum].keyIntVals --- .../minikql/comp_nodes/mkql_grace_join.cpp | 3 +- .../comp_nodes/mkql_grace_join_imp.cpp | 40 +++++++++---------- .../minikql/comp_nodes/mkql_grace_join_imp.h | 6 ++- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp index e68d15f42084..2188edfcecfc 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp @@ -864,9 +864,8 @@ EFetchResult ProcessSpilledData(TComputationContext&, NUdf::TUnboxedValue*const* if (LeftPacker->TablePtr->IsBucketInMemory(NextBucketToJoin) && RightPacker->TablePtr->IsBucketInMemory(NextBucketToJoin)) { if (*PartialJoinCompleted) { - while (JoinedTablePtr->NextJoinedData(LeftPacker->JoinTupleData, RightPacker->JoinTupleData)) { + while (JoinedTablePtr->NextJoinedData(LeftPacker->JoinTupleData, RightPacker->JoinTupleData, NextBucketToJoin + 1)) { UnpackJoinedData(output); - return EFetchResult::One; } diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp index 2b71f4c4aa0e..3346d4843e24 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp @@ -154,21 +154,21 @@ void TTable::ResetIterator() { } // Checks if there are more tuples and sets bucketId and tupleId to next valid. -inline bool HasMoreTuples(std::vector & tableBucketsStats, ui64 & bucketId, ui64 & tupleId ) { +inline bool HasMoreTuples(std::vector & tableBucketsStats, ui64 & bucketId, ui64 & tupleId, ui64 bucketLimit ) { - if (bucketId >= tableBucketsStats.size()) return false; + if (bucketId >= bucketLimit) return false; if ( tupleId >= tableBucketsStats[bucketId].TuplesNum ) { tupleId = 0; bucketId ++; - if (bucketId == tableBucketsStats.size()) { + if (bucketId == bucketLimit) { return false; } while( tableBucketsStats[bucketId].TuplesNum == 0 ) { bucketId ++; - if (bucketId == tableBucketsStats.size()) { + if (bucketId == bucketLimit) { return false; } } @@ -181,7 +181,7 @@ inline bool HasMoreTuples(std::vector & tableBucketsStats, ui // Returns value of next tuple. Returs true if there are more tuples bool TTable::NextTuple(TupleData & td){ - if (HasMoreTuples(TableBucketsStats, CurrIterBucket, CurrIterIndex )) { + if (HasMoreTuples(TableBucketsStats, CurrIterBucket, CurrIterIndex, TableBucketsStats.size())) { GetTupleData(CurrIterBucket, CurrIterIndex, td); CurrIterIndex++; return true; @@ -767,15 +767,15 @@ inline bool HasRightIdMatch(ui64 currId, ui64 & rightIdIter, const std::vectorTableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) + if (HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { JoinTable1->GetTupleData(JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, td1); - if (HasMoreTuples(JoinTable2->TableBucketsStats, JoinTable2->CurrIterBucket, JoinTable2->CurrIterIndex)) + if (HasMoreTuples(JoinTable2->TableBucketsStats, JoinTable2->CurrIterBucket, JoinTable2->CurrIterIndex, bucketLimit)) { JoinTable2->GetTupleData(JoinTable2->CurrIterBucket, JoinTable2->CurrIterIndex, td2); JoinTable2->CurrIterIndex++; @@ -786,7 +786,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { JoinTable2->CurrIterBucket = 0; JoinTable2->CurrIterIndex = 0; JoinTable1->CurrIterIndex++; - return NextJoinedData(td1, td2); + return NextJoinedData(td1, td2, bucketLimit); } } else @@ -794,7 +794,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { } if ( JoinKind == EJoinKind::Inner ) { - while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; if (HasJoinedTupleId(JoinTable1, tupleId2)) { @@ -810,7 +810,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { } if ( JoinKind == EJoinKind::Left ) { - while (HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + while (HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; if (HasJoinedTupleId(JoinTable1, tupleId2)) { @@ -845,7 +845,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { } if ( JoinKind == EJoinKind::Right ) { - while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; if (HasJoinedTupleId(JoinTable1, tupleId2)) { @@ -886,7 +886,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { if ( RightTableBatch_ && HasMoreRightTuples_ ) return false; - while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; bool globalMatchedId = false; @@ -917,7 +917,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { if (LeftTableBatch_ && HasMoreLeftTuples_ ) return false; - while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; bool globalMatchedId = false; @@ -949,7 +949,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { if (RightTableBatch_ && HasMoreRightTuples_ ) return false; - while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; if ( !RightTableBatch_ && HasJoinedTupleId(JoinTable1, tupleId2)) @@ -983,7 +983,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { if (LeftTableBatch_ && HasMoreLeftTuples_ ) return false; - while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + while(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; if ( !LeftTableBatch_ && HasJoinedTupleId(JoinTable1, tupleId2)) { @@ -1010,7 +1010,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { } if ( JoinKind == EJoinKind::Full ) { - if(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + if(HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; if (HasJoinedTupleId(JoinTable1, tupleId2)) { @@ -1036,7 +1036,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { Table2Initialized_ = true; } - while (HasMoreTuples(JoinTable2->TableBucketsStats, JoinTable2->CurrIterBucket, JoinTable2->CurrIterIndex)) { + while (HasMoreTuples(JoinTable2->TableBucketsStats, JoinTable2->CurrIterBucket, JoinTable2->CurrIterIndex, bucketLimit)) { if (CurrIterBucket != JoinTable2->CurrIterBucket) { CurrIterBucket = JoinTable2->CurrIterBucket; @@ -1060,7 +1060,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { } if ( JoinKind == EJoinKind::Exclusion ) { - while (HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex)) { + while (HasMoreTuples(JoinTable1->TableBucketsStats, JoinTable1->CurrIterBucket, JoinTable1->CurrIterIndex, bucketLimit)) { ui32 tupleId2; if (HasJoinedTupleId(JoinTable1, tupleId2)) { @@ -1078,7 +1078,7 @@ bool TTable::NextJoinedData( TupleData & td1, TupleData & td2) { td1.AllNulls = true; - while (HasMoreTuples(JoinTable2->TableBucketsStats, JoinTable2->CurrIterBucket, JoinTable2->CurrIterIndex)) { + while (HasMoreTuples(JoinTable2->TableBucketsStats, JoinTable2->CurrIterBucket, JoinTable2->CurrIterIndex, bucketLimit)) { if (CurrIterBucket != JoinTable2->CurrIterBucket) { CurrIterBucket = JoinTable2->CurrIterBucket; diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h index 1f5832518107..f8a6226e6ae6 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h @@ -265,7 +265,11 @@ class TTable { void Join(TTable& t1, TTable& t2, EJoinKind joinKind = EJoinKind::Inner, bool hasMoreLeftTuples = false, bool hasMoreRightTuples = false, ui32 fromBucket = 0, ui32 toBucket = NumberOfBuckets); // Returns next jointed tuple data. Returs true if there are more tuples - bool NextJoinedData(TupleData& td1, TupleData& td2); + bool NextJoinedData(TupleData& td1, TupleData& td2, ui64 bucketLimit); + + bool NextJoinedData(TupleData& td1, TupleData& td2) { + return NextJoinedData(td1, td2, JoinTable1->TableBucketsStats.size()); + } // Creates buckets that support spilling. void InitializeBucketSpillers(ISpiller::TPtr spiller);