-
Notifications
You must be signed in to change notification settings - Fork 606
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add validations for inconsistency states for scanners (#3526)
- Loading branch information
1 parent
3c19574
commit 1acf95f
Showing
11 changed files
with
170 additions
and
219 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#include "batch_iterator.h" | ||
|
||
namespace NKikimr::NArrow::NMerger { | ||
|
||
// Builds a JSON snapshot of the iterator for diagnostics:
//   "is_cp" - value of IsControlPoint();
//   "key"   - debug representation of the key position (KeyColumns.DebugJson()).
NJson::TJsonValue TBatchIterator::DebugJson() const {
    NJson::TJsonValue json;
    json["key"] = KeyColumns.DebugJson();
    json["is_cp"] = IsControlPoint();
    return json;
}
|
||
NKikimr::NArrow::NMerger::TSortableBatchPosition::TFoundPosition TBatchIterator::SkipToLower(const TSortableBatchPosition& pos) { | ||
const ui32 posStart = KeyColumns.GetPosition(); | ||
auto result = KeyColumns.SkipToLower(pos); | ||
const i32 delta = IsReverse() ? (posStart - KeyColumns.GetPosition()) : (KeyColumns.GetPosition() - posStart); | ||
AFL_VERIFY(delta >= 0); | ||
AFL_VERIFY(VersionColumns.InitPosition(KeyColumns.GetPosition()))("pos", KeyColumns.GetPosition()) | ||
("size", VersionColumns.GetRecordsCount())("key_size", KeyColumns.GetRecordsCount()); | ||
if (FilterIterator && delta) { | ||
AFL_VERIFY(FilterIterator->Next(delta)); | ||
} | ||
return result; | ||
} | ||
|
||
bool TBatchIterator::Next() { | ||
const bool result = KeyColumns.NextPosition(ReverseSortKff) && VersionColumns.NextPosition(ReverseSortKff); | ||
if (FilterIterator) { | ||
Y_ABORT_UNLESS(result == FilterIterator->Next(1)); | ||
} | ||
return result; | ||
} | ||
|
||
// Ordering between two iterators.
//  * Different keys: returns true when this key compares GREATER than the
//    other one.
//  * Equal keys: a control point on the left never compares less; a control
//    point on the right always makes the left one compare less; otherwise the
//    version columns decide.
bool TBatchIterator::operator<(const TBatchIterator& item) const {
    const std::partial_ordering keyOrder = KeyColumns.Compare(item.KeyColumns);
    if (keyOrder != std::partial_ordering::equivalent) {
        // Inverted on purpose (per the original author's note): a max-heap is used,
        // though the minimal element is needed when not reversed; the reverse
        // direction is already accounted for inside KeyColumns.
        return keyOrder == std::partial_ordering::greater;
    }
    if (IsControlPoint()) {
        return false;
    }
    if (item.IsControlPoint()) {
        return true;
    }
    // No inversion here (per the original author's note): the maximal version must
    // come first, and VersionColumns does not include the reverse direction.
    return VersionColumns.Compare(item.VersionColumns) == std::partial_ordering::less;
}
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#pragma once | ||
#include "position.h" | ||
#include <ydb/core/formats/arrow/arrow_filter.h> | ||
|
||
namespace NKikimr::NArrow::NMerger { | ||
|
||
class TBatchIterator { | ||
private: | ||
bool ControlPointFlag; | ||
TSortableBatchPosition KeyColumns; | ||
TSortableBatchPosition VersionColumns; | ||
i64 RecordsCount; | ||
int ReverseSortKff; | ||
|
||
std::shared_ptr<NArrow::TColumnFilter> Filter; | ||
std::shared_ptr<NArrow::TColumnFilter::TIterator> FilterIterator; | ||
|
||
i32 GetFirstPosition() const { | ||
if (ReverseSortKff > 0) { | ||
return 0; | ||
} else { | ||
return RecordsCount - 1; | ||
} | ||
} | ||
|
||
public: | ||
NJson::TJsonValue DebugJson() const; | ||
|
||
const std::shared_ptr<NArrow::TColumnFilter>& GetFilter() const { | ||
return Filter; | ||
} | ||
|
||
bool IsControlPoint() const { | ||
return ControlPointFlag; | ||
} | ||
|
||
const TSortableBatchPosition& GetKeyColumns() const { | ||
return KeyColumns; | ||
} | ||
|
||
const TSortableBatchPosition& GetVersionColumns() const { | ||
return VersionColumns; | ||
} | ||
|
||
TBatchIterator(const TSortableBatchPosition& keyColumns) | ||
: ControlPointFlag(true) | ||
, KeyColumns(keyColumns) { | ||
|
||
} | ||
|
||
template <class TDataContainer> | ||
TBatchIterator(std::shared_ptr<TDataContainer> batch, std::shared_ptr<NArrow::TColumnFilter> filter, | ||
const std::vector<std::string>& keyColumns, const std::vector<std::string>& dataColumns, const bool reverseSort, const std::vector<std::string>& versionColumnNames) | ||
: ControlPointFlag(false) | ||
, KeyColumns(batch, 0, keyColumns, dataColumns, reverseSort) | ||
, VersionColumns(batch, 0, versionColumnNames, {}, false) | ||
, RecordsCount(batch->num_rows()) | ||
, ReverseSortKff(reverseSort ? -1 : 1) | ||
, Filter(filter) { | ||
Y_ABORT_UNLESS(KeyColumns.InitPosition(GetFirstPosition())); | ||
Y_ABORT_UNLESS(VersionColumns.InitPosition(GetFirstPosition())); | ||
if (Filter) { | ||
FilterIterator = std::make_shared<NArrow::TColumnFilter::TIterator>(Filter->GetIterator(reverseSort, RecordsCount)); | ||
} | ||
} | ||
|
||
bool CheckNextBatch(const TBatchIterator& nextIterator) { | ||
return KeyColumns.Compare(nextIterator.KeyColumns) == std::partial_ordering::less; | ||
} | ||
|
||
bool IsReverse() const { | ||
return ReverseSortKff < 0; | ||
} | ||
|
||
bool IsDeleted() const { | ||
if (!FilterIterator) { | ||
return false; | ||
} | ||
return !FilterIterator->GetCurrentAcceptance(); | ||
} | ||
|
||
TSortableBatchPosition::TFoundPosition SkipToLower(const TSortableBatchPosition& pos); | ||
|
||
bool Next(); | ||
|
||
bool operator<(const TBatchIterator& item) const; | ||
}; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.