From 591c2d22dcd2c2dac64b04ad35944baf5dcf6be5 Mon Sep 17 00:00:00 2001 From: kungasc Date: Tue, 6 Feb 2024 16:19:02 +0000 Subject: [PATCH 1/9] do not stop groups charge if isn't ready --- .../flat_part_charge_btree_index.h | 34 +++++++--------- .../ut/ut_btree_index_iter_charge.cpp | 39 +++++++++++++------ 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index ee091ff8e095..e992e96de05c 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -494,18 +494,23 @@ class TChargeBTreeIndex : public ICharge { const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; TVector level, nextLevel(::Reserve(3)); - ui64 prevBeginDataSize = 0; - ui64 prevBeginBytesLimitDataSize = bytesLimit ? GetPrevDataSize(meta, beginBytesLimitRowId) : 0; + + ui64 prevBeginBytesLimitDataSize = 0; + if (bytesLimit) { + prevBeginBytesLimitDataSize = GetPrevDataSize(meta, beginBytesLimitRowId); + if (beginBytesLimitRowId < beginRowId) { + ui64 prevBeginDataSize = GetPrevDataSize(meta, beginRowId); + if (LimitExceeded(prevBeginDataSize - prevBeginBytesLimitDataSize, bytesLimit)) { + return true; + } + } + } const auto iterateLevel = [&](const auto& tryHandleChild) { - ui64 prevChildDataSize = prevBeginDataSize; for (const auto &node : level) { TRecIdx from = 0, to = node.GetChildrenCount(); if (node.BeginRowId < beginRowId) { from = node.Seek(beginRowId); - if (from) { - prevChildDataSize = prevBeginDataSize = node.GetShortChild(from - 1).DataSize; - } } if (node.EndRowId > endRowId) { to = node.Seek(endRowId - 1) + 1; @@ -515,13 +520,12 @@ class TChargeBTreeIndex : public ICharge { auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; TRowId childEndRowId = child->RowCount; + ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); if (bytesLimit) { - if (prevChildDataSize > prevBeginBytesLimitDataSize && LimitExceeded(prevChildDataSize - prevBeginBytesLimitDataSize, bytesLimit)) { + if (child->DataSize > prevBeginBytesLimitDataSize && LimitExceeded(child->DataSize - prevBeginBytesLimitDataSize, bytesLimit)) { return; } } - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); - prevChildDataSize = child->DataSize; } } }; @@ -534,7 +538,7 @@ class TChargeBTreeIndex : public ICharge { return HasDataPage(child.PageId, groupId); }; - for (ui32 height = 0; height < meta.LevelCount && ready; height++) { + for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); } else { @@ -544,10 +548,6 @@ class TChargeBTreeIndex : public ICharge { nextLevel.clear(); } - if (!ready) { // some index pages are missing, do not continue - return ready; - } - if (meta.LevelCount == 0) { ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); } else { @@ -598,7 +598,7 @@ class TChargeBTreeIndex : public ICharge { return HasDataPage(child.PageId, groupId); }; - for (ui32 height = 0; height < meta.LevelCount && ready; height++) { + for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); } else { @@ -608,10 +608,6 @@ class TChargeBTreeIndex : public ICharge { nextLevel.clear(); } - if (!ready) { // some index pages are missing, do not continue - return ready; - } - if (meta.LevelCount == 0) { ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); } else { diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 68b2433f4682..b68a313da470 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -79,15 +79,16 @@ namespace { } } - struct TMakePartParams { + struct TTestParams { const ui32 Levels = Max(); const bool Groups = false; const bool History = false; const bool Slices = false; const ui32 Rows = 40; + const bool PrechargeSome = false; }; - TPartEggs MakePart(TMakePartParams params) { + TPartEggs MakePart(TTestParams params) { NPage::TConf conf; switch (params.Levels) { case 0: @@ -370,7 +371,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) { } } - void CheckPart(TMakePartParams params) { + void CheckPart(TTestParams params) { TPartEggs eggs = MakePart(params); const auto part = *eggs.Lone(); @@ -394,6 +395,19 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) { } Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { + void PrepareEnvs(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults &keyDefaults, TTouchEnv& bTreeEnv, TTouchEnv& flatEnv) { + if (params.PrechargeSome) { + TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); + + for (int times = 0; times < 5; times++) { + bTree.ICharge::Do(10, 10, keyDefaults, 0, 0); + bTreeEnv.LoadTouched(); + } + + flatEnv.Loaded = bTreeEnv.Loaded; + } + } + void DoChargeRowId(ICharge& charge, TTouchEnv& env, const TRowId row1, const TRowId row2, ui64 itemsLimit, ui64 bytesLimit, bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 10) { while (true) { @@ -424,7 +438,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { Y_UNREACHABLE(); } - void CheckChargeRowId(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { + void CheckChargeRowId(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { for (ui64 itemsLimit : TVector{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) { for (TRowId rowId1 : xrange(0, part.Stat.Rows - 1)) { @@ -432,6 +446,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv bTreeEnv, flatEnv; TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); TCharge flat(&flatEnv, part, tags, true); + PrepareEnvs(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); TString message = TStringBuilder() << (reverse ? "ChargeRowIdReverse " : "ChargeRowId ") << rowId1 << " " << rowId2 << " items " << itemsLimit; DoChargeRowId(bTree, bTreeEnv, rowId1, rowId2, itemsLimit, 0, reverse, *keyDefaults, message); @@ -444,7 +459,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } } - void CheckChargeKeys(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { + void CheckChargeKeys(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { for (ui64 itemsLimit : TVector{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) { for (ui32 firstCellKey1 : xrange(0, part.Stat.Rows / 7 + 1)) { @@ -457,6 +472,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv bTreeEnv, flatEnv; TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); TCharge flat(&flatEnv, part, tags, true); + PrepareEnvs(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); TStringBuilder message = TStringBuilder() << (reverse ? "ChargeKeysReverse " : "ChargeKeys ") << "("; for (auto c : key1) { @@ -487,7 +503,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } } - void CheckChargeBytesLimit(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { + void CheckChargeBytesLimit(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { for (ui64 bytesLimit : xrange(1, part.Stat.Bytes + 100, part.Stat.Bytes / 100)) { for (ui32 firstCellKey1 : xrange(0, part.Stat.Rows / 7 + 1)) { @@ -497,6 +513,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv limitedEnv, unlimitedEnv; TChargeBTreeIndex limitedCharge(&limitedEnv, part, tags, true); TChargeBTreeIndex unlimitedCharge(&unlimitedEnv, part, tags, true); + PrepareEnvs(params, part, tags, *keyDefaults, limitedEnv, unlimitedEnv); TStringBuilder message = TStringBuilder() << (reverse ? "ChargeBytesLimitReverse " : "ChargeBytesLimit ") << "("; for (auto c : key1) { @@ -551,7 +568,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } } - void CheckPart(TMakePartParams params) { + void CheckPart(TTestParams params) { TPartEggs eggs = MakePart(params); const auto part = *eggs.Lone(); @@ -560,9 +577,9 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { tags.push_back(c.Tag); } - CheckChargeRowId(part, tags, eggs.Scheme->Keys.Get()); - CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get()); - CheckChargeBytesLimit(part, tags, eggs.Scheme->Keys.Get()); + CheckChargeRowId(params, part, tags, eggs.Scheme->Keys.Get()); + CheckChargeKeys(params, part, tags, eggs.Scheme->Keys.Get()); + CheckChargeBytesLimit(params, part, tags, eggs.Scheme->Keys.Get()); } Y_UNIT_TEST(NoNodes) { @@ -756,7 +773,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) { } } - void CheckPart(TMakePartParams params) { + void CheckPart(TTestParams params) { TPartEggs eggs = MakePart(params); const auto part = *eggs.Lone(); From cebb3ad5518916402929a53b81d0a5dfd79bcc4c Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 7 Feb 2024 12:34:50 +0000 Subject: [PATCH 2/9] stick pages --- .../ut/ut_btree_index_iter_charge.cpp | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index b68a313da470..0a6af362f240 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -19,7 +19,7 @@ namespace { const TSharedData* TryGetPage(const TPart *part, TPageId pageId, TGroupId groupId) override { Touched[groupId].insert(pageId); - if (Loaded[groupId].contains(pageId)) { + if (Loaded[groupId].contains(pageId) || Sticky[groupId].contains(pageId)) { return NTest::TTestEnv::TryGetPage(part, pageId, groupId); } return nullptr; @@ -32,8 +32,17 @@ namespace { Touched.clear(); } + void StickLoaded() { + for (const auto &g : Loaded) { + Sticky[g.first].insert(g.second.begin(), g.second.end()); + } + Touched.clear(); + Loaded.clear(); + } + TMap> Loaded; TMap> Touched; + TMap> Sticky; }; void AssertLoadedTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message, @@ -85,7 +94,7 @@ namespace { const bool History = false; const bool Slices = false; const ui32 Rows = 40; - const bool PrechargeSome = false; + const bool StickSomePages = true; }; TPartEggs MakePart(TTestParams params) { @@ -395,8 +404,8 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) { } Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { - void PrepareEnvs(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults &keyDefaults, TTouchEnv& bTreeEnv, TTouchEnv& flatEnv) { - if (params.PrechargeSome) { + void StickSomePages(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults &keyDefaults, TTouchEnv& bTreeEnv, TTouchEnv& flatEnv) { + if (params.StickSomePages) { TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); for (int times = 0; times < 5; times++) { @@ -405,6 +414,8 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } flatEnv.Loaded = bTreeEnv.Loaded; + flatEnv.StickLoaded(); + bTreeEnv.StickLoaded(); } } @@ -446,7 +457,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv bTreeEnv, flatEnv; TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); TCharge flat(&flatEnv, part, tags, true); - PrepareEnvs(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); + StickSomePages(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); TString message = TStringBuilder() << (reverse ? "ChargeRowIdReverse " : "ChargeRowId ") << rowId1 << " " << rowId2 << " items " << itemsLimit; DoChargeRowId(bTree, bTreeEnv, rowId1, rowId2, itemsLimit, 0, reverse, *keyDefaults, message); @@ -472,7 +483,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv bTreeEnv, flatEnv; TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); TCharge flat(&flatEnv, part, tags, true); - PrepareEnvs(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); + StickSomePages(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); TStringBuilder message = TStringBuilder() << (reverse ? "ChargeKeysReverse " : "ChargeKeys ") << "("; for (auto c : key1) { @@ -513,7 +524,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv limitedEnv, unlimitedEnv; TChargeBTreeIndex limitedCharge(&limitedEnv, part, tags, true); TChargeBTreeIndex unlimitedCharge(&unlimitedEnv, part, tags, true); - PrepareEnvs(params, part, tags, *keyDefaults, limitedEnv, unlimitedEnv); + StickSomePages(params, part, tags, *keyDefaults, limitedEnv, unlimitedEnv); TStringBuilder message = TStringBuilder() << (reverse ? "ChargeBytesLimitReverse " : "ChargeBytesLimit ") << "("; for (auto c : key1) { From c3b175b5b2fc4f35372c6a164e2ed218ed548782 Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 7 Feb 2024 15:51:36 +0000 Subject: [PATCH 3/9] child state --- .../flat_part_charge_btree_index.h | 247 ++++++++++-------- .../ut/ut_btree_index_iter_charge.cpp | 7 +- 2 files changed, 142 insertions(+), 112 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index e992e96de05c..0bfb7bfdf4b0 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -16,20 +16,25 @@ class TChargeBTreeIndex : public ICharge { struct TChildState { TPageId PageId; - TRowId BeginRowId; - TRowId EndRowId; + TRowId BeginRowId, EndRowId; + TRowId PrevItems, Items; + ui64 PrevDataSize, DataSize; - TChildState(TPageId pageId, TRowId beginRowId, TRowId endRowId) + TChildState(TPageId pageId, TRowId beginRowId, TRowId endRowId, TRowId prevItems, TRowId items, ui64 prevDataSize, ui64 dataSize) : PageId(pageId) , BeginRowId(beginRowId) , EndRowId(endRowId) + , PrevItems(prevItems) + , Items(items) + , PrevDataSize(prevDataSize) + , DataSize(dataSize) { } }; struct TNodeState : TChildState, TBtreeIndexNode { - TNodeState(TSharedData data, TPageId pageId, TRowId beginRowId, TRowId endRowId) - : TChildState(pageId, beginRowId, endRowId) + TNodeState(TSharedData data, TPageId pageId, TRowId beginRowId, TRowId endRowId, TRowId prevItems, TRowId items, ui64 prevDataSize, ui64 dataSize) + : TChildState(pageId, beginRowId, endRowId, prevItems, items, prevDataSize, dataSize) , TBtreeIndexNode(data) { } @@ -62,13 +67,11 @@ class TChargeBTreeIndex : public ICharge { public: TResult Do(TCells key1, TCells key2, TRowId beginRowId, TRowId endRowId, const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override { + Cerr << "Do " << beginRowId << " " << endRowId << " " << itemsLimit << Endl; endRowId++; // current interface accepts inclusive row2 bound Y_ABORT_UNLESS(beginRowId < endRowId); - bool ready = true, overshot = true; - bool hasValidRowsRange = Groups || IncludeHistory; // false value means that beginRowId, endRowId are invalid and shouldn't be used - ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups - TRowId beginBytesLimitRowId = Max(); // first unloaded probably needed row + bool ready = true, overshot = true, hasValidRowsRange = true; const auto& meta = Part->IndexPages.BTreeGroups[0]; Y_ABORT_UNLESS(endRowId <= meta.RowCount); @@ -82,91 +85,79 @@ class TChargeBTreeIndex : public ICharge { TVector level, nextLevel(::Reserve(3)); TPageId key1PageId = key1 ? meta.PageId : Max(); TPageId key2PageId = key2 ? meta.PageId : Max(); - ui64 key1Items = 0, prevKey1Items = 0; + TChildState firstChild = BuildRootChildState(meta); const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity // always load beginRowId regardless of keys - const TRowId levelBeginRowId = beginRowId, levelEndRowId = Max(endRowId, beginRowId + 1); - const TChild* levelFirstChild = nullptr; - + const TRowId levelBeginRowId = beginRowId, levelEndRowId = endRowId; + for (const auto &node : level) { if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) { continue; } TRecIdx from = 0, to = node.GetChildrenCount(); - if (node.BeginRowId < levelBeginRowId) { + if (node.BeginRowId <= levelBeginRowId) { from = node.Seek(levelBeginRowId); + if (firstChild.PageId != Max()) { // still valid and should be updated + auto& child = node.GetChild(from); + auto prevChild = from ? node.GetChildRef(from - 1) : nullptr; + firstChild = BuildChildState(node, child, prevChild); + Cerr << " first " << firstChild.PageId << " " << firstChild.BeginRowId << " " << firstChild.EndRowId << Endl; + } } if (node.EndRowId > levelEndRowId) { to = node.Seek(levelEndRowId - 1) + 1; } for (TRecIdx pos : xrange(from, to)) { - auto child = node.GetChildRef(pos); + auto child = node.GetChild(pos); auto prevChild = pos ? node.GetChildRef(pos - 1) : nullptr; - TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId childEndRowId = child->RowCount; - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); - if (itemsLimit || bytesLimit) { - if (!levelFirstChild) { - // do not apply limits on the first child because beginRowId/key1 position is uncertain - levelFirstChild = child; - } else { - if (itemsLimit) { - ui64 items = child->GetNonErasedRowCount() - levelFirstChild->GetNonErasedRowCount(); - if (LimitExceeded(items, itemsLimit)) { - overshot = false; - return; - } - } - if (bytesLimit) { - ui64 bytes = child->DataSize - levelFirstChild->DataSize; - if (LimitExceeded(bytes, bytesLimit)) { - endRowId = Min(endRowId, childEndRowId); - overshot = false; - return; - } - } + auto childState = BuildChildState(node, child, prevChild); + if (itemsLimit) { + if (LimitExceeded(firstChild.Items, childState.PrevItems, itemsLimit)) { + overshot = false; + return; } } + if (bytesLimit) { + // ui64 bytes = child->DataSize - firstChild.DataSize; + // if (LimitExceeded(bytes, bytesLimit)) { + // endRowId = Min(endRowId, childEndRowId); + // overshot = false; + // return; + // } + } + ready &= tryHandleChild(childState); } } }; const auto skipUnloadedRows = [&](const TChildState& child) { + if (child.PageId == firstChild.PageId) { + firstChild.PageId = Max(); // mark first child unloaded + } if (child.PageId == key1PageId) { - if (hasValidRowsRange && chargeGroupsItemsLimit) { - ui64 unloadedItems = key1Items - prevKey1Items; - if (unloadedItems < chargeGroupsItemsLimit) { - chargeGroupsItemsLimit -= unloadedItems; - } else { - hasValidRowsRange = false; - } - } - if (hasValidRowsRange && bytesLimit) { - beginBytesLimitRowId = Max(beginRowId, child.BeginRowId); - } beginRowId = Max(beginRowId, child.EndRowId); + Cerr << " beginRowId " << beginRowId << Endl; } if (child.PageId == key2PageId) { endRowId = Min(endRowId, child.BeginRowId); + Cerr << " endRowId " << endRowId << Endl; } }; - const auto tryHandleNode = [&](TChildState child) -> bool { - if (child.PageId == key1PageId || child.PageId == key2PageId) { + const auto tryHandleNode = [&](const TChildState& child) -> bool { + Cerr << " node " << child.PageId << " " << child.BeginRowId << " " << child.EndRowId << Endl; + if (child.PageId == firstChild.PageId || child.PageId == key1PageId || child.PageId == key2PageId) { if (TryLoadNode(child, nextLevel)) { const auto& node = nextLevel.back(); if (child.PageId == key1PageId) { TRecIdx pos = node.Seek(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults); auto& key1Child = node.GetChild(pos); key1PageId = key1Child.PageId; - key1Items = key1Child.GetNonErasedRowCount(); if (pos) { - auto& prevKey1Child = node.GetChild(pos - 1); - prevKey1Items = prevKey1Child.GetNonErasedRowCount(); - beginRowId = Max(beginRowId, prevKey1Child.RowCount); // move beginRowId to the first key >= key1 + beginRowId = Max(beginRowId, node.GetChild(pos - 1).RowCount); // move beginRowId to the first key >= key1 } } if (child.PageId == key2PageId) { @@ -188,7 +179,8 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { + Cerr << " data " << child.PageId << " " << child.BeginRowId << " " << child.EndRowId << Endl; if (hasValidRowsRange && (child.PageId == key1PageId || child.PageId == key2PageId)) { const auto page = TryGetDataPage(child.PageId, { }); if (page) { @@ -211,19 +203,19 @@ class TChargeBTreeIndex : public ICharge { } }; - for (ui32 height = 0; height < meta.LevelCount && ready; height++) { + for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } level.swap(nextLevel); nextLevel.clear(); - } - - if (!ready) { // some index pages are missing, do not continue - ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds - return {ready, false}; + if (firstChild.PageId == Max()) { // first child is unloaded, consider all first's child rows are needed for next levels + Cerr << " move " << firstChild.Items << " -> " << firstChild.PrevItems << Endl; + firstChild.Items = firstChild.PrevItems; + } } // flat index doesn't treat key placement within data page, so let's do the same @@ -231,12 +223,13 @@ class TChargeBTreeIndex : public ICharge { overshot &= endRowId == sliceEndRowId; if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } - ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + // ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, overshot}; } @@ -285,36 +278,35 @@ class TChargeBTreeIndex : public ICharge { to = node.Seek(levelEndRowId - 1) + 1; } for (TRecIdx posExt = to; posExt > from; posExt--) { - auto child = node.GetChildRef(posExt - 1); + auto& child = node.GetChild(posExt - 1); auto prevChild = posExt - 1 ? node.GetChildRef(posExt - 2) : nullptr; - TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId childEndRowId = child->RowCount; + auto childState = BuildChildState(node, child, prevChild); if (itemsLimit || bytesLimit) { if (!levelLastChild) { // do not apply limits on the last child because endRowId/key1 position is uncertain - levelLastChild = child; + levelLastChild = &child; } else { if (!levelPrevLastChild) { - levelPrevLastChild = child; + levelPrevLastChild = &child; } if (itemsLimit) { - ui64 items = levelPrevLastChild->GetNonErasedRowCount() - child->GetNonErasedRowCount(); + ui64 items = levelPrevLastChild->GetNonErasedRowCount() - child.GetNonErasedRowCount(); if (LimitExceeded(items, itemsLimit)) { overshot = false; return; } } if (bytesLimit) { - ui64 bytes = levelPrevLastChild->DataSize - child->DataSize; - if (LimitExceeded(bytes, bytesLimit)) { - beginRowId = Max(beginRowId, childEndRowId); - overshot = false; - return; - } + // ui64 bytes = levelPrevLastChild->DataSize - child.DataSize; + // if (LimitExceeded(bytes, bytesLimit)) { + // beginRowId = Max(beginRowId, childEndRowId); + // overshot = false; + // return; + // } } } } - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); + ready &= tryHandleChild(childState); } } }; @@ -339,7 +331,7 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleNode = [&](TChildState child) -> bool { + const auto tryHandleNode = [&](const TChildState& child) -> bool { if (child.PageId == key1PageId || child.PageId == key2PageId) { if (TryLoadNode(child, nextLevel)) { const auto& node = nextLevel.back(); @@ -375,7 +367,7 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { if (hasValidRowsRange && (child.PageId == key1PageId || child.PageId == key2PageId)) { const auto page = TryGetDataPage(child.PageId, { }); if (page) { @@ -410,7 +402,8 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount && ready; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } @@ -428,7 +421,8 @@ class TChargeBTreeIndex : public ICharge { overshot &= beginRowId == sliceBeginRowId; if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } @@ -516,31 +510,31 @@ class TChargeBTreeIndex : public ICharge { to = node.Seek(endRowId - 1) + 1; } for (TRecIdx pos : xrange(from, to)) { - auto child = node.GetShortChildRef(pos); + auto& child = node.GetShortChild(pos); auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; - TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId childEndRowId = child->RowCount; - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); + auto childState = BuildChildState(node, child, prevChild); + ready &= tryHandleChild(childState); if (bytesLimit) { - if (child->DataSize > prevBeginBytesLimitDataSize && LimitExceeded(child->DataSize - prevBeginBytesLimitDataSize, bytesLimit)) { - return; - } + // if (child->DataSize > prevBeginBytesLimitDataSize && LimitExceeded(child->DataSize - prevBeginBytesLimitDataSize, bytesLimit)) { + // return; + // } } } } }; - const auto tryHandleNode = [&](TChildState child) -> bool { + const auto tryHandleNode = [&](const TChildState& child) -> bool { return TryLoadNode(child, nextLevel); }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { return HasDataPage(child.PageId, groupId); }; for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } @@ -549,7 +543,8 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } @@ -576,31 +571,32 @@ class TChargeBTreeIndex : public ICharge { to = node.Seek(endRowId - 1) + 1; } for (TRecIdx posExt = to; posExt > from; posExt--) { - auto child = node.GetShortChildRef(posExt - 1); + auto& child = node.GetShortChild(posExt - 1); auto prevChild = posExt - 1 ? node.GetShortChildRef(posExt - 2) : nullptr; - TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId childEndRowId = child->RowCount; + auto childState = BuildChildState(node, child, prevChild); if (bytesLimit) { - if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) { - return; - } + Y_UNUSED(endBytesLimitDataSize); + // if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) { + // return; + // } } - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); + ready &= tryHandleChild(childState); } } }; - const auto tryHandleNode = [&](TChildState child) -> bool { + const auto tryHandleNode = [&](const TChildState& child) -> bool { return TryLoadNode(child, nextLevel); }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { return HasDataPage(child.PageId, groupId); }; for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } @@ -609,7 +605,8 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } @@ -666,9 +663,9 @@ class TChargeBTreeIndex : public ICharge { } for (TRecIdx pos : xrange(from, to)) { auto& child = node.GetShortChild(pos); - TRowId childBeginRowId = pos ? node.GetShortChild(pos - 1).RowCount : node.BeginRowId; - TRowId childEndRowId = child.RowCount; - ready &= tryHandleChild(TChildState(child.PageId, childBeginRowId, childEndRowId)); + auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; + auto childState = BuildChildState(node, child, prevChild); + ready &= tryHandleChild(childState); } } }; @@ -682,7 +679,7 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleNode = [&](TChildState child) -> bool { + const auto tryHandleNode = [&](const TChildState& child) -> bool { if (child.PageId == key1PageId || child.PageId == key2PageId) { if (TryLoadNode(child, nextLevel)) { const auto& node = nextLevel.back(); @@ -710,7 +707,7 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { if (Groups && (child.PageId == key1PageId || child.PageId == key2PageId)) { const auto page = TryGetDataPage(child.PageId, groupId); if (page) { @@ -735,7 +732,8 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } @@ -744,7 +742,8 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } @@ -841,10 +840,36 @@ class TChargeBTreeIndex : public ICharge { : (left.size() > right.size() ? -1 : 1); } + TChildState BuildRootChildState(const TBtreeIndexMeta& meta) const noexcept { + return TChildState(meta.PageId, + 0, meta.RowCount, + 0, meta.GetNonErasedRowCount(), + 0, meta.DataSize); + } + + TChildState BuildChildState(const TNodeState& parent, TChild child, const TChild* prevChild) const noexcept { + return TChildState(child.PageId, + prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, + prevChild ? prevChild->GetNonErasedRowCount() : parent.PrevItems, child.GetNonErasedRowCount(), + prevChild ? prevChild->DataSize : parent.PrevDataSize, child.DataSize); + } + + TChildState BuildChildState(const TNodeState& parent, TShortChild child, const TShortChild* prevChild) const noexcept { + return TChildState(child.PageId, + prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, + prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, + prevChild ? prevChild->DataSize : parent.PrevDataSize, child.DataSize); + } + bool LimitExceeded(ui64 value, ui64 limit) const noexcept { return limit && value > limit; } + bool LimitExceeded(ui64 prev, ui64 current, ui64 limit) const noexcept { + Cerr << " check limit " << prev << " " << current << " " << limit << Endl; + return limit && current > prev && current - prev > limit; + } + private: const TPart* const Part; const TPartScheme &Scheme; diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 0a6af362f240..142a478d9fbf 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -18,8 +18,12 @@ namespace { struct TTouchEnv : public NTest::TTestEnv { const TSharedData* TryGetPage(const TPart *part, TPageId pageId, TGroupId groupId) override { + if (Sticky[groupId].contains(pageId)) { + Loaded[groupId].insert(pageId); + } + Touched[groupId].insert(pageId); - if (Loaded[groupId].contains(pageId) || Sticky[groupId].contains(pageId)) { + if (Loaded[groupId].contains(pageId)) { return NTest::TTestEnv::TryGetPage(part, pageId, groupId); } return nullptr; @@ -47,6 +51,7 @@ namespace { void AssertLoadedTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message, bool allowAdditionalFirstLastPartPages = false, bool allowAdditionalFirstLoadedPage = false, bool allowLastLoadedPageDifference = false) { + TSet groupIds; for (const auto &c : {bTree.Loaded, flat.Loaded}) { for (const auto &g : c) { From 66441e79cd61898cc11070481bd5271a4ff19e1f Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 7 Feb 2024 19:42:02 +0300 Subject: [PATCH 4/9] fix bytes limit --- .../flat_part_charge_btree_index.h | 41 +++++++------------ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index 0bfb7bfdf4b0..18256ecd2416 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -18,7 +18,7 @@ class TChargeBTreeIndex : public ICharge { TPageId PageId; TRowId BeginRowId, EndRowId; TRowId PrevItems, Items; - ui64 PrevDataSize, DataSize; + ui64 PrevBytes, Bytes; TChildState(TPageId pageId, TRowId beginRowId, TRowId endRowId, TRowId prevItems, TRowId items, ui64 prevDataSize, ui64 dataSize) : PageId(pageId) @@ -26,8 +26,8 @@ class TChargeBTreeIndex : public ICharge { , EndRowId(endRowId) , PrevItems(prevItems) , Items(items) - , PrevDataSize(prevDataSize) - , DataSize(dataSize) + , PrevBytes(prevDataSize) + , Bytes(dataSize) { } }; @@ -69,14 +69,13 @@ class TChargeBTreeIndex : public ICharge { const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override { Cerr << "Do " << beginRowId << " " << endRowId << " " << itemsLimit << Endl; endRowId++; // current interface accepts inclusive row2 bound - Y_ABORT_UNLESS(beginRowId < endRowId); bool ready = true, overshot = true, hasValidRowsRange = true; - + const TRowId sliceEndRowId = endRowId; const auto& meta = Part->IndexPages.BTreeGroups[0]; + Y_ABORT_UNLESS(beginRowId < endRowId); Y_ABORT_UNLESS(endRowId <= meta.RowCount); - const TRowId sliceEndRowId = endRowId; if (Y_UNLIKELY(key1 && key2 && Compare(key1, key2, keyDefaults) > 0)) { key2 = key1; // will not go further than key1 hasValidRowsRange = false; @@ -89,7 +88,7 @@ class TChargeBTreeIndex : public ICharge { const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity - // always load beginRowId regardless of keys + // ??? always load beginRowId regardless of keys const TRowId levelBeginRowId = beginRowId, levelEndRowId = endRowId; for (const auto &node : level) { @@ -114,19 +113,10 @@ class TChargeBTreeIndex : public ICharge { auto child = node.GetChild(pos); auto prevChild = pos ? node.GetChildRef(pos - 1) : nullptr; auto childState = BuildChildState(node, child, prevChild); - if (itemsLimit) { - if (LimitExceeded(firstChild.Items, childState.PrevItems, itemsLimit)) { - overshot = false; - return; - } - } - if (bytesLimit) { - // ui64 bytes = child->DataSize - firstChild.DataSize; - // if (LimitExceeded(bytes, bytesLimit)) { - // endRowId = Min(endRowId, childEndRowId); - // overshot = false; - // return; - // } + if (LimitExceeded(firstChild.Items, childState.PrevItems, itemsLimit) || LimitExceeded(firstChild.Bytes, childState.PrevBytes, bytesLimit)) { + endRowId = Min(endRowId, childState.BeginRowId); + overshot = false; + return; } ready &= tryHandleChild(childState); } @@ -205,8 +195,7 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleNode(root); + ready &= tryHandleNode(BuildRootChildState(meta)); } else { iterateLevel(tryHandleNode); } @@ -215,6 +204,7 @@ class TChargeBTreeIndex : public ICharge { if (firstChild.PageId == Max()) { // first child is unloaded, consider all first's child rows are needed for next levels Cerr << " move " << firstChild.Items << " -> " << firstChild.PrevItems << Endl; firstChild.Items = firstChild.PrevItems; + firstChild.Bytes = firstChild.PrevBytes; } } @@ -223,8 +213,7 @@ class TChargeBTreeIndex : public ICharge { overshot &= endRowId == sliceEndRowId; if (meta.LevelCount == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleDataPage(root); + ready &= tryHandleDataPage(BuildRootChildState(meta)); } else { iterateLevel(tryHandleDataPage); } @@ -851,14 +840,14 @@ class TChargeBTreeIndex : public ICharge { return TChildState(child.PageId, prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, prevChild ? prevChild->GetNonErasedRowCount() : parent.PrevItems, child.GetNonErasedRowCount(), - prevChild ? prevChild->DataSize : parent.PrevDataSize, child.DataSize); + prevChild ? prevChild->DataSize : parent.PrevBytes, child.DataSize); } TChildState BuildChildState(const TNodeState& parent, TShortChild child, const TShortChild* prevChild) const noexcept { return TChildState(child.PageId, prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, - prevChild ? prevChild->DataSize : parent.PrevDataSize, child.DataSize); + prevChild ? prevChild->DataSize : parent.PrevBytes, child.DataSize); } bool LimitExceeded(ui64 value, ui64 limit) const noexcept { From cc512ec145717edee236a3b110b6bac7a37389f4 Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 7 Feb 2024 18:07:49 +0000 Subject: [PATCH 5/9] clean up --- .../flat_part_charge_btree_index.h | 12 ++--------- .../ut/ut_btree_index_iter_charge.cpp | 20 +++++++++++++++++-- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index 18256ecd2416..56041d8d3353 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -67,7 +67,6 @@ class TChargeBTreeIndex : public ICharge { public: TResult Do(TCells key1, TCells key2, TRowId beginRowId, TRowId endRowId, const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override { - Cerr << "Do " << beginRowId << " " << endRowId << " " << itemsLimit << Endl; endRowId++; // current interface accepts inclusive row2 bound bool ready = true, overshot = true, hasValidRowsRange = true; @@ -88,8 +87,8 @@ class TChargeBTreeIndex : public ICharge { const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity - // ??? always load beginRowId regardless of keys - const TRowId levelBeginRowId = beginRowId, levelEndRowId = endRowId; + // always load beginRowId regardless of keys + const TRowId levelBeginRowId = beginRowId, levelEndRowId = Max(endRowId, beginRowId + 1); for (const auto &node : level) { if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) { @@ -103,7 +102,6 @@ class TChargeBTreeIndex : public ICharge { auto& child = node.GetChild(from); auto prevChild = from ? node.GetChildRef(from - 1) : nullptr; firstChild = BuildChildState(node, child, prevChild); - Cerr << " first " << firstChild.PageId << " " << firstChild.BeginRowId << " " << firstChild.EndRowId << Endl; } } if (node.EndRowId > levelEndRowId) { @@ -129,16 +127,13 @@ class TChargeBTreeIndex : public ICharge { } if (child.PageId == key1PageId) { beginRowId = Max(beginRowId, child.EndRowId); - Cerr << " beginRowId " << beginRowId << Endl; } if (child.PageId == key2PageId) { endRowId = Min(endRowId, child.BeginRowId); - Cerr << " endRowId " << endRowId << Endl; } }; const auto tryHandleNode = [&](const TChildState& child) -> bool { - Cerr << " node " << child.PageId << " " << child.BeginRowId << " " << child.EndRowId << Endl; if (child.PageId == firstChild.PageId || child.PageId == key1PageId || child.PageId == key2PageId) { if (TryLoadNode(child, nextLevel)) { const auto& node = nextLevel.back(); @@ -170,7 +165,6 @@ class TChargeBTreeIndex : public ICharge { }; const auto tryHandleDataPage = [&](const TChildState& child) -> bool { - Cerr << " data " << child.PageId << " " << child.BeginRowId << " " << child.EndRowId << Endl; if (hasValidRowsRange && (child.PageId == key1PageId || child.PageId == key2PageId)) { const auto page = TryGetDataPage(child.PageId, { }); if (page) { @@ -202,7 +196,6 @@ class TChargeBTreeIndex : public ICharge { level.swap(nextLevel); nextLevel.clear(); if (firstChild.PageId == Max()) { // first child is unloaded, consider all first's child rows are needed for next levels - Cerr << " move " << firstChild.Items << " -> " << firstChild.PrevItems << Endl; firstChild.Items = firstChild.PrevItems; firstChild.Bytes = firstChild.PrevBytes; } @@ -855,7 +848,6 @@ class TChargeBTreeIndex : public ICharge { } bool LimitExceeded(ui64 prev, ui64 current, ui64 limit) const noexcept { - Cerr << " check limit " << prev << " " << current << " " << limit << Endl; return limit && current > prev && current - prev > limit; } diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 142a478d9fbf..f7418f0acbc0 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -99,7 +99,7 @@ namespace { const bool History = false; const bool Slices = false; const ui32 Rows = 40; - const bool StickSomePages = true; + const bool StickSomePages = false; }; TPartEggs MakePart(TTestParams params) { @@ -330,7 +330,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) { } void CheckSeekKey(const TPartStore& part, const TKeyCellDefaults *keyDefaults) { - for (bool reverse : {false}) { + for (bool reverse : {false, true}) { for (ESeek seek : {ESeek::Exact, ESeek::Lower, ESeek::Upper}) { for (ui32 firstCell : xrange(0, part.Stat.Rows / 7 + 1)) { for (ui32 secondCell : xrange(0, 14)) { @@ -642,9 +642,17 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { CheckPart({.Levels = 3, .History = true}); } + Y_UNIT_TEST(FewNodes_Sticky) { + CheckPart({.Levels = 3, .StickSomePages = true}); + } + Y_UNIT_TEST(FewNodes_Groups_History) { CheckPart({.Levels = 3, .Groups = true, .History = true}); } + + Y_UNIT_TEST(FewNodes_Groups_History_Sticky) { + CheckPart({.Levels = 3, .Groups = true, .History = true, .StickSomePages = true}); + } } Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) { @@ -849,6 +857,10 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) { CheckPart({.Levels = 3, .History = true}); } + Y_UNIT_TEST(FewNodes_Sticky) { + CheckPart({.Levels = 3, .StickSomePages = true}); + } + Y_UNIT_TEST(FewNodes_Slices) { CheckPart({.Levels = 3, .Slices = true}); } @@ -864,6 +876,10 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) { Y_UNIT_TEST(FewNodes_Groups_History_Slices) { CheckPart({.Levels = 3, .Groups = true, .History = true, .Slices = true}); } + + Y_UNIT_TEST(FewNodes_Groups_History_Slices_Sticky) { + CheckPart({.Levels = 3, .Groups = true, .History = true, .Slices = true, .StickSomePages = true}); + } } } From 5d22dec92e5d9e123ef2957ca04dc4a226e3d22e Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 7 Feb 2024 20:46:07 +0000 Subject: [PATCH 6/9] DoGroupsAndHistory --- .../flat_part_charge_btree_index.h | 92 ++++++++----------- 1 file changed, 40 insertions(+), 52 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index 56041d8d3353..76a1164dfd28 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -70,7 +70,7 @@ class TChargeBTreeIndex : public ICharge { endRowId++; // current interface accepts inclusive row2 bound bool ready = true, overshot = true, hasValidRowsRange = true; - const TRowId sliceEndRowId = endRowId; + const TRowId sliceBeginRowId = beginRowId, sliceEndRowId = endRowId; const auto& meta = Part->IndexPages.BTreeGroups[0]; Y_ABORT_UNLESS(beginRowId < endRowId); Y_ABORT_UNLESS(endRowId <= meta.RowCount); @@ -87,8 +87,7 @@ class TChargeBTreeIndex : public ICharge { const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity - // always load beginRowId regardless of keys - const TRowId levelBeginRowId = beginRowId, levelEndRowId = Max(endRowId, beginRowId + 1); + const TRowId levelBeginRowId = beginRowId, levelEndRowId = endRowId; for (const auto &node : level) { if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) { @@ -150,8 +149,9 @@ class TChargeBTreeIndex : public ICharge { auto& key2Child = node.GetChild(pos); key2PageId = key2Child.PageId; endRowId = Min(endRowId, key2Child.RowCount + 1); // move endRowId - 1 to the first key > key2 - if (key2Child.RowCount <= beginRowId) { + if (key2Child.RowCount <= sliceBeginRowId) { hasValidRowsRange = false; // key2 is before current slice + endRowId = Max(endRowId, sliceBeginRowId + 1); // always load sliceBeginRowId regardless of key2 } } return true; @@ -211,7 +211,7 @@ class TChargeBTreeIndex : public ICharge { iterateLevel(tryHandleDataPage); } - // ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, firstChild, itemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, overshot}; } @@ -384,8 +384,7 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount && ready; height++) { if (height == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleNode(root); + ready &= tryHandleNode(BuildRootChildState(meta)); } else { iterateLevel(tryHandleNode); } @@ -403,8 +402,7 @@ class TChargeBTreeIndex : public ICharge { overshot &= beginRowId == sliceBeginRowId; if (meta.LevelCount == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleDataPage(root); + ready &= tryHandleDataPage(BuildRootChildState(meta)); } else { iterateLevel(tryHandleDataPage); } @@ -415,27 +413,34 @@ class TChargeBTreeIndex : public ICharge { } private: - bool DoGroupsAndHistory(bool hasValidRowsRange, TRowId beginRowId, TRowId endRowId, TRowId beginBytesLimitRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { + bool DoGroupsAndHistory(bool hasValidRowsRange, TRowId beginRowId, TRowId endRowId, const TChildState& firstChild, ui64 itemsLimit, ui64 bytesLimit) const noexcept { bool ready = true; - if (hasValidRowsRange && beginRowId < endRowId) { - if (itemsLimit && endRowId - beginRowId - 1 >= itemsLimit) { - endRowId = beginRowId + itemsLimit + 1; - } - - if (beginBytesLimitRowId == Max()) { - beginBytesLimitRowId = beginRowId; + if (!hasValidRowsRange) { + return ready; + } + if (beginRowId >= endRowId) { + return ready; + } - if (IncludeHistory) { - ready &= DoHistory(beginRowId, endRowId); - } // otherwise beginBytesLimitRowId is specified, so we do not know where to start charging history + if (itemsLimit) { + TRowId limitFromRowId = firstChild.PageId == Max() ? firstChild.BeginRowId : beginRowId; + if (endRowId - limitFromRowId - 1 > itemsLimit) { + endRowId = limitFromRowId + itemsLimit + 1; } - - for (auto groupIndex : Groups) { - ready &= DoGroup(TGroupId(groupIndex), beginRowId, endRowId, beginBytesLimitRowId, bytesLimit); + if (beginRowId >= endRowId) { + return ready; } } + if (IncludeHistory && (!bytesLimit || firstChild.PageId != Max())) { + ready &= DoHistory(beginRowId, endRowId); + } + + for (auto groupIndex : Groups) { + ready &= DoGroup(TGroupId(groupIndex), beginRowId, endRowId, firstChild.BeginRowId, bytesLimit); + } + return ready; } @@ -464,23 +469,12 @@ class TChargeBTreeIndex : public ICharge { } private: - bool DoGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId beginBytesLimitRowId, ui64 bytesLimit) const noexcept { + bool DoGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId firstChildBeginRowId, ui64 bytesLimit) const noexcept { bool ready = true; - const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; TVector level, nextLevel(::Reserve(3)); - - ui64 prevBeginBytesLimitDataSize = 0; - if (bytesLimit) { - prevBeginBytesLimitDataSize = GetPrevDataSize(meta, beginBytesLimitRowId); - if (beginBytesLimitRowId < beginRowId) { - ui64 prevBeginDataSize = GetPrevDataSize(meta, beginRowId); - if (LimitExceeded(prevBeginDataSize - prevBeginBytesLimitDataSize, bytesLimit)) { - return true; - } - } - } + ui64 firstChildPrevBytes = bytesLimit ? GetPrevDataSize(meta, firstChildBeginRowId) : 0; const auto iterateLevel = [&](const auto& tryHandleChild) { for (const auto &node : level) { @@ -495,12 +489,12 @@ class TChargeBTreeIndex : public ICharge { auto& child = node.GetShortChild(pos); auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; auto childState = BuildChildState(node, child, prevChild); - ready &= tryHandleChild(childState); if (bytesLimit) { - // if (child->DataSize > prevBeginBytesLimitDataSize && LimitExceeded(child->DataSize - prevBeginBytesLimitDataSize, bytesLimit)) { - // return; - // } + if (LimitExceeded(firstChildPrevBytes, childState.PrevBytes, bytesLimit)) { + return; + } } + ready &= tryHandleChild(childState); } } }; @@ -515,8 +509,7 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleNode(root); + ready &= tryHandleNode(BuildRootChildState(meta)); } else { iterateLevel(tryHandleNode); } @@ -525,8 +518,7 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleDataPage(root); + ready &= tryHandleDataPage(BuildRootChildState(meta)); } else { iterateLevel(tryHandleDataPage); } @@ -577,8 +569,7 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleNode(root); + ready &= tryHandleNode(BuildRootChildState(meta)); } else { iterateLevel(tryHandleNode); } @@ -587,8 +578,7 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleDataPage(root); + ready &= tryHandleDataPage(BuildRootChildState(meta)); } else { iterateLevel(tryHandleDataPage); } @@ -714,8 +704,7 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleNode(root); + ready &= tryHandleNode(BuildRootChildState(meta)); } else { iterateLevel(tryHandleNode); } @@ -724,8 +713,7 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - auto root = BuildRootChildState(meta); - ready &= tryHandleDataPage(root); + ready &= tryHandleDataPage(BuildRootChildState(meta)); } else { iterateLevel(tryHandleDataPage); } From 39172bbaff5c61a682c3720d64fd610a768e416f Mon Sep 17 00:00:00 2001 From: kungasc Date: Thu, 8 Feb 2024 11:47:01 +0000 Subject: [PATCH 7/9] refactor reverse --- .../flat_part_charge_btree_index.h | 172 +++++++----------- .../ut/ut_btree_index_iter_charge.cpp | 2 +- 2 files changed, 71 insertions(+), 103 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index 76a1164dfd28..55db013d04bb 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -69,7 +69,7 @@ class TChargeBTreeIndex : public ICharge { const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override { endRowId++; // current interface accepts inclusive row2 bound - bool ready = true, overshot = true, hasValidRowsRange = true; + bool ready = true, overshot = true, hasValidRowsRange = Groups || IncludeHistory; const TRowId sliceBeginRowId = beginRowId, sliceEndRowId = endRowId; const auto& meta = Part->IndexPages.BTreeGroups[0]; Y_ABORT_UNLESS(beginRowId < endRowId); @@ -93,7 +93,6 @@ class TChargeBTreeIndex : public ICharge { if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) { continue; } - TRecIdx from = 0, to = node.GetChildrenCount(); if (node.BeginRowId <= levelBeginRowId) { from = node.Seek(levelBeginRowId); @@ -106,6 +105,7 @@ class TChargeBTreeIndex : public ICharge { if (node.EndRowId > levelEndRowId) { to = node.Seek(levelEndRowId - 1) + 1; } + for (TRecIdx pos : xrange(from, to)) { auto child = node.GetChild(pos); auto prevChild = pos ? node.GetChildRef(pos - 1) : nullptr; @@ -219,17 +219,13 @@ class TChargeBTreeIndex : public ICharge { TResult DoReverse(TCells key1, TCells key2, TRowId endRowId, TRowId beginRowId, const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override { endRowId++; // current interface accepts inclusive row1 bound - Y_ABORT_UNLESS(beginRowId < endRowId); - bool ready = true, overshot = true; - bool hasValidRowsRange = Groups || IncludeHistory; // false value means that beginRowId, endRowId are invalid and shouldn't be used - ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups - TRowId endBytesLimitRowId = Max(); // last unloaded probably needed row - + bool ready = true, overshot = true, hasValidRowsRange = Groups || IncludeHistory; + const TRowId sliceBeginRowId = beginRowId, sliceEndRowId = endRowId; const auto& meta = Part->IndexPages.BTreeGroups[0]; + Y_ABORT_UNLESS(beginRowId < endRowId); Y_ABORT_UNLESS(endRowId <= meta.RowCount); - const TRowId sliceBeginRowId = beginRowId; if (Y_UNLIKELY(key1 && key2 && Compare(key2, key1, keyDefaults) > 0)) { key2 = key1; // will not go further than key1 hasValidRowsRange = false; @@ -239,54 +235,37 @@ class TChargeBTreeIndex : public ICharge { TVector level, nextLevel(::Reserve(3)); TPageId key1PageId = key1 ? meta.PageId : Max(); TPageId key2PageId = key2 ? meta.PageId : Max(); - ui64 prevKey1Items = 0, key1Items = 0; + TChildState lastChild = BuildRootChildState(meta); const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity - // always load endRowId - 1 regardless of keys - const TRowId levelBeginRowId = Min(beginRowId, endRowId - 1), levelEndRowId = endRowId; - const TChild *levelLastChild = nullptr, *levelPrevLastChild = nullptr; + const TRowId levelBeginRowId = beginRowId, levelEndRowId = endRowId; for (const auto &node : level) { if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) { continue; } - - TRecIdx from = 0, to = node.GetChildrenCount(); + TRecIdx from = 0, to = node.GetKeysCount(); if (node.BeginRowId < levelBeginRowId) { from = node.Seek(levelBeginRowId); } - if (node.EndRowId > levelEndRowId) { - to = node.Seek(levelEndRowId - 1) + 1; + if (node.EndRowId >= levelEndRowId) { + to = node.Seek(levelEndRowId - 1); + if (lastChild.PageId != Max()) { // still valid and should be updated + auto& child = node.GetChild(to); + auto prevChild = to ? node.GetChildRef(to - 1) : nullptr; + lastChild = BuildChildState(node, child, prevChild); + } } - for (TRecIdx posExt = to; posExt > from; posExt--) { + + for (TRecIdx posExt = to + 1; posExt > from; posExt--) { auto& child = node.GetChild(posExt - 1); auto prevChild = posExt - 1 ? node.GetChildRef(posExt - 2) : nullptr; auto childState = BuildChildState(node, child, prevChild); - if (itemsLimit || bytesLimit) { - if (!levelLastChild) { - // do not apply limits on the last child because endRowId/key1 position is uncertain - levelLastChild = &child; - } else { - if (!levelPrevLastChild) { - levelPrevLastChild = &child; - } - if (itemsLimit) { - ui64 items = levelPrevLastChild->GetNonErasedRowCount() - child.GetNonErasedRowCount(); - if (LimitExceeded(items, itemsLimit)) { - overshot = false; - return; - } - } - if (bytesLimit) { - // ui64 bytes = levelPrevLastChild->DataSize - child.DataSize; - // if (LimitExceeded(bytes, bytesLimit)) { - // beginRowId = Max(beginRowId, childEndRowId); - // overshot = false; - // return; - // } - } - } + if (LimitExceeded(childState.Items, lastChild.PrevItems, itemsLimit) || LimitExceeded(childState.Bytes, lastChild.PrevBytes, bytesLimit)) { + beginRowId = Max(beginRowId, childState.EndRowId); + overshot = false; + return; } ready &= tryHandleChild(childState); } @@ -294,18 +273,10 @@ class TChargeBTreeIndex : public ICharge { }; const auto skipUnloadedRows = [&](const TChildState& child) { + if (child.PageId == lastChild.PageId) { + lastChild.PageId = Max(); // mark last child unloaded + } if (child.PageId == key1PageId) { - if (hasValidRowsRange && chargeGroupsItemsLimit) { - ui64 unloadedItems = key1Items - prevKey1Items; - if (unloadedItems < chargeGroupsItemsLimit) { - chargeGroupsItemsLimit -= unloadedItems; - } else { - hasValidRowsRange = false; - } - } - if (hasValidRowsRange && bytesLimit) { - endBytesLimitRowId = Min(endRowId, child.EndRowId); - } endRowId = Min(endRowId, child.BeginRowId); } if (child.PageId == key2PageId) { @@ -314,18 +285,13 @@ class TChargeBTreeIndex : public ICharge { }; const auto tryHandleNode = [&](const TChildState& child) -> bool { - if (child.PageId == key1PageId || child.PageId == key2PageId) { + if (child.PageId == lastChild.PageId || child.PageId == key1PageId || child.PageId == key2PageId) { if (TryLoadNode(child, nextLevel)) { const auto& node = nextLevel.back(); if (child.PageId == key1PageId) { TRecIdx pos = node.SeekReverse(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults); auto& key1Child = node.GetChild(pos); key1PageId = key1Child.PageId; - key1Items = key1Child.GetNonErasedRowCount(); - if (pos) { - auto& prevKey1Child = node.GetChild(pos - 1); - prevKey1Items = prevKey1Child.GetNonErasedRowCount(); - } endRowId = Min(endRowId, key1Child.RowCount); // move endRowId - 1 to the last key <= key1 } if (child.PageId == key2PageId) { @@ -334,8 +300,9 @@ class TChargeBTreeIndex : public ICharge { if (pos) { auto& prevKey2Child = node.GetChild(pos - 1); beginRowId = Max(beginRowId, prevKey2Child.RowCount - 1); // move beginRowId to the last key < key2 - if (prevKey2Child.RowCount >= endRowId) { + if (prevKey2Child.RowCount >= sliceEndRowId) { hasValidRowsRange = false; // key2 is after current slice + beginRowId = Min(beginRowId, sliceEndRowId - 1); // always load endRowId - 1 regardless of keys } } } @@ -382,7 +349,7 @@ class TChargeBTreeIndex : public ICharge { } }; - for (ui32 height = 0; height < meta.LevelCount && ready; height++) { + for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { ready &= tryHandleNode(BuildRootChildState(meta)); } else { @@ -390,11 +357,10 @@ class TChargeBTreeIndex : public ICharge { } level.swap(nextLevel); nextLevel.clear(); - } - - if (!ready) { // some index pages are missing, do not continue - ready &= DoGroupsAndHistoryReverse(hasValidRowsRange, beginRowId, endRowId, endBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds - return {ready, false}; + if (lastChild.PageId == Max()) { // last child is unloaded, consider all last's child rows are needed for next levels + lastChild.PrevItems = lastChild.Items; + lastChild.PrevBytes = lastChild.Bytes; + } } // flat index doesn't treat key placement within data page, so let's do the same @@ -407,7 +373,7 @@ class TChargeBTreeIndex : public ICharge { iterateLevel(tryHandleDataPage); } - ready &= DoGroupsAndHistoryReverse(hasValidRowsRange, beginRowId, endRowId, endBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + ready &= DoGroupsAndHistoryReverse(hasValidRowsRange, beginRowId, endRowId, lastChild, itemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, overshot}; } @@ -424,6 +390,7 @@ class TChargeBTreeIndex : public ICharge { } if (itemsLimit) { + // TODO: items limit should be applied on items not rows, but it requires iteration via first and last data pages TRowId limitFromRowId = firstChild.PageId == Max() ? firstChild.BeginRowId : beginRowId; if (endRowId - limitFromRowId - 1 > itemsLimit) { endRowId = limitFromRowId + itemsLimit + 1; @@ -444,25 +411,33 @@ class TChargeBTreeIndex : public ICharge { return ready; } - bool DoGroupsAndHistoryReverse(bool hasValidRowsRange, TRowId beginRowId, TRowId endRowId, TRowId endBytesLimitRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept { + bool DoGroupsAndHistoryReverse(bool hasValidRowsRange, TRowId beginRowId, TRowId endRowId, const TChildState& lastChild, ui64 itemsLimit, ui64 bytesLimit) const noexcept { bool ready = true; - if (hasValidRowsRange && beginRowId < endRowId) { - if (itemsLimit && endRowId - beginRowId - 1 >= itemsLimit) { - beginRowId = endRowId - itemsLimit - 1; + if (!hasValidRowsRange) { + return ready; + } + if (beginRowId >= endRowId) { + return ready; + } + + if (itemsLimit) { + // TODO: items limit should be applied on items not rows, but it requires iteration via first and last data pages + TRowId limitToRowId = lastChild.PageId == Max() ? lastChild.EndRowId : endRowId; + if (limitToRowId - beginRowId - 1 >= itemsLimit) { + beginRowId = limitToRowId - itemsLimit - 1; } - if (endBytesLimitRowId == Max()) { - endBytesLimitRowId = endRowId; + if (beginRowId >= endRowId) { + return ready; + } + } - - if (IncludeHistory) { - ready &= DoHistory(beginRowId, endRowId); - } - } // otherwise endBytesLimitRowId is specified, so we do not know where to start charging history + if (IncludeHistory && (!bytesLimit || lastChild.PageId != Max())) { + ready &= DoHistory(beginRowId, endRowId); + } - for (auto groupIndex : Groups) { - ready &= DoGroupReverse(TGroupId(groupIndex), beginRowId, endRowId, endBytesLimitRowId, bytesLimit); - } + for (auto groupIndex : Groups) { + ready &= DoGroupReverse(TGroupId(groupIndex), beginRowId, endRowId, lastChild.EndRowId, bytesLimit); } return ready; @@ -474,7 +449,7 @@ class TChargeBTreeIndex : public ICharge { const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; TVector level, nextLevel(::Reserve(3)); - ui64 firstChildPrevBytes = bytesLimit ? GetPrevDataSize(meta, firstChildBeginRowId) : 0; + ui64 firstChildPrevBytes = bytesLimit ? GetPrevBytes(meta, firstChildBeginRowId) : 0; const auto iterateLevel = [&](const auto& tryHandleChild) { for (const auto &node : level) { @@ -485,14 +460,13 @@ class TChargeBTreeIndex : public ICharge { if (node.EndRowId > endRowId) { to = node.Seek(endRowId - 1) + 1; } + for (TRecIdx pos : xrange(from, to)) { auto& child = node.GetShortChild(pos); auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; auto childState = BuildChildState(node, child, prevChild); - if (bytesLimit) { - if (LimitExceeded(firstChildPrevBytes, childState.PrevBytes, bytesLimit)) { - return; - } + if (LimitExceeded(firstChildPrevBytes, childState.PrevBytes, bytesLimit)) { + return; } ready &= tryHandleChild(childState); } @@ -526,33 +500,29 @@ class TChargeBTreeIndex : public ICharge { return ready; } - bool DoGroupReverse(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId endBytesLimitRowId, ui64 bytesLimit) const noexcept { + bool DoGroupReverse(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId lastChildEndRowId, ui64 bytesLimit) const noexcept { bool ready = true; - const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; // level's nodes is in reverse order TVector level, nextLevel(::Reserve(3)); - ui64 endBytesLimitDataSize = bytesLimit ? GetDataSize(meta, endBytesLimitRowId - 1) : 0; + ui64 lastChildBytes = bytesLimit ? GetBytes(meta, lastChildEndRowId - 1) : 0; const auto iterateLevel = [&](const auto& tryHandleChild) { for (const auto &node : level) { - TRecIdx from = 0, to = node.GetChildrenCount(); + TRecIdx from = 0, to = node.GetKeysCount(); if (node.BeginRowId < beginRowId) { from = node.Seek(beginRowId); } if (node.EndRowId > endRowId) { - to = node.Seek(endRowId - 1) + 1; + to = node.Seek(endRowId - 1); } - for (TRecIdx posExt = to; posExt > from; posExt--) { + for (TRecIdx posExt = to + 1; posExt > from; posExt--) { auto& child = node.GetShortChild(posExt - 1); auto prevChild = posExt - 1 ? node.GetShortChildRef(posExt - 2) : nullptr; auto childState = BuildChildState(node, child, prevChild); - if (bytesLimit) { - Y_UNUSED(endBytesLimitDataSize); - // if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) { - // return; - // } + if (LimitExceeded(childState.Bytes, lastChildBytes, bytesLimit)) { + return; } ready &= tryHandleChild(childState); } @@ -610,11 +580,9 @@ class TChargeBTreeIndex : public ICharge { }; TCells key2{ key2Cells, 3 }; - // Directly use the history group scheme + // Directly use the history group scheme and key defaults with correct sort order const auto& scheme = Part->Scheme->HistoryGroup; Y_DEBUG_ABORT_UNLESS(scheme.ColsKeyIdx.size() == 3); - - // Directly use the history key defaults with correct sort order const TKeyCellDefaults* keyDefaults = Part->Scheme->HistoryKeys.Get(); const TGroupId groupId(0, true); @@ -736,7 +704,7 @@ class TChargeBTreeIndex : public ICharge { } private: - ui64 GetPrevDataSize(const TBtreeIndexMeta& meta, TRowId rowId) const { + ui64 GetPrevBytes(const TBtreeIndexMeta& meta, TRowId rowId) const { TPageId pageId = meta.PageId; ui64 result = 0; @@ -756,7 +724,7 @@ class TChargeBTreeIndex : public ICharge { return result; } - ui64 GetDataSize(TBtreeIndexMeta meta, TRowId rowId) const { + ui64 GetBytes(TBtreeIndexMeta meta, TRowId rowId) const { TPageId pageId = meta.PageId; ui64 result = meta.DataSize; diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index f7418f0acbc0..96144a01e26a 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -425,7 +425,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } void DoChargeRowId(ICharge& charge, TTouchEnv& env, const TRowId row1, const TRowId row2, ui64 itemsLimit, ui64 bytesLimit, - bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 10) { + bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 15) { while (true) { bool ready = reverse ? charge.DoReverse(row2, row1, keyDefaults, itemsLimit, bytesLimit) From 8cefe192d4a2052be676b1efa2f4ee58bbb6b656 Mon Sep 17 00:00:00 2001 From: kungasc Date: Thu, 8 Feb 2024 13:06:40 +0000 Subject: [PATCH 8/9] fix check --- ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 96144a01e26a..4bb71c2a9383 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -566,7 +566,12 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } if (!groupId.IsMain() && !loaded.contains(pageId)) { // only check that we loaded consecutive pages - break; + if (params.StickSomePages) { + // extra pages may appear after the bytes limit is applied on main pages + continue; + } else { + break; + } } expected.insert(pageId); if (size > bytesLimit) { From 8d51432dd01811e4954ad01523691a21db02faeb Mon Sep 17 00:00:00 2001 From: kungasc Date: Thu, 8 Feb 2024 13:57:50 +0000 Subject: [PATCH 9/9] remove unused --- ydb/core/tablet_flat/flat_part_charge_btree_index.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index 55db013d04bb..4207f85f69fb 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -112,7 +112,6 @@ class TChargeBTreeIndex : public ICharge { auto childState = BuildChildState(node, child, prevChild); if (LimitExceeded(firstChild.Items, childState.PrevItems, itemsLimit) || LimitExceeded(firstChild.Bytes, childState.PrevBytes, bytesLimit)) { endRowId = Min(endRowId, childState.BeginRowId); - overshot = false; return; } ready &= tryHandleChild(childState); @@ -264,7 +263,6 @@ class TChargeBTreeIndex : public ICharge { auto childState = BuildChildState(node, child, prevChild); if (LimitExceeded(childState.Items, lastChild.PrevItems, itemsLimit) || LimitExceeded(childState.Bytes, lastChild.PrevBytes, bytesLimit)) { beginRowId = Max(beginRowId, childState.EndRowId); - overshot = false; return; } ready &= tryHandleChild(childState); @@ -799,10 +797,6 @@ class TChargeBTreeIndex : public ICharge { prevChild ? prevChild->DataSize : parent.PrevBytes, child.DataSize); } - bool LimitExceeded(ui64 value, ui64 limit) const noexcept { - return limit && value > limit; - } - bool LimitExceeded(ui64 prev, ui64 current, ui64 limit) const noexcept { return limit && current > prev && current - prev > limit; }