From 591c2d22dcd2c2dac64b04ad35944baf5dcf6be5 Mon Sep 17 00:00:00 2001 From: kungasc Date: Tue, 6 Feb 2024 16:19:02 +0000 Subject: [PATCH 1/3] do not stop groups charge if isn't ready --- .../flat_part_charge_btree_index.h | 34 +++++++--------- .../ut/ut_btree_index_iter_charge.cpp | 39 +++++++++++++------ 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index ee091ff8e095..e992e96de05c 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -494,18 +494,23 @@ class TChargeBTreeIndex : public ICharge { const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index]; TVector level, nextLevel(::Reserve(3)); - ui64 prevBeginDataSize = 0; - ui64 prevBeginBytesLimitDataSize = bytesLimit ? GetPrevDataSize(meta, beginBytesLimitRowId) : 0; + + ui64 prevBeginBytesLimitDataSize = 0; + if (bytesLimit) { + prevBeginBytesLimitDataSize = GetPrevDataSize(meta, beginBytesLimitRowId); + if (beginBytesLimitRowId < beginRowId) { + ui64 prevBeginDataSize = GetPrevDataSize(meta, beginRowId); + if (LimitExceeded(prevBeginDataSize - prevBeginBytesLimitDataSize, bytesLimit)) { + return true; + } + } + } const auto iterateLevel = [&](const auto& tryHandleChild) { - ui64 prevChildDataSize = prevBeginDataSize; for (const auto &node : level) { TRecIdx from = 0, to = node.GetChildrenCount(); if (node.BeginRowId < beginRowId) { from = node.Seek(beginRowId); - if (from) { - prevChildDataSize = prevBeginDataSize = node.GetShortChild(from - 1).DataSize; - } } if (node.EndRowId > endRowId) { to = node.Seek(endRowId - 1) + 1; @@ -515,13 +520,12 @@ class TChargeBTreeIndex : public ICharge { auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; TRowId childEndRowId = child->RowCount; + ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); if (bytesLimit) { - if (prevChildDataSize > prevBeginBytesLimitDataSize && LimitExceeded(prevChildDataSize - prevBeginBytesLimitDataSize, bytesLimit)) { + if (child->DataSize > prevBeginBytesLimitDataSize && LimitExceeded(child->DataSize - prevBeginBytesLimitDataSize, bytesLimit)) { return; } } - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); - prevChildDataSize = child->DataSize; } } }; @@ -534,7 +538,7 @@ class TChargeBTreeIndex : public ICharge { return HasDataPage(child.PageId, groupId); }; - for (ui32 height = 0; height < meta.LevelCount && ready; height++) { + for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); } else { @@ -544,10 +548,6 @@ class TChargeBTreeIndex : public ICharge { nextLevel.clear(); } - if (!ready) { // some index pages are missing, do not continue - return ready; - } - if (meta.LevelCount == 0) { ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); } else { @@ -598,7 +598,7 @@ class TChargeBTreeIndex : public ICharge { return HasDataPage(child.PageId, groupId); }; - for (ui32 height = 0; height < meta.LevelCount && ready; height++) { + for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); } else { @@ -608,10 +608,6 @@ class TChargeBTreeIndex : public ICharge { nextLevel.clear(); } - if (!ready) { // some index pages are missing, do not continue - return ready; - } - if (meta.LevelCount == 0) { ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); } else { diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 68b2433f4682..b68a313da470 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -79,15 +79,16 @@ namespace { } } - struct TMakePartParams { + struct TTestParams { const ui32 Levels = Max(); const bool Groups = false; const bool History = false; const bool Slices = false; const ui32 Rows = 40; + const bool PrechargeSome = false; }; - TPartEggs MakePart(TMakePartParams params) { + TPartEggs MakePart(TTestParams params) { NPage::TConf conf; switch (params.Levels) { case 0: @@ -370,7 +371,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) { } } - void CheckPart(TMakePartParams params) { + void CheckPart(TTestParams params) { TPartEggs eggs = MakePart(params); const auto part = *eggs.Lone(); @@ -394,6 +395,19 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) { } Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { + void PrepareEnvs(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults &keyDefaults, TTouchEnv& bTreeEnv, TTouchEnv& flatEnv) { + if (params.PrechargeSome) { + TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); + + for (int times = 0; times < 5; times++) { + bTree.ICharge::Do(10, 10, keyDefaults, 0, 0); + bTreeEnv.LoadTouched(); + } + + flatEnv.Loaded = bTreeEnv.Loaded; + } + } + void DoChargeRowId(ICharge& charge, TTouchEnv& env, const TRowId row1, const TRowId row2, ui64 itemsLimit, ui64 bytesLimit, bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 10) { while (true) { @@ -424,7 +438,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { Y_UNREACHABLE(); } - void CheckChargeRowId(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { + void CheckChargeRowId(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { for (ui64 itemsLimit : TVector{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) { for (TRowId rowId1 : xrange(0, part.Stat.Rows - 1)) { @@ -432,6 +446,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv bTreeEnv, flatEnv; TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); TCharge flat(&flatEnv, part, tags, true); + PrepareEnvs(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); TString message = TStringBuilder() << (reverse ? "ChargeRowIdReverse " : "ChargeRowId ") << rowId1 << " " << rowId2 << " items " << itemsLimit; DoChargeRowId(bTree, bTreeEnv, rowId1, rowId2, itemsLimit, 0, reverse, *keyDefaults, message); @@ -444,7 +459,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } } - void CheckChargeKeys(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { + void CheckChargeKeys(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { for (ui64 itemsLimit : TVector{0, 1, 2, 5, 13, 19, part.Stat.Rows - 2, part.Stat.Rows - 1}) { for (ui32 firstCellKey1 : xrange(0, part.Stat.Rows / 7 + 1)) { @@ -457,6 +472,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv bTreeEnv, flatEnv; TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); TCharge flat(&flatEnv, part, tags, true); + PrepareEnvs(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); TStringBuilder message = TStringBuilder() << (reverse ? "ChargeKeysReverse " : "ChargeKeys ") << "("; for (auto c : key1) { @@ -487,7 +503,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } } - void CheckChargeBytesLimit(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { + void CheckChargeBytesLimit(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults) { for (bool reverse : {false, true}) { for (ui64 bytesLimit : xrange(1, part.Stat.Bytes + 100, part.Stat.Bytes / 100)) { for (ui32 firstCellKey1 : xrange(0, part.Stat.Rows / 7 + 1)) { @@ -497,6 +513,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv limitedEnv, unlimitedEnv; TChargeBTreeIndex limitedCharge(&limitedEnv, part, tags, true); TChargeBTreeIndex unlimitedCharge(&unlimitedEnv, part, tags, true); + PrepareEnvs(params, part, tags, *keyDefaults, limitedEnv, unlimitedEnv); TStringBuilder message = TStringBuilder() << (reverse ? "ChargeBytesLimitReverse " : "ChargeBytesLimit ") << "("; for (auto c : key1) { @@ -551,7 +568,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } } - void CheckPart(TMakePartParams params) { + void CheckPart(TTestParams params) { TPartEggs eggs = MakePart(params); const auto part = *eggs.Lone(); @@ -560,9 +577,9 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { tags.push_back(c.Tag); } - CheckChargeRowId(part, tags, eggs.Scheme->Keys.Get()); - CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get()); - CheckChargeBytesLimit(part, tags, eggs.Scheme->Keys.Get()); + CheckChargeRowId(params, part, tags, eggs.Scheme->Keys.Get()); + CheckChargeKeys(params, part, tags, eggs.Scheme->Keys.Get()); + CheckChargeBytesLimit(params, part, tags, eggs.Scheme->Keys.Get()); } Y_UNIT_TEST(NoNodes) { @@ -756,7 +773,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) { } } - void CheckPart(TMakePartParams params) { + void CheckPart(TTestParams params) { TPartEggs eggs = MakePart(params); const auto part = *eggs.Lone(); From cebb3ad5518916402929a53b81d0a5dfd79bcc4c Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 7 Feb 2024 12:34:50 +0000 Subject: [PATCH 2/3] stick pages --- .../ut/ut_btree_index_iter_charge.cpp | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index b68a313da470..0a6af362f240 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -19,7 +19,7 @@ namespace { const TSharedData* TryGetPage(const TPart *part, TPageId pageId, TGroupId groupId) override { Touched[groupId].insert(pageId); - if (Loaded[groupId].contains(pageId)) { + if (Loaded[groupId].contains(pageId) || Sticky[groupId].contains(pageId)) { return NTest::TTestEnv::TryGetPage(part, pageId, groupId); } return nullptr; @@ -32,8 +32,17 @@ namespace { Touched.clear(); } + void StickLoaded() { + for (const auto &g : Loaded) { + Sticky[g.first].insert(g.second.begin(), g.second.end()); + } + Touched.clear(); + Loaded.clear(); + } + TMap> Loaded; TMap> Touched; + TMap> Sticky; }; void AssertLoadedTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message, @@ -85,7 +94,7 @@ namespace { const bool History = false; const bool Slices = false; const ui32 Rows = 40; - const bool PrechargeSome = false; + const bool StickSomePages = true; }; TPartEggs MakePart(TTestParams params) { @@ -395,8 +404,8 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIt) { } Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { - void PrepareEnvs(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults &keyDefaults, TTouchEnv& bTreeEnv, TTouchEnv& flatEnv) { - if (params.PrechargeSome) { + void StickSomePages(TTestParams params, const TPartStore& part, TTagsRef tags, const TKeyCellDefaults &keyDefaults, TTouchEnv& bTreeEnv, TTouchEnv& flatEnv) { + if (params.StickSomePages) { TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); for (int times = 0; times < 5; times++) { @@ -405,6 +414,8 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { } flatEnv.Loaded = bTreeEnv.Loaded; + flatEnv.StickLoaded(); + bTreeEnv.StickLoaded(); } } @@ -446,7 +457,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv bTreeEnv, flatEnv; TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); TCharge flat(&flatEnv, part, tags, true); - PrepareEnvs(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); + StickSomePages(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); TString message = TStringBuilder() << (reverse ? "ChargeRowIdReverse " : "ChargeRowId ") << rowId1 << " " << rowId2 << " items " << itemsLimit; DoChargeRowId(bTree, bTreeEnv, rowId1, rowId2, itemsLimit, 0, reverse, *keyDefaults, message); @@ -472,7 +483,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv bTreeEnv, flatEnv; TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true); TCharge flat(&flatEnv, part, tags, true); - PrepareEnvs(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); + StickSomePages(params, part, tags, *keyDefaults, bTreeEnv, flatEnv); TStringBuilder message = TStringBuilder() << (reverse ? "ChargeKeysReverse " : "ChargeKeys ") << "("; for (auto c : key1) { @@ -513,7 +524,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) { TTouchEnv limitedEnv, unlimitedEnv; TChargeBTreeIndex limitedCharge(&limitedEnv, part, tags, true); TChargeBTreeIndex unlimitedCharge(&unlimitedEnv, part, tags, true); - PrepareEnvs(params, part, tags, *keyDefaults, limitedEnv, unlimitedEnv); + StickSomePages(params, part, tags, *keyDefaults, limitedEnv, unlimitedEnv); TStringBuilder message = TStringBuilder() << (reverse ? "ChargeBytesLimitReverse " : "ChargeBytesLimit ") << "("; for (auto c : key1) { From c3b175b5b2fc4f35372c6a164e2ed218ed548782 Mon Sep 17 00:00:00 2001 From: kungasc Date: Wed, 7 Feb 2024 15:51:36 +0000 Subject: [PATCH 3/3] child state --- .../flat_part_charge_btree_index.h | 247 ++++++++++-------- .../ut/ut_btree_index_iter_charge.cpp | 7 +- 2 files changed, 142 insertions(+), 112 deletions(-) diff --git a/ydb/core/tablet_flat/flat_part_charge_btree_index.h b/ydb/core/tablet_flat/flat_part_charge_btree_index.h index e992e96de05c..0bfb7bfdf4b0 100644 --- a/ydb/core/tablet_flat/flat_part_charge_btree_index.h +++ b/ydb/core/tablet_flat/flat_part_charge_btree_index.h @@ -16,20 +16,25 @@ class TChargeBTreeIndex : public ICharge { struct TChildState { TPageId PageId; - TRowId BeginRowId; - TRowId EndRowId; + TRowId BeginRowId, EndRowId; + TRowId PrevItems, Items; + ui64 PrevDataSize, DataSize; - TChildState(TPageId pageId, TRowId beginRowId, TRowId endRowId) + TChildState(TPageId pageId, TRowId beginRowId, TRowId endRowId, TRowId prevItems, TRowId items, ui64 prevDataSize, ui64 dataSize) : PageId(pageId) , BeginRowId(beginRowId) , EndRowId(endRowId) + , PrevItems(prevItems) + , Items(items) + , PrevDataSize(prevDataSize) + , DataSize(dataSize) { } }; struct TNodeState : TChildState, TBtreeIndexNode { - TNodeState(TSharedData data, TPageId pageId, TRowId beginRowId, TRowId endRowId) - : TChildState(pageId, beginRowId, endRowId) + TNodeState(TSharedData data, TPageId pageId, TRowId beginRowId, TRowId endRowId, TRowId prevItems, TRowId items, ui64 prevDataSize, ui64 dataSize) + : TChildState(pageId, beginRowId, endRowId, prevItems, items, prevDataSize, dataSize) , TBtreeIndexNode(data) { } @@ -62,13 +67,11 @@ class TChargeBTreeIndex : public ICharge { public: TResult Do(TCells key1, TCells key2, TRowId beginRowId, TRowId endRowId, const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override { + Cerr << "Do " << beginRowId << " " << endRowId << " " << itemsLimit << Endl; endRowId++; // current interface accepts inclusive row2 bound Y_ABORT_UNLESS(beginRowId < endRowId); - bool ready = true, overshot = true; - bool hasValidRowsRange = Groups || IncludeHistory; // false value means that beginRowId, endRowId are invalid and shouldn't be used - ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups - TRowId beginBytesLimitRowId = Max(); // first unloaded probably needed row + bool ready = true, overshot = true, hasValidRowsRange = true; const auto& meta = Part->IndexPages.BTreeGroups[0]; Y_ABORT_UNLESS(endRowId <= meta.RowCount); @@ -82,91 +85,79 @@ class TChargeBTreeIndex : public ICharge { TVector level, nextLevel(::Reserve(3)); TPageId key1PageId = key1 ? meta.PageId : Max(); TPageId key2PageId = key2 ? meta.PageId : Max(); - ui64 key1Items = 0, prevKey1Items = 0; + TChildState firstChild = BuildRootChildState(meta); const auto iterateLevel = [&](const auto& tryHandleChild) { // tryHandleChild may update them, copy for simplicity // always load beginRowId regardless of keys - const TRowId levelBeginRowId = beginRowId, levelEndRowId = Max(endRowId, beginRowId + 1); - const TChild* levelFirstChild = nullptr; - + const TRowId levelBeginRowId = beginRowId, levelEndRowId = endRowId; + for (const auto &node : level) { if (node.EndRowId <= levelBeginRowId || node.BeginRowId >= levelEndRowId) { continue; } TRecIdx from = 0, to = node.GetChildrenCount(); - if (node.BeginRowId < levelBeginRowId) { + if (node.BeginRowId <= levelBeginRowId) { from = node.Seek(levelBeginRowId); + if (firstChild.PageId != Max()) { // still valid and should be updated + auto& child = node.GetChild(from); + auto prevChild = from ? node.GetChildRef(from - 1) : nullptr; + firstChild = BuildChildState(node, child, prevChild); + Cerr << " first " << firstChild.PageId << " " << firstChild.BeginRowId << " " << firstChild.EndRowId << Endl; + } } if (node.EndRowId > levelEndRowId) { to = node.Seek(levelEndRowId - 1) + 1; } for (TRecIdx pos : xrange(from, to)) { - auto child = node.GetChildRef(pos); + auto child = node.GetChild(pos); auto prevChild = pos ? node.GetChildRef(pos - 1) : nullptr; - TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId childEndRowId = child->RowCount; - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); - if (itemsLimit || bytesLimit) { - if (!levelFirstChild) { - // do not apply limits on the first child because beginRowId/key1 position is uncertain - levelFirstChild = child; - } else { - if (itemsLimit) { - ui64 items = child->GetNonErasedRowCount() - levelFirstChild->GetNonErasedRowCount(); - if (LimitExceeded(items, itemsLimit)) { - overshot = false; - return; - } - } - if (bytesLimit) { - ui64 bytes = child->DataSize - levelFirstChild->DataSize; - if (LimitExceeded(bytes, bytesLimit)) { - endRowId = Min(endRowId, childEndRowId); - overshot = false; - return; - } - } + auto childState = BuildChildState(node, child, prevChild); + if (itemsLimit) { + if (LimitExceeded(firstChild.Items, childState.PrevItems, itemsLimit)) { + overshot = false; + return; } } + if (bytesLimit) { + // ui64 bytes = child->DataSize - firstChild.DataSize; + // if (LimitExceeded(bytes, bytesLimit)) { + // endRowId = Min(endRowId, childEndRowId); + // overshot = false; + // return; + // } + } + ready &= tryHandleChild(childState); } } }; const auto skipUnloadedRows = [&](const TChildState& child) { + if (child.PageId == firstChild.PageId) { + firstChild.PageId = Max(); // mark first child unloaded + } if (child.PageId == key1PageId) { - if (hasValidRowsRange && chargeGroupsItemsLimit) { - ui64 unloadedItems = key1Items - prevKey1Items; - if (unloadedItems < chargeGroupsItemsLimit) { - chargeGroupsItemsLimit -= unloadedItems; - } else { - hasValidRowsRange = false; - } - } - if (hasValidRowsRange && bytesLimit) { - beginBytesLimitRowId = Max(beginRowId, child.BeginRowId); - } beginRowId = Max(beginRowId, child.EndRowId); + Cerr << " beginRowId " << beginRowId << Endl; } if (child.PageId == key2PageId) { endRowId = Min(endRowId, child.BeginRowId); + Cerr << " endRowId " << endRowId << Endl; } }; - const auto tryHandleNode = [&](TChildState child) -> bool { - if (child.PageId == key1PageId || child.PageId == key2PageId) { + const auto tryHandleNode = [&](const TChildState& child) -> bool { + Cerr << " node " << child.PageId << " " << child.BeginRowId << " " << child.EndRowId << Endl; + if (child.PageId == firstChild.PageId || child.PageId == key1PageId || child.PageId == key2PageId) { if (TryLoadNode(child, nextLevel)) { const auto& node = nextLevel.back(); if (child.PageId == key1PageId) { TRecIdx pos = node.Seek(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults); auto& key1Child = node.GetChild(pos); key1PageId = key1Child.PageId; - key1Items = key1Child.GetNonErasedRowCount(); if (pos) { - auto& prevKey1Child = node.GetChild(pos - 1); - prevKey1Items = prevKey1Child.GetNonErasedRowCount(); - beginRowId = Max(beginRowId, prevKey1Child.RowCount); // move beginRowId to the first key >= key1 + beginRowId = Max(beginRowId, node.GetChild(pos - 1).RowCount); // move beginRowId to the first key >= key1 } } if (child.PageId == key2PageId) { @@ -188,7 +179,8 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { + Cerr << " data " << child.PageId << " " << child.BeginRowId << " " << child.EndRowId << Endl; if (hasValidRowsRange && (child.PageId == key1PageId || child.PageId == key2PageId)) { const auto page = TryGetDataPage(child.PageId, { }); if (page) { @@ -211,19 +203,19 @@ class TChargeBTreeIndex : public ICharge { } }; - for (ui32 height = 0; height < meta.LevelCount && ready; height++) { + for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } level.swap(nextLevel); nextLevel.clear(); - } - - if (!ready) { // some index pages are missing, do not continue - ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds - return {ready, false}; + if (firstChild.PageId == Max()) { // first child is unloaded, consider all first's child rows are needed for next levels + Cerr << " move " << firstChild.Items << " -> " << firstChild.PrevItems << Endl; + firstChild.Items = firstChild.PrevItems; + } } // flat index doesn't treat key placement within data page, so let's do the same @@ -231,12 +223,13 @@ class TChargeBTreeIndex : public ICharge { overshot &= endRowId == sliceEndRowId; if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } - ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds + // ready &= DoGroupsAndHistory(hasValidRowsRange, beginRowId, endRowId, beginLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds return {ready, overshot}; } @@ -285,36 +278,35 @@ class TChargeBTreeIndex : public ICharge { to = node.Seek(levelEndRowId - 1) + 1; } for (TRecIdx posExt = to; posExt > from; posExt--) { - auto child = node.GetChildRef(posExt - 1); + auto& child = node.GetChild(posExt - 1); auto prevChild = posExt - 1 ? node.GetChildRef(posExt - 2) : nullptr; - TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId childEndRowId = child->RowCount; + auto childState = BuildChildState(node, child, prevChild); if (itemsLimit || bytesLimit) { if (!levelLastChild) { // do not apply limits on the last child because endRowId/key1 position is uncertain - levelLastChild = child; + levelLastChild = &child; } else { if (!levelPrevLastChild) { - levelPrevLastChild = child; + levelPrevLastChild = &child; } if (itemsLimit) { - ui64 items = levelPrevLastChild->GetNonErasedRowCount() - child->GetNonErasedRowCount(); + ui64 items = levelPrevLastChild->GetNonErasedRowCount() - child.GetNonErasedRowCount(); if (LimitExceeded(items, itemsLimit)) { overshot = false; return; } } if (bytesLimit) { - ui64 bytes = levelPrevLastChild->DataSize - child->DataSize; - if (LimitExceeded(bytes, bytesLimit)) { - beginRowId = Max(beginRowId, childEndRowId); - overshot = false; - return; - } + // ui64 bytes = levelPrevLastChild->DataSize - child.DataSize; + // if (LimitExceeded(bytes, bytesLimit)) { + // beginRowId = Max(beginRowId, childEndRowId); + // overshot = false; + // return; + // } } } } - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); + ready &= tryHandleChild(childState); } } }; @@ -339,7 +331,7 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleNode = [&](TChildState child) -> bool { + const auto tryHandleNode = [&](const TChildState& child) -> bool { if (child.PageId == key1PageId || child.PageId == key2PageId) { if (TryLoadNode(child, nextLevel)) { const auto& node = nextLevel.back(); @@ -375,7 +367,7 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { if (hasValidRowsRange && (child.PageId == key1PageId || child.PageId == key2PageId)) { const auto page = TryGetDataPage(child.PageId, { }); if (page) { @@ -410,7 +402,8 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount && ready; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } @@ -428,7 +421,8 @@ class TChargeBTreeIndex : public ICharge { overshot &= beginRowId == sliceBeginRowId; if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } @@ -516,31 +510,31 @@ class TChargeBTreeIndex : public ICharge { to = node.Seek(endRowId - 1) + 1; } for (TRecIdx pos : xrange(from, to)) { - auto child = node.GetShortChildRef(pos); + auto& child = node.GetShortChild(pos); auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; - TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId childEndRowId = child->RowCount; - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); + auto childState = BuildChildState(node, child, prevChild); + ready &= tryHandleChild(childState); if (bytesLimit) { - if (child->DataSize > prevBeginBytesLimitDataSize && LimitExceeded(child->DataSize - prevBeginBytesLimitDataSize, bytesLimit)) { - return; - } + // if (child->DataSize > prevBeginBytesLimitDataSize && LimitExceeded(child->DataSize - prevBeginBytesLimitDataSize, bytesLimit)) { + // return; + // } } } } }; - const auto tryHandleNode = [&](TChildState child) -> bool { + const auto tryHandleNode = [&](const TChildState& child) -> bool { return TryLoadNode(child, nextLevel); }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { return HasDataPage(child.PageId, groupId); }; for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } @@ -549,7 +543,8 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } @@ -576,31 +571,32 @@ class TChargeBTreeIndex : public ICharge { to = node.Seek(endRowId - 1) + 1; } for (TRecIdx posExt = to; posExt > from; posExt--) { - auto child = node.GetShortChildRef(posExt - 1); + auto& child = node.GetShortChild(posExt - 1); auto prevChild = posExt - 1 ? node.GetShortChildRef(posExt - 2) : nullptr; - TRowId childBeginRowId = prevChild ? prevChild->RowCount : node.BeginRowId; - TRowId childEndRowId = child->RowCount; + auto childState = BuildChildState(node, child, prevChild); if (bytesLimit) { - if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) { - return; - } + Y_UNUSED(endBytesLimitDataSize); + // if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) { + // return; + // } } - ready &= tryHandleChild(TChildState(child->PageId, childBeginRowId, childEndRowId)); + ready &= tryHandleChild(childState); } } }; - const auto tryHandleNode = [&](TChildState child) -> bool { + const auto tryHandleNode = [&](const TChildState& child) -> bool { return TryLoadNode(child, nextLevel); }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { return HasDataPage(child.PageId, groupId); }; for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } @@ -609,7 +605,8 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } @@ -666,9 +663,9 @@ class TChargeBTreeIndex : public ICharge { } for (TRecIdx pos : xrange(from, to)) { auto& child = node.GetShortChild(pos); - TRowId childBeginRowId = pos ? node.GetShortChild(pos - 1).RowCount : node.BeginRowId; - TRowId childEndRowId = child.RowCount; - ready &= tryHandleChild(TChildState(child.PageId, childBeginRowId, childEndRowId)); + auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr; + auto childState = BuildChildState(node, child, prevChild); + ready &= tryHandleChild(childState); } } }; @@ -682,7 +679,7 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleNode = [&](TChildState child) -> bool { + const auto tryHandleNode = [&](const TChildState& child) -> bool { if (child.PageId == key1PageId || child.PageId == key2PageId) { if (TryLoadNode(child, nextLevel)) { const auto& node = nextLevel.back(); @@ -710,7 +707,7 @@ class TChargeBTreeIndex : public ICharge { } }; - const auto tryHandleDataPage = [&](TChildState child) -> bool { + const auto tryHandleDataPage = [&](const TChildState& child) -> bool { if (Groups && (child.PageId == key1PageId || child.PageId == key2PageId)) { const auto page = TryGetDataPage(child.PageId, groupId); if (page) { @@ -735,7 +732,8 @@ class TChargeBTreeIndex : public ICharge { for (ui32 height = 0; height < meta.LevelCount; height++) { if (height == 0) { - ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleNode(root); } else { iterateLevel(tryHandleNode); } @@ -744,7 +742,8 @@ class TChargeBTreeIndex : public ICharge { } if (meta.LevelCount == 0) { - ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount)); + auto root = BuildRootChildState(meta); + ready &= tryHandleDataPage(root); } else { iterateLevel(tryHandleDataPage); } @@ -841,10 +840,36 @@ class TChargeBTreeIndex : public ICharge { : (left.size() > right.size() ? -1 : 1); } + TChildState BuildRootChildState(const TBtreeIndexMeta& meta) const noexcept { + return TChildState(meta.PageId, + 0, meta.RowCount, + 0, meta.GetNonErasedRowCount(), + 0, meta.DataSize); + } + + TChildState BuildChildState(const TNodeState& parent, TChild child, const TChild* prevChild) const noexcept { + return TChildState(child.PageId, + prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, + prevChild ? prevChild->GetNonErasedRowCount() : parent.PrevItems, child.GetNonErasedRowCount(), + prevChild ? prevChild->DataSize : parent.PrevDataSize, child.DataSize); + } + + TChildState BuildChildState(const TNodeState& parent, TShortChild child, const TShortChild* prevChild) const noexcept { + return TChildState(child.PageId, + prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, + prevChild ? prevChild->RowCount : parent.BeginRowId, child.RowCount, + prevChild ? prevChild->DataSize : parent.PrevDataSize, child.DataSize); + } + bool LimitExceeded(ui64 value, ui64 limit) const noexcept { return limit && value > limit; } + bool LimitExceeded(ui64 prev, ui64 current, ui64 limit) const noexcept { + Cerr << " check limit " << prev << " " << current << " " << limit << Endl; + return limit && current > prev && current - prev > limit; + } + private: const TPart* const Part; const TPartScheme &Scheme; diff --git a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp index 0a6af362f240..142a478d9fbf 100644 --- a/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp +++ b/ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp @@ -18,8 +18,12 @@ namespace { struct TTouchEnv : public NTest::TTestEnv { const TSharedData* TryGetPage(const TPart *part, TPageId pageId, TGroupId groupId) override { + if (Sticky[groupId].contains(pageId)) { + Loaded[groupId].insert(pageId); + } + Touched[groupId].insert(pageId); - if (Loaded[groupId].contains(pageId) || Sticky[groupId].contains(pageId)) { + if (Loaded[groupId].contains(pageId)) { return NTest::TTestEnv::TryGetPage(part, pageId, groupId); } return nullptr; @@ -47,6 +51,7 @@ namespace { void AssertLoadedTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message, bool allowAdditionalFirstLastPartPages = false, bool allowAdditionalFirstLoadedPage = false, bool allowLastLoadedPageDifference = false) { + TSet groupIds; for (const auto &c : {bTree.Loaded, flat.Loaded}) { for (const auto &g : c) {