From 1d3496d7c5cc1957b97539f11b499b355149fbb8 Mon Sep 17 00:00:00 2001 From: Alexandru Vasile <60601340+lexnv@users.noreply.github.com> Date: Tue, 23 Jul 2024 21:55:58 +0300 Subject: [PATCH] hotfix: blockchain/backend: Skip genesis leaf to unblock syncing (#5103) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR effectively skips over cases where the blockchain reports the genesis block as leaf. The issue manifests as the blockchain getting stuck and not importing blocks after a while. Although the root-cause of why the blockchain reports the genesis as leaf is not scoped, this hot-fix is unblocking the new release. While at it, added some extra debug logs to identify issues more easily in the future. ### Issue ``` 2024-07-22 10:06:08.708 DEBUG tokio-runtime-worker db::blockchain: Checking for displaced leaves after finalization. leaves=[0xd62aea69664b74c55b7e79ab5855b117d213156a5e9ab05ad0737772aaf42c14, 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe] finalized_block_hash=0x8f8e…7f34 finalized_block_number=24148459 2024-07-22 10:06:08.708 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0xd62aea69664b74c55b7e79ab5855b117d213156a5e9ab05ad0737772aaf42c14 (elapsed 25.74µs) leaf_number=24148577 2024-07-22 10:06:08.709 DEBUG tokio-runtime-worker db::blockchain: Leaf points to the finalized header 0xd62aea69664b74c55b7e79ab5855b117d213156a5e9ab05ad0737772aaf42c14, skipping for now (elapsed 70.72µs) // This is Kusama genesis 2024-07-22 10:06:08.709 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe (elapsed 127.271µs) leaf_number=0 2024-07-22 10:06:08.709 DEBUG tokio-runtime-worker db::blockchain: Skip more blocks until we get all blocks on finalized chain until the height of the parent block current_hash=0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe current_num=0 finalized_num=24148458 ``` ### Before ``` 2024-07-20 00:45:00.234 INFO tokio-runtime-worker substrate: ⚙️ Preparing 0.0 bps, target=#24116589 (50 peers), best: #24116498 (0xb846…8720), finalized #24116493 (0x50b6…2445), ⬇ 2.3MiB/s ⬆ 2.6kiB/s ... 2024-07-20 14:05:18.572 INFO tokio-runtime-worker substrate: ⚙️ Syncing 0.0 bps, target=#24124495 (51 peers), best: #24119976 (0x6970…aeb3), finalized #24119808 (0xd900…abe4), ⬇ 2.2MiB/s ⬆ 3.1kiB/s 2024-07-20 14:05:23.573 INFO tokio-runtime-worker substrate: ⚙️ Syncing 0.0 bps, target=#24124495 (51 peers), best: #24119976 (0x6970…aeb3), finalized #24119808 (0xd900…abe4), ⬇ 2.2MiB/s ⬆ 5.8kiB/s ``` ### After ``` 2024-07-22 10:41:10.897 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0x4e8cf3ff18e7d13ff7fec28f9fc8ce6eff5492ed8dc046e961b76dec5c0cfddf (elapsed 39.26µs) leaf_number=24150969 2024-07-22 10:41:10.897 DEBUG tokio-runtime-worker db::blockchain: Leaf points to the finalized header 0x4e8cf3ff18e7d13ff7fec28f9fc8ce6eff5492ed8dc046e961b76dec5c0cfddf, skipping for now (elapsed 49.69µs) 2024-07-22 10:41:10.897 DEBUG tokio-runtime-worker db::blockchain: Skip genesis block 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe reporterd as leaf (elapsed 54.57µs) 2024-07-22 10:41:10.897 DEBUG tokio-runtime-worker db::blockchain: Finished with result DisplacedLeavesAfterFinalization { displaced_leaves: [], displaced_blocks: [] } (elapsed 58.78µs) finalized_block_hash=0x02b3…5338 finalized_block_number=24150967 2024-07-22 10:41:12.357 INFO tokio-runtime-worker substrate: 🏆 Imported #24150970 (0x4e8c…fddf → 0x3637…56bb) 2024-07-22 10:41:12.862 INFO tokio-runtime-worker substrate: 💤 Idle (50 peers), best: #24150970 (0x3637…56bb), finalized #24150967 (0x02b3…5338), ⬇ 2.0MiB/s ⬆ 804.7kiB/s 2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Checking for displaced leaves after finalization. leaves=[0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb, 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe] finalized_block_hash=0xa1534a105b90e7036a18ac1c646cd2bd6c41c66cc055817f4f51209ab9070e5c finalized_block_number=24150968 2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb (elapsed 62.48µs) leaf_number=24150970 2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Leaf points to the finalized header 0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb, skipping for now (elapsed 71.76µs) 2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Skip genesis block 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe reporterd as leaf (elapsed 75.96µs) 2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Finished with result DisplacedLeavesAfterFinalization { displaced_leaves: [], displaced_blocks: [] } (elapsed 80.27µs) finalized_block_hash=0xa153…0e5c finalized_block_number=24150968 2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Checking for displaced leaves after finalization. leaves=[0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb, 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe] finalized_block_hash=0xa1534a105b90e7036a18ac1c646cd2bd6c41c66cc055817f4f51209ab9070e5c finalized_block_number=24150968 2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb (elapsed 39.67µs) leaf_number=24150970 2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Leaf points to the finalized header 0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb, skipping for now (elapsed 50.3µs) 2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Skip genesis block 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe reporterd as leaf (elapsed 54.52µs) 2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Finished with result DisplacedLeavesAfterFinalization { displaced_leaves: [], displaced_blocks: [] } (elapsed 58.66µs) finalized_block_hash=0xa153…0e5c finalized_block_number=24150968 2024-07-22 10:41:17.863 INFO tokio-runtime-worker substrate: 💤 Idle (50 peers), best: #24150970 (0x3637…56bb), finalized #24150968 (0xa153…0e5c), ⬇ 1.2MiB/s ⬆ 815.0kiB/s 2024-07-22 10:41:18.399 INFO tokio-runtime-worker substrate: 🏆 Imported #24150971 (0x3637…56bb → 0x4ee3…5f7c) ``` Closes: https://github.com/paritytech/polkadot-sdk/issues/5088 --------- Signed-off-by: Alexandru Vasile Co-authored-by: Bastian Köcher --- prdoc/pr_5103.prdoc | 18 +++ substrate/client/db/src/lib.rs | 4 +- .../primitives/blockchain/src/backend.rs | 144 ++++++++++++++++-- .../0001-basic-warp-sync/test-warp-sync.toml | 2 +- .../0001-basic-warp-sync/test-warp-sync.zndsl | 1 + 5 files changed, 157 insertions(+), 12 deletions(-) create mode 100644 prdoc/pr_5103.prdoc diff --git a/prdoc/pr_5103.prdoc b/prdoc/pr_5103.prdoc new file mode 100644 index 000000000000..b0f72bf531f1 --- /dev/null +++ b/prdoc/pr_5103.prdoc @@ -0,0 +1,18 @@ +title: Skip genesis leaf to unblock syncing + +doc: + - audience: + - Node Operator + - Node Dev + description: | + This PR skips over the genesis block reported as leaf when calculating displaced branches. + In those cases, when the genesis block is reported as leaf, the node would compute the path + from the current finalized block to the genesis block. This operation is time consuming and + is enough to block syncing. In the current state, the genesis block is assumed to always be + part of the finalized chain. + +crates: +- name: sc-client-db + bump: none +- name: sp-blockchain + bump: patch diff --git a/substrate/client/db/src/lib.rs b/substrate/client/db/src/lib.rs index acd165d91613..ba0cbc09d53d 100644 --- a/substrate/client/db/src/lib.rs +++ b/substrate/client/db/src/lib.rs @@ -3169,7 +3169,7 @@ pub(crate) mod tests { let displaced = blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, genesis_hash]); - assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_leaves, vec![(genesis_number, genesis_hash)]); assert_eq!(displaced.displaced_blocks, vec![]); } @@ -3177,7 +3177,7 @@ pub(crate) mod tests { let displaced = blockchain.displaced_leaves_after_finalizing(a4_hash, a4_number).unwrap(); assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, genesis_hash]); - assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_leaves, vec![(genesis_number, genesis_hash)]); assert_eq!(displaced.displaced_blocks, vec![]); } diff --git a/substrate/primitives/blockchain/src/backend.rs b/substrate/primitives/blockchain/src/backend.rs index 2accd4dad12c..fd0c5795cbfd 100644 --- a/substrate/primitives/blockchain/src/backend.rs +++ b/substrate/primitives/blockchain/src/backend.rs @@ -255,10 +255,11 @@ pub trait Backend: ) -> std::result::Result, Error> { let leaves = self.leaves()?; + let now = std::time::Instant::now(); debug!( target: crate::LOG_TARGET, ?leaves, - %finalized_block_hash, + ?finalized_block_hash, ?finalized_block_number, "Checking for displaced leaves after finalization." ); @@ -277,11 +278,21 @@ pub trait Backend: debug!( target: crate::LOG_TARGET, hash = ?finalized_block_hash, - "Tried to fetch unknown block, block ancestry has gaps." + elapsed = ?now.elapsed(), + "Tried to fetch unknown block, block ancestry has gaps.", ); return Ok(DisplacedLeavesAfterFinalization::default()); }, - Err(e) => Err(e)?, + Err(e) => { + debug!( + target: crate::LOG_TARGET, + hash = ?finalized_block_hash, + err = ?e, + elapsed = ?now.elapsed(), + "Failed to fetch block.", + ); + return Err(e); + }, }; finalized_chain.push_front(MinimalBlockMetadata::from(¤t_finalized)); @@ -296,11 +307,42 @@ pub trait Backend: let mut displaced_blocks_candidates = Vec::new(); + let genesis_hash = self.info().genesis_hash; + for leaf_hash in leaves { let mut current_header_metadata = - MinimalBlockMetadata::from(&self.header_metadata(leaf_hash)?); + MinimalBlockMetadata::from(&self.header_metadata(leaf_hash).map_err(|err| { + debug!( + target: crate::LOG_TARGET, + ?leaf_hash, + ?err, + elapsed = ?now.elapsed(), + "Failed to fetch leaf header.", + ); + err + })?); let leaf_number = current_header_metadata.number; + // The genesis block is part of the canonical chain. + if leaf_hash == genesis_hash { + result.displaced_leaves.push((leaf_number, leaf_hash)); + debug!( + target: crate::LOG_TARGET, + ?leaf_hash, + elapsed = ?now.elapsed(), + "Added genesis leaf to displaced leaves." + ); + continue + } + + debug!( + target: crate::LOG_TARGET, + ?leaf_number, + ?leaf_hash, + elapsed = ?now.elapsed(), + "Handle displaced leaf.", + ); + // Collect all block hashes until the height of the finalized block displaced_blocks_candidates.clear(); while current_header_metadata.number > finalized_block_number { @@ -312,8 +354,20 @@ pub trait Backend: current_header_metadata = *metadata_header; }, None => { - current_header_metadata = - MinimalBlockMetadata::from(&self.header_metadata(parent_hash)?); + current_header_metadata = MinimalBlockMetadata::from( + &self.header_metadata(parent_hash).map_err(|err| { + debug!( + target: crate::LOG_TARGET, + ?err, + ?parent_hash, + ?leaf_hash, + elapsed = ?now.elapsed(), + "Failed to fetch parent header during leaf tracking.", + ); + + err + })?, + ); // Cache locally in case more branches above finalized block reference // the same block hash local_cache.insert(parent_hash, current_header_metadata); @@ -324,6 +378,13 @@ pub trait Backend: // If points back to the finalized header then nothing left to do, this leaf will be // checked again later if current_header_metadata.hash == finalized_block_hash { + debug!( + target: crate::LOG_TARGET, + ?leaf_hash, + elapsed = ?now.elapsed(), + "Leaf points to the finalized header, skipping for now.", + ); + continue; } @@ -332,6 +393,15 @@ pub trait Backend: // check for this gap later. displaced_blocks_candidates.push(current_header_metadata.hash); + debug!( + target: crate::LOG_TARGET, + current_hash = ?current_header_metadata.hash, + current_num = ?current_header_metadata.number, + ?finalized_block_number, + elapsed = ?now.elapsed(), + "Looking for path from finalized block number to current leaf number" + ); + // Collect the rest of the displaced blocks of leaf branch for distance_from_finalized in 1_u32.. { // Find block at `distance_from_finalized` from finalized block @@ -348,11 +418,22 @@ pub trait Backend: distance_from_finalized, hash = ?to_fetch.parent, number = ?to_fetch.number, + elapsed = ?now.elapsed(), "Tried to fetch unknown block, block ancestry has gaps." ); break; }, - Err(e) => Err(e)?, + Err(err) => { + debug!( + target: crate::LOG_TARGET, + hash = ?to_fetch.parent, + number = ?to_fetch.number, + ?err, + elapsed = ?now.elapsed(), + "Failed to fetch header for parent hash.", + ); + return Err(err); + }, }; let metadata = MinimalBlockMetadata::from(&metadata); let result = (metadata.number, metadata.hash); @@ -361,6 +442,19 @@ pub trait Backend: }, }; + if current_header_metadata.hash == finalized_chain_block_hash { + // Found the block on the finalized chain, nothing left to do + result.displaced_leaves.push((leaf_number, leaf_hash)); + + debug!( + target: crate::LOG_TARGET, + ?leaf_hash, + elapsed = ?now.elapsed(), + "Leaf is ancestor of finalized block." + ); + break; + } + if current_header_metadata.number <= finalized_chain_block_number { // Skip more blocks until we get all blocks on finalized chain until the height // of the parent block @@ -372,13 +466,36 @@ pub trait Backend: // Reached finalized chain, nothing left to do result.displaced_blocks.extend(displaced_blocks_candidates.drain(..)); result.displaced_leaves.push((leaf_number, leaf_hash)); + + debug!( + target: crate::LOG_TARGET, + ?leaf_hash, + elapsed = ?now.elapsed(), + "Found displaced leaf." + ); break; } // Store displaced block and look deeper for block on finalized chain + debug!( + target: crate::LOG_TARGET, + ?parent_hash, + elapsed = ?now.elapsed(), + "Found displaced block. Looking further.", + ); displaced_blocks_candidates.push(parent_hash); - current_header_metadata = - MinimalBlockMetadata::from(&self.header_metadata(parent_hash)?); + current_header_metadata = MinimalBlockMetadata::from( + &self.header_metadata(parent_hash).map_err(|err| { + debug!( + target: crate::LOG_TARGET, + ?err, + ?parent_hash, + elapsed = ?now.elapsed(), + "Failed to fetch header for parent during displaced block collection", + ); + err + })?, + ); } } @@ -386,6 +503,15 @@ pub trait Backend: result.displaced_blocks.sort_unstable(); result.displaced_blocks.dedup(); + debug!( + target: crate::LOG_TARGET, + %finalized_block_hash, + ?finalized_block_number, + ?result, + elapsed = ?now.elapsed(), + "Finished checking for displaced leaves after finalization.", + ); + return Ok(result); } } diff --git a/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.toml b/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.toml index f4586ba69d00..d40da6d64370 100644 --- a/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.toml +++ b/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.toml @@ -27,4 +27,4 @@ chain_spec_path = "chain-spec.json" [[relaychain.nodes]] name = "dave" validator = false - args = ["--sync warp"] + args = ["--sync warp -ldb::blockchain"] diff --git a/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.zndsl b/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.zndsl index c5644797321e..26c9fac60735 100644 --- a/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.zndsl +++ b/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.zndsl @@ -22,6 +22,7 @@ dave: reports block height is at least 1 within 60 seconds dave: reports block height is at least {{DB_BLOCK_HEIGHT}} within 60 seconds dave: log line matches "Warp sync is complete" within 60 seconds +dave: log line matches "Checking for displaced leaves after finalization\. leaves\=\[0xc5e7b4cfd23932bb930e859865430a35f6741b4732d677822d492ca64cc8d059\]" within 10 seconds # State sync is logically part of warp sync dave: log line matches "State sync is complete" within 60 seconds dave: log line matches "Block history download is complete" within 10 seconds