From 160bdc0d504feab8ebd797ea577ac1c4e918654d Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Tue, 2 Jul 2024 05:17:27 +0300 Subject: [PATCH] Replace `displaced_leaves_after_finalizing` implementation with more efficient --- substrate/client/db/src/lib.rs | 183 +++++++++--------- .../primitives/blockchain/src/backend.rs | 145 ++++++++------ .../blockchain/src/header_metadata.rs | 26 --- 3 files changed, 177 insertions(+), 177 deletions(-) diff --git a/substrate/client/db/src/lib.rs b/substrate/client/db/src/lib.rs index c30be837c54a..b978ce261e09 100644 --- a/substrate/client/db/src/lib.rs +++ b/substrate/client/db/src/lib.rs @@ -2558,7 +2558,7 @@ pub(crate) mod tests { backend::{Backend as BTrait, BlockImportOperation as Op}, blockchain::Backend as BLBTrait, }; - use sp_blockchain::{lowest_common_ancestor, lowest_common_ancestor_multiblock, tree_route}; + use sp_blockchain::{lowest_common_ancestor, tree_route}; use sp_core::H256; use sp_runtime::{ testing::{Block as RawBlock, ExtrinsicWrapper, Header}, @@ -3120,117 +3120,118 @@ pub(crate) mod tests { } #[test] - fn lowest_common_ancestors_multiblock_works() { + fn displaced_leaves_after_finalizing_works() { let backend = Backend::<Block>::new_test(1000, 100); let blockchain = backend.blockchain(); - let block0 = insert_header(&backend, 0, Default::default(), None, Default::default()); + let genesis_number = 0; + let genesis_hash = + insert_header(&backend, genesis_number, Default::default(), None, Default::default()); // fork from genesis: 3 prong. // block 0 -> a1 -> a2 -> a3 - // | - // -> b1 -> b2 -> c1 -> c2 - // | - // -> d1 -> d2 - let a1 = insert_header(&backend, 1, block0, None, Default::default()); - let a2 = insert_header(&backend, 2, a1, None, Default::default()); - let a3 = insert_header(&backend, 3, a2, None, Default::default()); - - // fork from genesis: 2 prong. 
- let b1 = insert_header(&backend, 1, block0, None, H256::from([1; 32])); - let b2 = insert_header(&backend, 2, b1, None, Default::default()); - - // fork from b2. - let c1 = insert_header(&backend, 3, b2, None, H256::from([2; 32])); - let c2 = insert_header(&backend, 4, c1, None, Default::default()); - - // fork from b1. - let d1 = insert_header(&backend, 2, b1, None, H256::from([3; 32])); - let d2 = insert_header(&backend, 3, d1, None, Default::default()); - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a3, b2]).unwrap().unwrap(); - - assert_eq!(lca.hash, block0); - assert_eq!(lca.number, 0); - } - - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a1, a3]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); - } - - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a3, a1]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); - } + // \ + // -> b1 -> b2 -> c1 -> c2 + // \ + // -> d1 -> d2 + let a1_number = 1; + let a1_hash = insert_header(&backend, a1_number, genesis_hash, None, Default::default()); + let a2_number = 2; + let a2_hash = insert_header(&backend, a2_number, a1_hash, None, Default::default()); + let a3_number = 3; + let a3_hash = insert_header(&backend, a3_number, a2_hash, None, Default::default()); { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a3]).unwrap().unwrap(); - - assert_eq!(lca.hash, a2); - assert_eq!(lca.number, 2); - } - - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a1]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); + let displaced = blockchain + .displaced_leaves_after_finalizing(genesis_hash, genesis_number) + .unwrap(); + assert_eq!(displaced.displaced_leaves, vec![]); + assert_eq!(displaced.displaced_blocks, vec![]); } - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a2, a2]).unwrap().unwrap(); - - assert_eq!(lca.hash, a2); - assert_eq!(lca.number, 2); + let 
displaced_a1 = + blockchain.displaced_leaves_after_finalizing(a1_hash, a1_number).unwrap(); + assert_eq!(displaced_a1.displaced_leaves, vec![]); + assert_eq!(displaced_a1.displaced_blocks, vec![]); + + let displaced_a2 = + blockchain.displaced_leaves_after_finalizing(a2_hash, a2_number).unwrap(); + assert_eq!(displaced_a2.displaced_leaves, vec![]); + assert_eq!(displaced_a2.displaced_blocks, vec![]); + + let displaced_a3 = + blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); + assert_eq!(displaced_a3.displaced_leaves, vec![]); + assert_eq!(displaced_a3.displaced_blocks, vec![]); } - { - let lca = - lowest_common_ancestor_multiblock(blockchain, &[a3, d2, c2]).unwrap().unwrap(); - - assert_eq!(lca.hash, block0); - assert_eq!(lca.number, 0); - } + // fork from genesis: 2 prong. + let b1_number = 1; + let b1_hash = insert_header(&backend, b1_number, genesis_hash, None, H256::from([1; 32])); + let b2_number = 2; + let b2_hash = insert_header(&backend, b2_number, b1_hash, None, Default::default()); - { - let lca = - lowest_common_ancestor_multiblock(blockchain, &[c2, d2, b2]).unwrap().unwrap(); + // fork from b2. + let c1_number = 3; + let c1_hash = insert_header(&backend, c1_number, b2_hash, None, H256::from([2; 32])); + let c2_number = 4; + let c2_hash = insert_header(&backend, c2_number, c1_hash, None, Default::default()); - assert_eq!(lca.hash, b1); - assert_eq!(lca.number, 1); - } + // fork from b1. 
+ let d1_number = 2; + let d1_hash = insert_header(&backend, d1_number, b1_hash, None, H256::from([3; 32])); + let d2_number = 3; + let d2_hash = insert_header(&backend, d2_number, d1_hash, None, Default::default()); { - let lca = - lowest_common_ancestor_multiblock(blockchain, &[a1, a2, a3]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); + let displaced_a1 = + blockchain.displaced_leaves_after_finalizing(a1_hash, a1_number).unwrap(); + assert_eq!( + displaced_a1.displaced_leaves, + vec![(c2_number, c2_hash), (d2_number, d2_hash)] + ); + let mut displaced_blocks = vec![b1_hash, b2_hash, c1_hash, c2_hash, d1_hash, d2_hash]; + displaced_blocks.sort(); + assert_eq!(displaced_a1.displaced_blocks, displaced_blocks); + + let displaced_a2 = + blockchain.displaced_leaves_after_finalizing(a2_hash, a2_number).unwrap(); + assert_eq!(displaced_a1.displaced_leaves, displaced_a2.displaced_leaves); + assert_eq!(displaced_a1.displaced_blocks, displaced_a2.displaced_blocks); + + let displaced_a3 = + blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap(); + assert_eq!(displaced_a1.displaced_leaves, displaced_a3.displaced_leaves); + assert_eq!(displaced_a1.displaced_blocks, displaced_a3.displaced_blocks); } - { - let lca = - lowest_common_ancestor_multiblock(blockchain, &[b1, b2, d1]).unwrap().unwrap(); - - assert_eq!(lca.hash, b1); - assert_eq!(lca.number, 1); + let displaced = + blockchain.displaced_leaves_after_finalizing(b1_hash, b1_number).unwrap(); + assert_eq!(displaced.displaced_leaves, vec![(a3_number, a3_hash)]); + let mut displaced_blocks = vec![a1_hash, a2_hash, a3_hash]; + displaced_blocks.sort(); + assert_eq!(displaced.displaced_blocks, displaced_blocks); } - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[]); - - assert_eq!(true, matches!(lca, Ok(None))); + let displaced = + blockchain.displaced_leaves_after_finalizing(b2_hash, b2_number).unwrap(); + assert_eq!( + displaced.displaced_leaves, + 
vec![(a3_number, a3_hash), (d2_number, d2_hash)] + ); + let mut displaced_blocks = vec![a1_hash, a2_hash, a3_hash, d1_hash, d2_hash]; + displaced_blocks.sort(); + assert_eq!(displaced.displaced_blocks, displaced_blocks); } - { - let lca = lowest_common_ancestor_multiblock(blockchain, &[a1]).unwrap().unwrap(); - - assert_eq!(lca.hash, a1); - assert_eq!(lca.number, 1); + let displaced = + blockchain.displaced_leaves_after_finalizing(c2_hash, c2_number).unwrap(); + assert_eq!( + displaced.displaced_leaves, + vec![(a3_number, a3_hash), (d2_number, d2_hash)] + ); + let mut displaced_blocks = vec![a1_hash, a2_hash, a3_hash, d1_hash, d2_hash]; + displaced_blocks.sort(); + assert_eq!(displaced.displaced_blocks, displaced_blocks); } } diff --git a/substrate/primitives/blockchain/src/backend.rs b/substrate/primitives/blockchain/src/backend.rs index 3b6a51df67f5..fc47413c24c2 100644 --- a/substrate/primitives/blockchain/src/backend.rs +++ b/substrate/primitives/blockchain/src/backend.rs @@ -21,15 +21,15 @@ use log::warn; use parking_lot::RwLock; use sp_runtime::{ generic::BlockId, - traits::{Block as BlockT, CheckedSub, Header as HeaderT, NumberFor, Zero}, + traits::{Block as BlockT, Header as HeaderT, NumberFor, One, Zero}, Justifications, }; -use std::collections::btree_set::BTreeSet; +use std::collections::{btree_set::BTreeSet, HashMap, VecDeque}; use crate::{ error::{Error, Result}, - header_metadata::{self, HeaderMetadata}, - lowest_common_ancestor_multiblock, tree_route, + header_metadata::HeaderMetadata, + tree_route, CachedHeaderMetadata, }; /// Blockchain database header backend. Does not perform any validation. @@ -226,76 +226,101 @@ pub trait Backend<Block: BlockT>: finalized_block_hash: Block::Hash, finalized_block_number: NumberFor<Block>, ) -> std::result::Result<DisplacedLeavesAfterFinalization<Block>, Error> { - let mut result = DisplacedLeavesAfterFinalization::default(); - let leaves = self.leaves()?; // If we have only one leaf there are no forks, and we can return early. 
if finalized_block_number == Zero::zero() || leaves.len() == 1 { - return Ok(result) + return Ok(DisplacedLeavesAfterFinalization::default()) } - let first_leaf = leaves.first().ok_or(Error::Backend( - "Unable to find any leaves. This should not happen.".to_string(), - ))?; - let leaf_block_header = self.expect_header(*first_leaf)?; - - // If the distance between the leafs and the finalized block is large, calculating - // tree routes can be very expensive. In that case, we will try to find the - // lowest common ancestor between all the leaves. The assumption here is that the forks are - // close to the tip and not long. So the LCA can be computed from the header cache. If the - // LCA is above the finalized block, we know that there are no displaced leaves by the - // finalization. - if leaf_block_header - .number() - .checked_sub(&finalized_block_number) - .unwrap_or(0u32.into()) > - header_metadata::LRU_CACHE_SIZE.into() - { - if let Some(lca) = lowest_common_ancestor_multiblock(self, &leaves)? { - if lca.number > finalized_block_number { - return Ok(result) - } else { - warn!( - "The distance between leafs and finalized block is large. Finalization \ - can take a long time." 
- ); - } - }; - } + // Store hashes of finalized blocks for quick checking later, the last block is the + // finalized one + let mut finalized_chain = VecDeque::new(); + finalized_chain.push_front(self.header_metadata(finalized_block_hash)?); + + // Local cache is a performance optimization in case of finalized block deep below the + // tip of the chain with a lot of leaves above finalized block + let mut local_cache = HashMap::<Block::Hash, CachedHeaderMetadata<Block>>::new(); - result.displaced_leaves.reserve_exact(leaves.len()); - result.displaced_blocks.reserve_exact(leaves.len()); + let mut result = DisplacedLeavesAfterFinalization { + displaced_leaves: Vec::with_capacity(leaves.len()), + displaced_blocks: Vec::with_capacity(leaves.len()), + }; + let mut displaced_blocks_candidates = Vec::new(); - // For each leaf determine whether it belongs to a non-canonical branch. for leaf_hash in leaves { - let leaf_block_header = self.expect_header(leaf_hash)?; - let leaf_number = *leaf_block_header.number(); + let mut current_header_metadata = self.header_metadata(leaf_hash)?; + let leaf_number = current_header_metadata.number; + + // Collect all block hashes until the height of the finalized block + displaced_blocks_candidates.clear(); + while current_header_metadata.number > finalized_block_number { + displaced_blocks_candidates.push(current_header_metadata.hash); + + let parent_hash = current_header_metadata.parent; + match local_cache.get(&parent_hash) { + Some(metadata_header) => { + current_header_metadata = metadata_header.clone(); + }, + None => { + current_header_metadata = self.header_metadata(parent_hash)?; + // Cache locally in case more branches above finalized block reference + // the same block hash + local_cache.insert(parent_hash, current_header_metadata.clone()); + }, + } + } - let leaf_tree_route = match tree_route(self, leaf_hash, finalized_block_hash) { - Ok(tree_route) => tree_route, - Err(Error::UnknownBlock(_)) => { - // Sometimes routes can't be calculated. E.g. after warp sync. 
+ // If it points back to the finalized header, nothing is left to do; this leaf will be + // checked again later + if current_header_metadata.hash == finalized_block_hash { + continue; + } + + // Otherwise the whole leaf branch needs to be pruned, track it all the way to the + // point of branching from the finalized chain + result.displaced_leaves.push((leaf_number, leaf_hash)); + result.displaced_blocks.extend(displaced_blocks_candidates.drain(..)); + result.displaced_blocks.push(current_header_metadata.hash); + // Collect the rest of the displaced blocks of leaf branch + for distance_from_finalized in 1_u32.. { + // Find block at `distance_from_finalized` from finalized block + let (finalized_chain_block_number, finalized_chain_block_hash) = + match finalized_chain.iter().rev().nth(distance_from_finalized as usize) { + Some(header) => (header.number, header.hash), + None => { + let header = self.header_metadata( + finalized_chain.front().expect("Not empty; qed").parent, + )?; + let result = (header.number, header.hash); + finalized_chain.push_front(header); + result + }, + }; + + if current_header_metadata.number < finalized_chain_block_number + One::one() { + // Skip more blocks until we get all blocks on finalized chain until the height + // of the parent block continue; - }, - Err(e) => Err(e)?, - }; - - // Is it a stale fork? 
- let needs_pruning = leaf_tree_route.common_block().hash != finalized_block_hash; - - if needs_pruning { - result.displaced_leaves.push((leaf_number, leaf_hash)); - result.displaced_blocks.extend( - leaf_tree_route - .retracted() - .into_iter() - .map(|hash_and_number| hash_and_number.hash), - ); + } + + let parent_hash = current_header_metadata.parent; + if finalized_chain_block_hash == parent_hash { + // Reached finalized chain, nothing left to do + break; + } + + // Store displaced block and look deeper for block on finalized chain + result.displaced_blocks.push(parent_hash); + current_header_metadata = self.header_metadata(parent_hash)?; } } - Ok(result) + // There could be duplicates shared by multiple branches, clean them up + result.displaced_blocks.sort_unstable(); + result.displaced_blocks.dedup(); + + Ok(result) } /// Clears the block gap from DB after the fast-sync. diff --git a/substrate/primitives/blockchain/src/header_metadata.rs b/substrate/primitives/blockchain/src/header_metadata.rs index 84955746f8a0..43f9ac84db0f 100644 --- a/substrate/primitives/blockchain/src/header_metadata.rs +++ b/substrate/primitives/blockchain/src/header_metadata.rs @@ -98,32 +98,6 @@ pub fn lowest_common_ancestor<Block: BlockT, T: HeaderMetadata<Block> + ?Sized>( Ok(HashAndNumber { hash: header_one.hash, number: header_one.number }) } -/// Get the lowest common ancestor between multiple blocks. -/// -/// Returns `Ok(None)` only when input list is empty. 
-pub fn lowest_common_ancestor_multiblock<Block: BlockT, T: HeaderMetadata<Block> + ?Sized>( - backend: &T, - hashes: &[Block::Hash], -) -> Result<Option<HashAndNumber<Block>>, T::Error> { - // Ensure the list of hashes is not empty - let mut hashes_iter = hashes.iter(); - - let first_hash = match hashes_iter.next() { - Some(hash) => *hash, - None => return Ok(None), - }; - - // Start with the first hash as the initial LCA - let first_cached = backend.header_metadata(first_hash)?; - let mut lca = HashAndNumber { number: first_cached.number, hash: first_cached.hash }; - for hash in hashes_iter { - // Calculate the LCA of the current LCA and the next hash - lca = lowest_common_ancestor(backend, lca.hash, *hash)?; - } - - Ok(Some(lca)) -} - /// Compute a tree-route between two blocks. See tree-route docs for more details. pub fn tree_route<Block: BlockT, T: HeaderMetadata<Block> + ?Sized>( backend: &T,