From 0d012c82620fc1c5ba9b408f7dba8856effd1efe Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:33:47 +0200 Subject: [PATCH] Fix approval-voting canonicalize off by one (#6864) Approval voting canonicalize is off by one that means if we are finalizing blocks one by one, approval-voting cleans it up every other block for example: - With 1, 2, 3, 4, 5, 6 blocks created, the stored range would be StoredBlockRange(1,7) - When block 3 is finalized the canonicalize works and StoredBlockRange is (4,7) - When block 4 is finalized the canonicalize exists early because of the `if range.0 > canon_number` break clause, so blocks are not cleaned up. - When block 5 is finalized the canonicalize works and StoredBlockRange becomes (6,7) and both block 4 and 5 are cleaned up. The consequences of this is that sometimes we keep block entries around after they are finalized, so at restart we consider this blocks and send them to approval-distribution. In most cases this is not a problem, but in the case when finality is lagging on restart approval-distribution will receive 4 as being the oldest block it needs to work on, and since BlockFinalized is never resent for block 4 after restart it won't get the opportunity to clean that up. Therefore it will end running approval-distribution aggression on block 4, because that is the oldest block it received from approval-voting for which it did not see a BlockFinalized signal. --------- Signed-off-by: Alexandru Gheorghe (cherry picked from commit 2dd2bb5a847d7bf006493d9fe9b10a13289118b6) --- .../src/approval_db/v3/tests.rs | 52 +++++++++++++++++-- polkadot/node/core/approval-voting/src/ops.rs | 2 +- prdoc/pr_6864.prdoc | 18 +++++++ 3 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 prdoc/pr_6864.prdoc diff --git a/polkadot/node/core/approval-voting/src/approval_db/v3/tests.rs b/polkadot/node/core/approval-voting/src/approval_db/v3/tests.rs index d2a1d7d400b1..9338f581acd0 100644 --- a/polkadot/node/core/approval-voting/src/approval_db/v3/tests.rs +++ b/polkadot/node/core/approval-voting/src/approval_db/v3/tests.rs @@ -263,8 +263,8 @@ fn add_block_entry_adds_child() { fn canonicalize_works() { let (mut db, store) = make_db(); - // -> B1 -> C1 -> D1 - // A -> B2 -> C2 -> D2 + // -> B1 -> C1 -> D1 -> E1 + // A -> B2 -> C2 -> D2 -> E2 // // We'll canonicalize C1. Everything except D1 should disappear. // @@ -292,18 +292,22 @@ fn canonicalize_works() { let block_hash_c2 = Hash::repeat_byte(5); let block_hash_d1 = Hash::repeat_byte(6); let block_hash_d2 = Hash::repeat_byte(7); + let block_hash_e1 = Hash::repeat_byte(8); + let block_hash_e2 = Hash::repeat_byte(9); let candidate_receipt_genesis = make_candidate(ParaId::from(1_u32), genesis); let candidate_receipt_a = make_candidate(ParaId::from(2_u32), block_hash_a); let candidate_receipt_b = make_candidate(ParaId::from(3_u32), block_hash_a); let candidate_receipt_b1 = make_candidate(ParaId::from(4_u32), block_hash_b1); let candidate_receipt_c1 = make_candidate(ParaId::from(5_u32), block_hash_c1); + let candidate_receipt_e1 = make_candidate(ParaId::from(6_u32), block_hash_e1); let cand_hash_1 = candidate_receipt_genesis.hash(); let cand_hash_2 = candidate_receipt_a.hash(); let cand_hash_3 = candidate_receipt_b.hash(); let cand_hash_4 = candidate_receipt_b1.hash(); let cand_hash_5 = candidate_receipt_c1.hash(); + let cand_hash_6 = candidate_receipt_e1.hash(); let block_entry_a = make_block_entry(block_hash_a, genesis, 1, Vec::new()); let block_entry_b1 = make_block_entry(block_hash_b1, block_hash_a, 2, Vec::new()); @@ -325,6 +329,12 @@ fn canonicalize_works() { let block_entry_d2 = make_block_entry(block_hash_d2, block_hash_c2, 4, vec![(CoreIndex(0), cand_hash_5)]); + let block_entry_e1 = + make_block_entry(block_hash_e1, block_hash_d1, 5, vec![(CoreIndex(0), cand_hash_6)]); + + let block_entry_e2 = + make_block_entry(block_hash_e2, block_hash_d2, 5, vec![(CoreIndex(0), cand_hash_6)]); + let candidate_info = { let mut candidate_info = HashMap::new(); candidate_info.insert( @@ -344,6 +354,8 @@ fn canonicalize_works() { candidate_info .insert(cand_hash_5, NewCandidateInfo::new(candidate_receipt_c1, GroupIndex(5), None)); + candidate_info + .insert(cand_hash_6, NewCandidateInfo::new(candidate_receipt_e1, GroupIndex(6), None)); candidate_info }; @@ -356,6 +368,8 @@ fn canonicalize_works() { block_entry_c2.clone(), block_entry_d1.clone(), block_entry_d2.clone(), + block_entry_e1.clone(), + block_entry_e2.clone(), ]; let mut overlay_db = OverlayedBackend::new(&db); @@ -437,7 +451,7 @@ fn canonicalize_works() { assert_eq!( load_stored_blocks(store.as_ref(), &TEST_CONFIG).unwrap().unwrap(), - StoredBlockRange(4, 5) + StoredBlockRange(4, 6) ); check_candidates_in_store(vec![ @@ -446,6 +460,7 @@ fn canonicalize_works() { (cand_hash_3, Some(vec![block_hash_d1])), (cand_hash_4, Some(vec![block_hash_d1])), (cand_hash_5, None), + (cand_hash_6, Some(vec![block_hash_e1])), ]); check_blocks_in_store(vec![ @@ -455,6 +470,37 @@ fn canonicalize_works() { (block_hash_c1, None), (block_hash_c2, None), (block_hash_d1, Some(vec![cand_hash_3, cand_hash_4])), + (block_hash_e1, Some(vec![cand_hash_6])), + (block_hash_d2, None), + ]); + + let mut overlay_db = OverlayedBackend::new(&db); + canonicalize(&mut overlay_db, 4, block_hash_d1).unwrap(); + let write_ops = overlay_db.into_write_ops(); + db.write(write_ops).unwrap(); + + assert_eq!( + load_stored_blocks(store.as_ref(), &TEST_CONFIG).unwrap().unwrap(), + StoredBlockRange(5, 6) + ); + + check_candidates_in_store(vec![ + (cand_hash_1, None), + (cand_hash_2, None), + (cand_hash_3, None), + (cand_hash_4, None), + (cand_hash_5, None), + (cand_hash_6, Some(vec![block_hash_e1])), + ]); + + check_blocks_in_store(vec![ + (block_hash_a, None), + (block_hash_b1, None), + (block_hash_b2, None), + (block_hash_c1, None), + (block_hash_c2, None), + (block_hash_d1, None), + (block_hash_e1, Some(vec![cand_hash_6])), (block_hash_d2, None), ]); } diff --git a/polkadot/node/core/approval-voting/src/ops.rs b/polkadot/node/core/approval-voting/src/ops.rs index 2a8fdba5aa36..6731e6f9ad03 100644 --- a/polkadot/node/core/approval-voting/src/ops.rs +++ b/polkadot/node/core/approval-voting/src/ops.rs @@ -88,7 +88,7 @@ pub fn canonicalize( ) -> SubsystemResult<()> { let range = match overlay_db.load_stored_blocks()? { None => return Ok(()), - Some(range) if range.0 >= canon_number => return Ok(()), + Some(range) if range.0 > canon_number => return Ok(()), Some(range) => range, }; diff --git a/prdoc/pr_6864.prdoc b/prdoc/pr_6864.prdoc new file mode 100644 index 000000000000..6d6c84e22da4 --- /dev/null +++ b/prdoc/pr_6864.prdoc @@ -0,0 +1,18 @@ +# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0 +# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json + +title: Fix approval-voting canonicalize off by one + +doc: + - audience: Node Dev + description: | + The approval-voting canonicalize was off by one, which lead to blocks being + cleaned up every other 2 blocks. Normally, this is not an issue, but on restart + we might end up sending NewBlocks to approval-distribution with finalized blocks. + This would be problematic in the case were finalization was already lagging before + restart, so after restart approval-distribution will trigger aggression on the wrong + already finalized block. + +crates: + - name: polkadot-node-core-approval-voting + bump: minor