From e3938747cdc201a6c3fbf920d0e983e2b7be01c7 Mon Sep 17 00:00:00 2001 From: Davide Galassi Date: Wed, 25 Jan 2023 12:57:59 +0100 Subject: [PATCH] Babe: bad epoch index with skipped epochs and warp sync (#13135) * Detect and correct epoch-index for skipped epochs * Code refactory * Epoch index should be also be fixed for secondary claims with VRF * Fix typo * Make clippy happy * Fix typo * Trigger pipeline --- client/consensus/babe/src/authorship.rs | 21 +++- client/consensus/babe/src/lib.rs | 75 +++++++----- client/consensus/babe/src/tests.rs | 141 +++++++++++++++++----- client/consensus/babe/src/verification.rs | 17 ++- 4 files changed, 187 insertions(+), 67 deletions(-) diff --git a/client/consensus/babe/src/authorship.rs b/client/consensus/babe/src/authorship.rs index b39153faa6d1a..3a92aa5c6b224 100644 --- a/client/consensus/babe/src/authorship.rs +++ b/client/consensus/babe/src/authorship.rs @@ -20,6 +20,7 @@ use super::Epoch; use codec::Encode; +use sc_consensus_epochs::Epoch as EpochT; use schnorrkel::{keys::PublicKey, vrf::VRFInOut}; use sp_application_crypto::AppKey; use sp_consensus_babe::{ @@ -135,18 +136,23 @@ fn claim_secondary_slot( keystore: &SyncCryptoStorePtr, author_secondary_vrf: bool, ) -> Option<(PreDigest, AuthorityId)> { - let Epoch { authorities, randomness, epoch_index, .. } = epoch; + let Epoch { authorities, randomness, mut epoch_index, .. } = epoch; if authorities.is_empty() { return None } + if epoch.end_slot() <= slot { + // Slot doesn't strictly belong to the epoch, create a clone with fixed values. + epoch_index = epoch.clone_for_slot(slot).epoch_index; + } + let expected_author = secondary_slot_author(slot, authorities, *randomness)?; for (authority_id, authority_index) in keys { if authority_id == expected_author { let pre_digest = if author_secondary_vrf { - let transcript_data = make_transcript_data(randomness, slot, *epoch_index); + let transcript_data = make_transcript_data(randomness, slot, epoch_index); let result = SyncCryptoStore::sr25519_vrf_sign( &**keystore, AuthorityId::ID, @@ -238,11 +244,16 @@ fn claim_primary_slot( keystore: &SyncCryptoStorePtr, keys: &[(AuthorityId, usize)], ) -> Option<(PreDigest, AuthorityId)> { - let Epoch { authorities, randomness, epoch_index, .. } = epoch; + let Epoch { authorities, randomness, mut epoch_index, .. } = epoch; + + if epoch.end_slot() <= slot { + // Slot doesn't strictly belong to the epoch, create a clone with fixed values. + epoch_index = epoch.clone_for_slot(slot).epoch_index; + } for (authority_id, authority_index) in keys { - let transcript = make_transcript(randomness, slot, *epoch_index); - let transcript_data = make_transcript_data(randomness, slot, *epoch_index); + let transcript = make_transcript(randomness, slot, epoch_index); + let transcript_data = make_transcript_data(randomness, slot, epoch_index); let result = SyncCryptoStore::sr25519_vrf_sign( &**keystore, AuthorityId::ID, diff --git a/client/consensus/babe/src/lib.rs b/client/consensus/babe/src/lib.rs index c9e171dbed887..5ba523665b4ef 100644 --- a/client/consensus/babe/src/lib.rs +++ b/client/consensus/babe/src/lib.rs @@ -209,8 +209,9 @@ impl From for Epoch { } impl Epoch { - /// Create the genesis epoch (epoch #0). This is defined to start at the slot of - /// the first block, so that has to be provided. + /// Create the genesis epoch (epoch #0). + /// + /// This is defined to start at the slot of the first block, so that has to be provided. pub fn genesis(genesis_config: &BabeConfiguration, slot: Slot) -> Epoch { Epoch { epoch_index: 0, @@ -224,6 +225,38 @@ impl Epoch { }, } } + + /// Clone and tweak epoch information to refer to the specified slot. + /// + /// All the information which depends on the slot value is recomputed and assigned + /// to the returned epoch instance. + /// + /// The `slot` must be greater than or equal the original epoch start slot, + /// if is less this operation is equivalent to a simple clone. + pub fn clone_for_slot(&self, slot: Slot) -> Epoch { + let mut epoch = self.clone(); + + let skipped_epochs = *slot.saturating_sub(self.start_slot) / self.duration; + + let epoch_index = epoch.epoch_index.checked_add(skipped_epochs).expect( + "epoch number is u64; it should be strictly smaller than number of slots; \ + slots relate in some way to wall clock time; \ + if u64 is not enough we should crash for safety; qed.", + ); + + let start_slot = skipped_epochs + .checked_mul(epoch.duration) + .and_then(|skipped_slots| epoch.start_slot.checked_add(skipped_slots)) + .expect( + "slot number is u64; it should relate in some way to wall clock time; \ + if u64 is not enough we should crash for safety; qed.", + ); + + epoch.epoch_index = epoch_index; + epoch.start_slot = Slot::from(start_slot); + + epoch + } } /// Errors encountered by the babe authorship task. @@ -1540,45 +1573,27 @@ where }; if viable_epoch.as_ref().end_slot() <= slot { - // some epochs must have been skipped as our current slot - // fits outside the current epoch. we will figure out - // which epoch it belongs to and we will re-use the same - // data for that epoch - let mut epoch_data = viable_epoch.as_mut(); - let skipped_epochs = - *slot.saturating_sub(epoch_data.start_slot) / epoch_data.duration; - - // NOTE: notice that we are only updating a local copy of the `Epoch`, this + // Some epochs must have been skipped as our current slot fits outside the + // current epoch. We will figure out which epoch it belongs to and we will + // re-use the same data for that epoch. + // Notice that we are only updating a local copy of the `Epoch`, this // makes it so that when we insert the next epoch into `EpochChanges` below // (after incrementing it), it will use the correct epoch index and start slot. - // we do not update the original epoch that will be re-used because there might + // We do not update the original epoch that will be re-used because there might // be other forks (that we haven't imported) where the epoch isn't skipped, and - // to import those forks we want to keep the original epoch data. not updating + // to import those forks we want to keep the original epoch data. Not updating // the original epoch works because when we search the tree for which epoch to // use for a given slot, we will search in-depth with the predicate // `epoch.start_slot <= slot` which will still match correctly without updating // `start_slot` to the correct value as below. - let epoch_index = epoch_data.epoch_index.checked_add(skipped_epochs).expect( - "epoch number is u64; it should be strictly smaller than number of slots; \ - slots relate in some way to wall clock time; \ - if u64 is not enough we should crash for safety; qed.", - ); - - let start_slot = skipped_epochs - .checked_mul(epoch_data.duration) - .and_then(|skipped_slots| epoch_data.start_slot.checked_add(skipped_slots)) - .expect( - "slot number is u64; it should relate in some way to wall clock time; \ - if u64 is not enough we should crash for safety; qed.", - ); + let epoch = viable_epoch.as_mut(); + let prev_index = epoch.epoch_index; + *epoch = epoch.clone_for_slot(slot); warn!( target: LOG_TARGET, - "👶 Epoch(s) skipped: from {} to {}", epoch_data.epoch_index, epoch_index, + "👶 Epoch(s) skipped: from {} to {}", prev_index, epoch.epoch_index, ); - - epoch_data.epoch_index = epoch_index; - epoch_data.start_slot = Slot::from(start_slot); } log!( diff --git a/client/consensus/babe/src/tests.rs b/client/consensus/babe/src/tests.rs index 304bc9fc22ca9..0dd0b59dd69c0 100644 --- a/client/consensus/babe/src/tests.rs +++ b/client/consensus/babe/src/tests.rs @@ -20,7 +20,6 @@ use super::*; use authorship::claim_slot; -use log::debug; use rand_chacha::{ rand_core::{RngCore, SeedableRng}, ChaChaRng, @@ -35,9 +34,10 @@ use sp_application_crypto::key_types::BABE; use sp_consensus::{DisableProofRecording, NoNetwork as DummyOracle, Proposal}; use sp_consensus_babe::{ inherents::InherentDataProvider, make_transcript, make_transcript_data, AllowedSlots, - AuthorityPair, Slot, + AuthorityId, AuthorityPair, Slot, }; use sp_consensus_slots::SlotDuration; +use sp_consensus_vrf::schnorrkel::VRFOutput; use sp_core::crypto::Pair; use sp_keyring::Sr25519Keyring; use sp_keystore::{ @@ -553,52 +553,133 @@ fn sig_is_not_pre_digest() { } #[test] -fn can_author_block() { - sp_tracing::try_init_simple(); +fn claim_epoch_slots() { + // We don't require the full claim information, thus as a shorter alias we're + // going to use a simple integer value. Generally not verbose enough, but good enough + // to be used within the narrow scope of a single test. + // 0 -> None (i.e. unable to claim the slot), + // 1 -> Primary + // 2 -> Secondary + // 3 -> Secondary with VRF + const EPOCH_DURATION: u64 = 10; let authority = Sr25519Keyring::Alice; let keystore = create_keystore(authority); - let mut i = 0; - let epoch = Epoch { + let mut epoch = Epoch { start_slot: 0.into(), authorities: vec![(authority.public().into(), 1)], randomness: [0; 32], epoch_index: 1, - duration: 100, + duration: EPOCH_DURATION, config: BabeEpochConfiguration { c: (3, 10), allowed_slots: AllowedSlots::PrimaryAndSecondaryPlainSlots, }, }; - let mut config = crate::BabeConfiguration { - slot_duration: 1000, - epoch_length: 100, - c: (3, 10), - authorities: Vec::new(), + let claim_slot_wrap = |s, e| match claim_slot(Slot::from(s as u64), &e, &keystore) { + None => 0, + Some((PreDigest::Primary(_), _)) => 1, + Some((PreDigest::SecondaryPlain(_), _)) => 2, + Some((PreDigest::SecondaryVRF(_), _)) => 3, + }; + + // With secondary mechanism we should be able to claim all slots. + let claims: Vec<_> = (0..EPOCH_DURATION) + .into_iter() + .map(|slot| claim_slot_wrap(slot, epoch.clone())) + .collect(); + assert_eq!(claims, [1, 2, 2, 1, 2, 2, 2, 2, 2, 1]); + + // With secondary with VRF mechanism we should be able to claim all the slots. + epoch.config.allowed_slots = AllowedSlots::PrimaryAndSecondaryVRFSlots; + let claims: Vec<_> = (0..EPOCH_DURATION) + .into_iter() + .map(|slot| claim_slot_wrap(slot, epoch.clone())) + .collect(); + assert_eq!(claims, [1, 3, 3, 1, 3, 3, 3, 3, 3, 1]); + + // Otherwise with only vrf-based primary slots we are able to claim a subset of the slots. + epoch.config.allowed_slots = AllowedSlots::PrimarySlots; + let claims: Vec<_> = (0..EPOCH_DURATION) + .into_iter() + .map(|slot| claim_slot_wrap(slot, epoch.clone())) + .collect(); + assert_eq!(claims, [1, 0, 0, 1, 0, 0, 0, 0, 0, 1]); +} + +#[test] +fn claim_vrf_check() { + let authority = Sr25519Keyring::Alice; + let keystore = create_keystore(authority); + + let public = authority.public(); + + let epoch = Epoch { + start_slot: 0.into(), + authorities: vec![(public.into(), 1)], randomness: [0; 32], - allowed_slots: AllowedSlots::PrimaryAndSecondaryPlainSlots, + epoch_index: 1, + duration: 10, + config: BabeEpochConfiguration { + c: (3, 10), + allowed_slots: AllowedSlots::PrimaryAndSecondaryVRFSlots, + }, }; - // with secondary slots enabled it should never be empty - match claim_slot(i.into(), &epoch, &keystore) { - None => i += 1, - Some(s) => debug!(target: LOG_TARGET, "Authored block {:?}", s.0), - } + // We leverage the predictability of claim types given a constant randomness. - // otherwise with only vrf-based primary slots we might need to try a couple - // of times. - config.allowed_slots = AllowedSlots::PrimarySlots; - loop { - match claim_slot(i.into(), &epoch, &keystore) { - None => i += 1, - Some(s) => { - debug!(target: LOG_TARGET, "Authored block {:?}", s.0); - break - }, - } - } + // We expect a Primary claim for slot 0 + + let pre_digest = match claim_slot(0.into(), &epoch, &keystore).unwrap().0 { + PreDigest::Primary(d) => d, + v => panic!("Unexpected pre-digest variant {:?}", v), + }; + let transcript = make_transcript_data(&epoch.randomness.clone(), 0.into(), epoch.epoch_index); + let sign = SyncCryptoStore::sr25519_vrf_sign(&*keystore, AuthorityId::ID, &public, transcript) + .unwrap() + .unwrap(); + assert_eq!(pre_digest.vrf_output, VRFOutput(sign.output)); + + // We expect a SecondaryVRF claim for slot 1 + let pre_digest = match claim_slot(1.into(), &epoch, &keystore).unwrap().0 { + PreDigest::SecondaryVRF(d) => d, + v => panic!("Unexpected pre-digest variant {:?}", v), + }; + let transcript = make_transcript_data(&epoch.randomness.clone(), 1.into(), epoch.epoch_index); + let sign = SyncCryptoStore::sr25519_vrf_sign(&*keystore, AuthorityId::ID, &public, transcript) + .unwrap() + .unwrap(); + assert_eq!(pre_digest.vrf_output, VRFOutput(sign.output)); + + // Check that correct epoch index has been used if epochs are skipped (primary VRF) + let slot = Slot::from(103); + let claim = match claim_slot(slot, &epoch, &keystore).unwrap().0 { + PreDigest::Primary(d) => d, + v => panic!("Unexpected claim variant {:?}", v), + }; + let fixed_epoch = epoch.clone_for_slot(slot); + let transcript = make_transcript_data(&epoch.randomness.clone(), slot, fixed_epoch.epoch_index); + let sign = SyncCryptoStore::sr25519_vrf_sign(&*keystore, AuthorityId::ID, &public, transcript) + .unwrap() + .unwrap(); + assert_eq!(fixed_epoch.epoch_index, 11); + assert_eq!(claim.vrf_output, VRFOutput(sign.output)); + + // Check that correct epoch index has been used if epochs are skipped (secondary VRF) + let slot = Slot::from(100); + let pre_digest = match claim_slot(slot, &epoch, &keystore).unwrap().0 { + PreDigest::SecondaryVRF(d) => d, + v => panic!("Unexpected claim variant {:?}", v), + }; + let fixed_epoch = epoch.clone_for_slot(slot); + let transcript = make_transcript_data(&epoch.randomness.clone(), slot, fixed_epoch.epoch_index); + let sign = SyncCryptoStore::sr25519_vrf_sign(&*keystore, AuthorityId::ID, &public, transcript) + .unwrap() + .unwrap(); + assert_eq!(fixed_epoch.epoch_index, 11); + assert_eq!(pre_digest.vrf_output, VRFOutput(sign.output)); } // Propose and import a new BABE block on top of the given parent. diff --git a/client/consensus/babe/src/verification.rs b/client/consensus/babe/src/verification.rs index e77a70c8e465a..8e8f5d998f884 100644 --- a/client/consensus/babe/src/verification.rs +++ b/client/consensus/babe/src/verification.rs @@ -22,6 +22,7 @@ use crate::{ babe_err, find_pre_digest, BlockT, Epoch, Error, LOG_TARGET, }; use log::{debug, trace}; +use sc_consensus_epochs::Epoch as EpochT; use sc_consensus_slots::CheckedHeader; use sp_consensus_babe::{ digests::{ @@ -155,10 +156,16 @@ fn check_primary_header( c: (u64, u64), ) -> Result<(), Error> { let author = &epoch.authorities[pre_digest.authority_index as usize].0; + let mut epoch_index = epoch.epoch_index; + + if epoch.end_slot() <= pre_digest.slot { + // Slot doesn't strictly belong to this epoch, create a clone with fixed values. + epoch_index = epoch.clone_for_slot(pre_digest.slot).epoch_index; + } if AuthorityPair::verify(&signature, pre_hash, author) { let (inout, _) = { - let transcript = make_transcript(&epoch.randomness, pre_digest.slot, epoch.epoch_index); + let transcript = make_transcript(&epoch.randomness, pre_digest.slot, epoch_index); schnorrkel::PublicKey::from_bytes(author.as_slice()) .and_then(|p| { @@ -228,8 +235,14 @@ fn check_secondary_vrf_header( return Err(Error::InvalidAuthor(expected_author.clone(), author.clone())) } + let mut epoch_index = epoch.epoch_index; + if epoch.end_slot() <= pre_digest.slot { + // Slot doesn't strictly belong to this epoch, create a clone with fixed values. + epoch_index = epoch.clone_for_slot(pre_digest.slot).epoch_index; + } + if AuthorityPair::verify(&signature, pre_hash.as_ref(), author) { - let transcript = make_transcript(&epoch.randomness, pre_digest.slot, epoch.epoch_index); + let transcript = make_transcript(&epoch.randomness, pre_digest.slot, epoch_index); schnorrkel::PublicKey::from_bytes(author.as_slice()) .and_then(|p| p.vrf_verify(transcript, &pre_digest.vrf_output, &pre_digest.vrf_proof))