From ec6bf5d0a52161bda0eccd2d797dffab4213900a Mon Sep 17 00:00:00 2001 From: s0me0ne-unkn0wn <48632512+s0me0ne-unkn0wn@users.noreply.github.com> Date: Tue, 13 Feb 2024 15:31:33 +0100 Subject: [PATCH] Use dynamic aura slot duration in lookahead collator (#3211) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a follow-up of #2949. It enables the lookahead collator to dynamically adjust the aura slot size, which may change during the runtime upgrade. It also addressed a couple of issues with time constants I missed in the original PR. Good news: it works. The parachain successfully switches from sync backing with 12s slots to async backing with 6s slots. Bad news: during the transitional period of a single block in which the actual runtime upgrade is performed, it still gets the old slot duration of 12s (as it gets it from the best block), resulting in a runtime panic (logs follow). That doesn't affect the following block production of the parachain. Ideas on how to improve the situation are appreciated.
``` 2024-02-05 12:59:36.373 INFO tokio-runtime-worker sc_basic_authorship::basic_authorship: [Parachain] 🙌 Starting consensus session on top of parent 0x6fd2d8f904f12c22531bfabf77b16dc84a6a29e45d9ae358aa6547fbf3f0438b 2024-02-05 12:59:36.373 ERROR tokio-runtime-worker runtime: [Parachain] panicked at /home/s0me0ne/wrk/parity/polkadot-sdk/cumulus/pallets/aura-ext/src/consensus_hook.rs:69:9: assertion `left == right` failed: slot number mismatch left: Slot(142261198) right: Slot(284522396) 2024-02-05 12:59:36.373 WARN tokio-runtime-worker sp_state_machine::overlayed_changes::changeset: [Parachain] 1 storage transactions are left open by the runtime. Those will be rolled back. 2024-02-05 12:59:36.373 WARN tokio-runtime-worker sp_state_machine::overlayed_changes::changeset: [Parachain] 1 storage transactions are left open by the runtime. Those will be rolled back. 2024-02-05 12:59:36.373 WARN tokio-runtime-worker basic-authorship: [Parachain] ❗ Inherent extrinsic returned unexpected error: Error at calling runtime api: Execution failed: Execution aborted due to trap: wasm trap: wasm `unreachable` instruction executed WASM backtrace: error while executing at wasm backtrace: 0: 0x4e4a3b - !rust_begin_unwind 1: 0x46cf57 - !core::panicking::panic_fmt::h3c280dba88683724 2: 0x46d238 - !core::panicking::assert_failed_inner::hebac5970933beb4d 3: 0x3d00fc - !core::panicking::assert_failed::h640a47e2fb5dfb4b 4: 0xd0db3 - !frame_support::storage::transactional::with_transaction::hcbc31515f81b2ee1 5: 0x34d654 - ! as frame_support::traits::dispatch::UnfilteredDispatchable>::dispatch_bypass_filter::{{closure}}::hb7c2c9a11fa88301 6: 0x3547db - !environmental::local_key::LocalKey::with::h783f2605ae27d6d3 7: 0x7f454 - !::dispatch_bypass_filter::h5e11a01ab97c06c7 8: 0x7f237 - !::dispatch::h7f8ae4a8fede71ca 9: 0x26a0f3 - !frame_executive::Executive::apply_extrinsic::h75e524ff34738391 10: 0x282211 - !BlockBuilder_apply_extrinsic. Dropping. 2024-02-05 12:59:36.374 ERROR tokio-runtime-worker runtime: [Parachain] panicked at /home/s0me0ne/wrk/parity/polkadot-sdk/substrate/frame/aura/src/lib.rs:416:9: assertion `left == right` failed: Timestamp slot must match `CurrentSlot` left: Slot(142261198) right: Slot(284522396) 2024-02-05 12:59:36.374 WARN tokio-runtime-worker sp_state_machine::overlayed_changes::changeset: [Parachain] 1 storage transactions are left open by the runtime. Those will be rolled back. 2024-02-05 12:59:36.374 WARN tokio-runtime-worker sp_state_machine::overlayed_changes::changeset: [Parachain] 1 storage transactions are left open by the runtime. Those will be rolled back. 2024-02-05 12:59:36.374 WARN tokio-runtime-worker basic-authorship: [Parachain] ❗ Inherent extrinsic returned unexpected error: Error at calling runtime api: Execution failed: Execution aborted due to trap: wasm trap: wasm `unreachable` instruction executed WASM backtrace: error while executing at wasm backtrace: 0: 0x4e4a3b - !rust_begin_unwind 1: 0x46cf57 - !core::panicking::panic_fmt::h3c280dba88683724 2: 0x46d238 - !core::panicking::assert_failed_inner::hebac5970933beb4d 3: 0x3d00fc - !core::panicking::assert_failed::h640a47e2fb5dfb4b 4: 0x9ece6 - !frame_support::storage::transactional::with_transaction::h26f75cb9f9462088 5: 0x356d7e - !environmental::local_key::LocalKey::with::hbcf2d4e17b48fdb5 6: 0x7f507 - !::dispatch_bypass_filter::h5e11a01ab97c06c7 7: 0x7f237 - !::dispatch::h7f8ae4a8fede71ca 8: 0x26a0f3 - !frame_executive::Executive::apply_extrinsic::h75e524ff34738391 9: 0x282211 - !BlockBuilder_apply_extrinsic. Dropping. 2024-02-05 12:59:36.374 DEBUG tokio-runtime-worker runtime::xcmp-queue-migration: [Parachain] Lazy migration finished: item gone 2024-02-05 12:59:36.374 ERROR tokio-runtime-worker runtime: [Parachain] panicked at /home/s0me0ne/wrk/parity/polkadot-sdk/cumulus/pallets/parachain-system/src/lib.rs:265:18: set_validation_data inherent needs to be present in every block! 2024-02-05 12:59:36.374 ERROR tokio-runtime-worker aura::cumulus: [Parachain] err=Error { inner: Proposing Caused by: 0: Error at calling runtime api: Execution failed: Execution aborted due to trap: wasm trap: wasm `unreachable` instruction executed WASM backtrace: error while executing at wasm backtrace: 0: 0x4e4a3b - !rust_begin_unwind 1: 0x46cf57 - !core::panicking::panic_fmt::h3c280dba88683724 2: 0x46da8b - !core::option::expect_failed::hdf18d99c3adabca7 3: 0x2134cb - ! as frame_support::traits::hooks::OnFinalize<<<::Block as sp_runtime::traits::HeaderProvider>::HeaderT as sp_runtime::traits::Header>::Number>>::on_finalize::hf98aac39802896ba 4: 0x26a9d6 - !frame_executive::Executive::idle_and_finalize_hook::h32775c0df0749d92 5: 0x26ad9f - !frame_executive::Executive::finalize_block::h15e5a1a6b9ca8032 6: 0x2822bd - !BlockBuilder_finalize_block 1: Execution failed: Execution aborted due to trap: wasm trap: wasm `unreachable` instruction executed WASM backtrace: error while executing at wasm backtrace: 0: 0x4e4a3b - !rust_begin_unwind 1: 0x46cf57 - !core::panicking::panic_fmt::h3c280dba88683724 2: 0x46da8b - !core::option::expect_failed::hdf18d99c3adabca7 3: 0x2134cb - ! as frame_support::traits::hooks::OnFinalize<<<::Block as sp_runtime::traits::HeaderProvider>::HeaderT as sp_runtime::traits::Header>::Number>>::on_finalize::hf98aac39802896ba 4: 0x26a9d6 - !frame_executive::Executive::idle_and_finalize_hook::h32775c0df0749d92 5: 0x26ad9f - !frame_executive::Executive::finalize_block::h15e5a1a6b9ca8032 6: 0x2822bd - !BlockBuilder_finalize_block } ```
--------- Co-authored-by: Bastian Köcher --- .../consensus/aura/src/collators/lookahead.rs | 60 ++++++++++--------- .../contracts/contracts-rococo/src/lib.rs | 4 +- .../runtimes/people/people-rococo/src/lib.rs | 4 +- cumulus/polkadot-parachain/src/service.rs | 21 ------- 4 files changed, 37 insertions(+), 52 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index e24b7f6f1c93..a9f33173d832 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -59,7 +59,7 @@ use sp_api::ProvideRuntimeApi; use sp_application_crypto::AppPublic; use sp_blockchain::HeaderBackend; use sp_consensus::SyncOracle; -use sp_consensus_aura::{AuraApi, Slot, SlotDuration}; +use sp_consensus_aura::{AuraApi, Slot}; use sp_core::crypto::Pair; use sp_inherents::CreateInherentDataProviders; use sp_keystore::KeystorePtr; @@ -95,8 +95,6 @@ pub struct Params { pub para_id: ParaId, /// A handle to the relay-chain client's "Overseer" or task orchestrator. pub overseer_handle: OverseerHandle, - /// The length of slots in this chain. - pub slot_duration: SlotDuration, /// The length of slots in the relay chain. pub relay_chain_slot_duration: Duration, /// The underlying block proposer this should call into. @@ -214,26 +212,6 @@ where }, }; - let (slot_now, timestamp) = match consensus_common::relay_slot_and_timestamp( - &relay_parent_header, - params.relay_chain_slot_duration, - ) { - None => continue, - Some((r_s, t)) => { - let our_slot = Slot::from_timestamp(t, params.slot_duration); - tracing::debug!( - target: crate::LOG_TARGET, - relay_slot = ?r_s, - para_slot = ?our_slot, - timestamp = ?t, - slot_duration = ?params.slot_duration, - relay_chain_slot_duration = ?params.relay_chain_slot_duration, - "Adjusted relay-chain slot to parachain slot" - ); - (our_slot, t) - }, - }; - let parent_search_params = ParentSearchParams { relay_parent, para_id: params.para_id, @@ -272,14 +250,39 @@ where let para_client = &*params.para_client; let keystore = ¶ms.keystore; let can_build_upon = |block_hash| { - can_build_upon::<_, _, P>( + let slot_duration = match sc_consensus_aura::standalone::slot_duration_at( + &*params.para_client, + block_hash, + ) { + Ok(sd) => sd, + Err(err) => { + tracing::error!(target: crate::LOG_TARGET, ?err, "Failed to acquire parachain slot duration"); + return None + }, + }; + tracing::debug!(target: crate::LOG_TARGET, ?slot_duration, ?block_hash, "Parachain slot duration acquired"); + let (relay_slot, timestamp) = consensus_common::relay_slot_and_timestamp( + &relay_parent_header, + params.relay_chain_slot_duration, + )?; + let slot_now = Slot::from_timestamp(timestamp, slot_duration); + tracing::debug!( + target: crate::LOG_TARGET, + ?relay_slot, + para_slot = ?slot_now, + ?timestamp, + ?slot_duration, + relay_chain_slot_duration = ?params.relay_chain_slot_duration, + "Adjusted relay-chain slot to parachain slot" + ); + Some(can_build_upon::<_, _, P>( slot_now, timestamp, block_hash, included_block, para_client, &keystore, - ) + )) }; // Sort by depth, ascending, to choose the longest chain. @@ -301,9 +304,12 @@ where // This needs to change to support elastic scaling, but for continuously // scheduled chains this ensures that the backlog will grow steadily. for n_built in 0..2 { - let slot_claim = match can_build_upon(parent_hash).await { + let slot_claim = match can_build_upon(parent_hash) { + Some(fut) => match fut.await { + None => break, + Some(c) => c, + }, None => break, - Some(c) => c, }; tracing::debug!( diff --git a/cumulus/parachains/runtimes/contracts/contracts-rococo/src/lib.rs b/cumulus/parachains/runtimes/contracts/contracts-rococo/src/lib.rs index a0eb74657bd3..15a5d9b04a91 100644 --- a/cumulus/parachains/runtimes/contracts/contracts-rococo/src/lib.rs +++ b/cumulus/parachains/runtimes/contracts/contracts-rococo/src/lib.rs @@ -58,10 +58,10 @@ use frame_system::limits::{BlockLength, BlockWeights}; pub use parachains_common as common; use parachains_common::{ impls::DealWithFees, message_queue::*, AccountId, BlockNumber, Hash, Header, Nonce, Signature, - AVERAGE_ON_INITIALIZE_RATIO, MINUTES, NORMAL_DISPATCH_RATIO, + AVERAGE_ON_INITIALIZE_RATIO, NORMAL_DISPATCH_RATIO, }; pub use parachains_common::{AuraId, Balance}; -use testnet_parachains_constants::rococo::{consensus::*, currency::*, fee::WeightToFee}; +use testnet_parachains_constants::rococo::{consensus::*, currency::*, fee::WeightToFee, time::*}; use xcm_config::CollatorSelectionUpdateOrigin; #[cfg(any(feature = "std", test))] diff --git a/cumulus/parachains/runtimes/people/people-rococo/src/lib.rs b/cumulus/parachains/runtimes/people/people-rococo/src/lib.rs index 5e96395532e0..9e7dd6c884bd 100644 --- a/cumulus/parachains/runtimes/people/people-rococo/src/lib.rs +++ b/cumulus/parachains/runtimes/people/people-rococo/src/lib.rs @@ -44,7 +44,7 @@ use parachains_common::{ impls::DealWithFees, message_queue::{NarrowOriginToSibling, ParaIdToSibling}, AccountId, Balance, BlockNumber, Hash, Header, Nonce, Signature, AVERAGE_ON_INITIALIZE_RATIO, - HOURS, NORMAL_DISPATCH_RATIO, + NORMAL_DISPATCH_RATIO, }; use polkadot_runtime_common::{identity_migrator, BlockHashCount, SlowAdjustingFeeUpdate}; use sp_api::impl_runtime_apis; @@ -63,7 +63,7 @@ use sp_std::prelude::*; #[cfg(feature = "std")] use sp_version::NativeVersion; use sp_version::RuntimeVersion; -use testnet_parachains_constants::rococo::{consensus::*, currency::*, fee::WeightToFee}; +use testnet_parachains_constants::rococo::{consensus::*, currency::*, fee::WeightToFee, time::*}; use weights::{BlockExecutionWeight, ExtrinsicBaseWeight, RocksDbWeight}; use xcm::latest::prelude::BodyId; use xcm_config::{ diff --git a/cumulus/polkadot-parachain/src/service.rs b/cumulus/polkadot-parachain/src/service.rs index daeeadd5ecee..553975b01a80 100644 --- a/cumulus/polkadot-parachain/src/service.rs +++ b/cumulus/polkadot-parachain/src/service.rs @@ -939,8 +939,6 @@ pub async fn start_rococo_parachain_node( overseer_handle, announce_block, backend| { - let slot_duration = cumulus_client_consensus_aura::slot_duration(&*client)?; - let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording( task_manager.spawn_handle(), client.clone(), @@ -971,7 +969,6 @@ pub async fn start_rococo_parachain_node( collator_key, para_id, overseer_handle, - slot_duration, relay_chain_slot_duration, proposer, collator_service, @@ -1435,8 +1432,6 @@ where overseer_handle, announce_block, backend| { - let slot_duration = cumulus_client_consensus_aura::slot_duration(&*client)?; - let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording( task_manager.spawn_handle(), client.clone(), @@ -1467,7 +1462,6 @@ where collator_key, para_id, overseer_handle, - slot_duration, relay_chain_slot_duration, proposer, collator_service, @@ -1737,14 +1731,6 @@ where } // Move to Aura consensus. - let slot_duration = match cumulus_client_consensus_aura::slot_duration(&*client) { - Ok(d) => d, - Err(e) => { - log::error!("Could not get Aura slot duration: {e}"); - return - }, - }; - let proposer = Proposer::new(proposer_factory); let params = AuraParams { @@ -1761,7 +1747,6 @@ where collator_key, para_id, overseer_handle, - slot_duration, relay_chain_slot_duration, proposer, collator_service, @@ -1832,8 +1817,6 @@ where overseer_handle, announce_block, backend| { - let slot_duration = cumulus_client_consensus_aura::slot_duration(&*client)?; - let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording( task_manager.spawn_handle(), client.clone(), @@ -1864,7 +1847,6 @@ where collator_key, para_id, overseer_handle, - slot_duration, relay_chain_slot_duration, proposer, collator_service, @@ -2141,8 +2123,6 @@ pub async fn start_contracts_rococo_node( overseer_handle, announce_block, backend| { - let slot_duration = cumulus_client_consensus_aura::slot_duration(&*client)?; - let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording( task_manager.spawn_handle(), client.clone(), @@ -2173,7 +2153,6 @@ pub async fn start_contracts_rococo_node( collator_key, para_id, overseer_handle, - slot_duration, relay_chain_slot_duration, proposer, collator_service,