Skip to content

Commit

Permalink
Use dynamic aura slot duration in lookahead collator (#3211)
Browse files Browse the repository at this point in the history
It's a follow-up of #2949. It enables the lookahead collator to
dynamically adjust the aura slot size, which may change during the
runtime upgrade. It also addressed a couple of issues with time
constants I missed in the original PR.

Good news: it works. The parachain successfully switches from sync
backing with 12s slots to async backing with 6s slots.

Bad news: during the transitional period of a single block in which the
actual runtime upgrade is performed, it still gets the old slot duration
of 12s (as it gets it from the best block), resulting in a runtime panic
(logs follow). That doesn't affect the following block production of the
parachain. Ideas on how to improve the situation are appreciated.

<details>

```
2024-02-05 12:59:36.373  INFO tokio-runtime-worker sc_basic_authorship::basic_authorship: [Parachain] 🙌 Starting consensus session on top of parent 0x6fd2d8f904f12c22531bfabf77b16dc84a6a29e45d9ae358aa6547fbf3f0438b    
2024-02-05 12:59:36.373 ERROR tokio-runtime-worker runtime: [Parachain] panicked at /home/s0me0ne/wrk/parity/polkadot-sdk/cumulus/pallets/aura-ext/src/consensus_hook.rs:69:9:
assertion `left == right` failed: slot number mismatch
  left: Slot(142261198)
 right: Slot(284522396)    
2024-02-05 12:59:36.373  WARN tokio-runtime-worker sp_state_machine::overlayed_changes::changeset: [Parachain] 1 storage transactions are left open by the runtime. Those will be rolled back.    
2024-02-05 12:59:36.373  WARN tokio-runtime-worker sp_state_machine::overlayed_changes::changeset: [Parachain] 1 storage transactions are left open by the runtime. Those will be rolled back.    
2024-02-05 12:59:36.373  WARN tokio-runtime-worker basic-authorship: [Parachain] ❗ Inherent extrinsic returned unexpected error: Error at calling runtime api: Execution failed: Execution aborted due to trap: wasm trap: wasm `unreachable` instruction executed
WASM backtrace:
error while executing at wasm backtrace:
    0: 0x4e4a3b - <unknown>!rust_begin_unwind
    1: 0x46cf57 - <unknown>!core::panicking::panic_fmt::h3c280dba88683724
    2: 0x46d238 - <unknown>!core::panicking::assert_failed_inner::hebac5970933beb4d
    3: 0x3d00fc - <unknown>!core::panicking::assert_failed::h640a47e2fb5dfb4b
    4: 0xd0db3 - <unknown>!frame_support::storage::transactional::with_transaction::hcbc31515f81b2ee1
    5: 0x34d654 - <unknown>!<cumulus_pallet_parachain_system::pallet::Call<T> as frame_support::traits::dispatch::UnfilteredDispatchable>::dispatch_bypass_filter::{{closure}}::hb7c2c9a11fa88301
    6: 0x3547db - <unknown>!environmental::local_key::LocalKey<T>::with::h783f2605ae27d6d3
    7: 0x7f454 - <unknown>!<asset_hub_rococo_runtime::RuntimeCall as frame_support::traits::dispatch::UnfilteredDispatchable>::dispatch_bypass_filter::h5e11a01ab97c06c7
    8: 0x7f237 - <unknown>!<asset_hub_rococo_runtime::RuntimeCall as sp_runtime::traits::Dispatchable>::dispatch::h7f8ae4a8fede71ca
    9: 0x26a0f3 - <unknown>!frame_executive::Executive<System,Block,Context,UnsignedValidator,AllPalletsWithSystem,COnRuntimeUpgrade>::apply_extrinsic::h75e524ff34738391
   10: 0x282211 - <unknown>!BlockBuilder_apply_extrinsic. Dropping.    
2024-02-05 12:59:36.374 ERROR tokio-runtime-worker runtime: [Parachain] panicked at /home/s0me0ne/wrk/parity/polkadot-sdk/substrate/frame/aura/src/lib.rs:416:9:
assertion `left == right` failed: Timestamp slot must match `CurrentSlot`
  left: Slot(142261198)
 right: Slot(284522396)    
2024-02-05 12:59:36.374  WARN tokio-runtime-worker sp_state_machine::overlayed_changes::changeset: [Parachain] 1 storage transactions are left open by the runtime. Those will be rolled back.    
2024-02-05 12:59:36.374  WARN tokio-runtime-worker sp_state_machine::overlayed_changes::changeset: [Parachain] 1 storage transactions are left open by the runtime. Those will be rolled back.    
2024-02-05 12:59:36.374  WARN tokio-runtime-worker basic-authorship: [Parachain] ❗ Inherent extrinsic returned unexpected error: Error at calling runtime api: Execution failed: Execution aborted due to trap: wasm trap: wasm `unreachable` instruction executed
WASM backtrace:
error while executing at wasm backtrace:
    0: 0x4e4a3b - <unknown>!rust_begin_unwind
    1: 0x46cf57 - <unknown>!core::panicking::panic_fmt::h3c280dba88683724
    2: 0x46d238 - <unknown>!core::panicking::assert_failed_inner::hebac5970933beb4d
    3: 0x3d00fc - <unknown>!core::panicking::assert_failed::h640a47e2fb5dfb4b
    4: 0x9ece6 - <unknown>!frame_support::storage::transactional::with_transaction::h26f75cb9f9462088
    5: 0x356d7e - <unknown>!environmental::local_key::LocalKey<T>::with::hbcf2d4e17b48fdb5
    6: 0x7f507 - <unknown>!<asset_hub_rococo_runtime::RuntimeCall as frame_support::traits::dispatch::UnfilteredDispatchable>::dispatch_bypass_filter::h5e11a01ab97c06c7
    7: 0x7f237 - <unknown>!<asset_hub_rococo_runtime::RuntimeCall as sp_runtime::traits::Dispatchable>::dispatch::h7f8ae4a8fede71ca
    8: 0x26a0f3 - <unknown>!frame_executive::Executive<System,Block,Context,UnsignedValidator,AllPalletsWithSystem,COnRuntimeUpgrade>::apply_extrinsic::h75e524ff34738391
    9: 0x282211 - <unknown>!BlockBuilder_apply_extrinsic. Dropping.    
2024-02-05 12:59:36.374 DEBUG tokio-runtime-worker runtime::xcmp-queue-migration: [Parachain] Lazy migration finished: item gone    
2024-02-05 12:59:36.374 ERROR tokio-runtime-worker runtime: [Parachain] panicked at /home/s0me0ne/wrk/parity/polkadot-sdk/cumulus/pallets/parachain-system/src/lib.rs:265:18:
set_validation_data inherent needs to be present in every block!    
2024-02-05 12:59:36.374 ERROR tokio-runtime-worker aura::cumulus: [Parachain] err=Error { inner: Proposing

Caused by:
    0: Error at calling runtime api: Execution failed: Execution aborted due to trap: wasm trap: wasm `unreachable` instruction executed
       WASM backtrace:
       error while executing at wasm backtrace:
           0: 0x4e4a3b - <unknown>!rust_begin_unwind
           1: 0x46cf57 - <unknown>!core::panicking::panic_fmt::h3c280dba88683724
           2: 0x46da8b - <unknown>!core::option::expect_failed::hdf18d99c3adabca7
           3: 0x2134cb - <unknown>!<cumulus_pallet_parachain_system::pallet::Pallet<T> as frame_support::traits::hooks::OnFinalize<<<<T as frame_system::pallet::Config>::Block as sp_runtime::traits::HeaderProvider>::HeaderT as sp_runtime::traits::Header>::Number>>::on_finalize::hf98aac39802896ba
           4: 0x26a9d6 - <unknown>!frame_executive::Executive<System,Block,Context,UnsignedValidator,AllPalletsWithSystem,COnRuntimeUpgrade>::idle_and_finalize_hook::h32775c0df0749d92
           5: 0x26ad9f - <unknown>!frame_executive::Executive<System,Block,Context,UnsignedValidator,AllPalletsWithSystem,COnRuntimeUpgrade>::finalize_block::h15e5a1a6b9ca8032
           6: 0x2822bd - <unknown>!BlockBuilder_finalize_block
    1: Execution failed: Execution aborted due to trap: wasm trap: wasm `unreachable` instruction executed
       WASM backtrace:
       error while executing at wasm backtrace:
           0: 0x4e4a3b - <unknown>!rust_begin_unwind
           1: 0x46cf57 - <unknown>!core::panicking::panic_fmt::h3c280dba88683724
           2: 0x46da8b - <unknown>!core::option::expect_failed::hdf18d99c3adabca7
           3: 0x2134cb - <unknown>!<cumulus_pallet_parachain_system::pallet::Pallet<T> as frame_support::traits::hooks::OnFinalize<<<<T as frame_system::pallet::Config>::Block as sp_runtime::traits::HeaderProvider>::HeaderT as sp_runtime::traits::Header>::Number>>::on_finalize::hf98aac39802896ba
           4: 0x26a9d6 - <unknown>!frame_executive::Executive<System,Block,Context,UnsignedValidator,AllPalletsWithSystem,COnRuntimeUpgrade>::idle_and_finalize_hook::h32775c0df0749d92
           5: 0x26ad9f - <unknown>!frame_executive::Executive<System,Block,Context,UnsignedValidator,AllPalletsWithSystem,COnRuntimeUpgrade>::finalize_block::h15e5a1a6b9ca8032
           6: 0x2822bd - <unknown>!BlockBuilder_finalize_block }
```

</details>

---------

Co-authored-by: Bastian Köcher <git@kchr.de>
  • Loading branch information
s0me0ne-unkn0wn and bkchr authored Feb 13, 2024
1 parent 349132f commit ec6bf5d
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 52 deletions.
60 changes: 33 additions & 27 deletions cumulus/client/consensus/aura/src/collators/lookahead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ use sp_api::ProvideRuntimeApi;
use sp_application_crypto::AppPublic;
use sp_blockchain::HeaderBackend;
use sp_consensus::SyncOracle;
use sp_consensus_aura::{AuraApi, Slot, SlotDuration};
use sp_consensus_aura::{AuraApi, Slot};
use sp_core::crypto::Pair;
use sp_inherents::CreateInherentDataProviders;
use sp_keystore::KeystorePtr;
Expand Down Expand Up @@ -95,8 +95,6 @@ pub struct Params<BI, CIDP, Client, Backend, RClient, CHP, SO, Proposer, CS> {
pub para_id: ParaId,
/// A handle to the relay-chain client's "Overseer" or task orchestrator.
pub overseer_handle: OverseerHandle,
/// The length of slots in this chain.
pub slot_duration: SlotDuration,
/// The length of slots in the relay chain.
pub relay_chain_slot_duration: Duration,
/// The underlying block proposer this should call into.
Expand Down Expand Up @@ -214,26 +212,6 @@ where
},
};

let (slot_now, timestamp) = match consensus_common::relay_slot_and_timestamp(
&relay_parent_header,
params.relay_chain_slot_duration,
) {
None => continue,
Some((r_s, t)) => {
let our_slot = Slot::from_timestamp(t, params.slot_duration);
tracing::debug!(
target: crate::LOG_TARGET,
relay_slot = ?r_s,
para_slot = ?our_slot,
timestamp = ?t,
slot_duration = ?params.slot_duration,
relay_chain_slot_duration = ?params.relay_chain_slot_duration,
"Adjusted relay-chain slot to parachain slot"
);
(our_slot, t)
},
};

let parent_search_params = ParentSearchParams {
relay_parent,
para_id: params.para_id,
Expand Down Expand Up @@ -272,14 +250,39 @@ where
let para_client = &*params.para_client;
let keystore = &params.keystore;
let can_build_upon = |block_hash| {
can_build_upon::<_, _, P>(
let slot_duration = match sc_consensus_aura::standalone::slot_duration_at(
&*params.para_client,
block_hash,
) {
Ok(sd) => sd,
Err(err) => {
tracing::error!(target: crate::LOG_TARGET, ?err, "Failed to acquire parachain slot duration");
return None
},
};
tracing::debug!(target: crate::LOG_TARGET, ?slot_duration, ?block_hash, "Parachain slot duration acquired");
let (relay_slot, timestamp) = consensus_common::relay_slot_and_timestamp(
&relay_parent_header,
params.relay_chain_slot_duration,
)?;
let slot_now = Slot::from_timestamp(timestamp, slot_duration);
tracing::debug!(
target: crate::LOG_TARGET,
?relay_slot,
para_slot = ?slot_now,
?timestamp,
?slot_duration,
relay_chain_slot_duration = ?params.relay_chain_slot_duration,
"Adjusted relay-chain slot to parachain slot"
);
Some(can_build_upon::<_, _, P>(
slot_now,
timestamp,
block_hash,
included_block,
para_client,
&keystore,
)
))
};

// Sort by depth, ascending, to choose the longest chain.
Expand All @@ -301,9 +304,12 @@ where
// This needs to change to support elastic scaling, but for continuously
// scheduled chains this ensures that the backlog will grow steadily.
for n_built in 0..2 {
let slot_claim = match can_build_upon(parent_hash).await {
let slot_claim = match can_build_upon(parent_hash) {
Some(fut) => match fut.await {
None => break,
Some(c) => c,
},
None => break,
Some(c) => c,
};

tracing::debug!(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ use frame_system::limits::{BlockLength, BlockWeights};
pub use parachains_common as common;
use parachains_common::{
impls::DealWithFees, message_queue::*, AccountId, BlockNumber, Hash, Header, Nonce, Signature,
AVERAGE_ON_INITIALIZE_RATIO, MINUTES, NORMAL_DISPATCH_RATIO,
AVERAGE_ON_INITIALIZE_RATIO, NORMAL_DISPATCH_RATIO,
};
pub use parachains_common::{AuraId, Balance};
use testnet_parachains_constants::rococo::{consensus::*, currency::*, fee::WeightToFee};
use testnet_parachains_constants::rococo::{consensus::*, currency::*, fee::WeightToFee, time::*};
use xcm_config::CollatorSelectionUpdateOrigin;

#[cfg(any(feature = "std", test))]
Expand Down
4 changes: 2 additions & 2 deletions cumulus/parachains/runtimes/people/people-rococo/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use parachains_common::{
impls::DealWithFees,
message_queue::{NarrowOriginToSibling, ParaIdToSibling},
AccountId, Balance, BlockNumber, Hash, Header, Nonce, Signature, AVERAGE_ON_INITIALIZE_RATIO,
HOURS, NORMAL_DISPATCH_RATIO,
NORMAL_DISPATCH_RATIO,
};
use polkadot_runtime_common::{identity_migrator, BlockHashCount, SlowAdjustingFeeUpdate};
use sp_api::impl_runtime_apis;
Expand All @@ -63,7 +63,7 @@ use sp_std::prelude::*;
#[cfg(feature = "std")]
use sp_version::NativeVersion;
use sp_version::RuntimeVersion;
use testnet_parachains_constants::rococo::{consensus::*, currency::*, fee::WeightToFee};
use testnet_parachains_constants::rococo::{consensus::*, currency::*, fee::WeightToFee, time::*};
use weights::{BlockExecutionWeight, ExtrinsicBaseWeight, RocksDbWeight};
use xcm::latest::prelude::BodyId;
use xcm_config::{
Expand Down
21 changes: 0 additions & 21 deletions cumulus/polkadot-parachain/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -939,8 +939,6 @@ pub async fn start_rococo_parachain_node(
overseer_handle,
announce_block,
backend| {
let slot_duration = cumulus_client_consensus_aura::slot_duration(&*client)?;

let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording(
task_manager.spawn_handle(),
client.clone(),
Expand Down Expand Up @@ -971,7 +969,6 @@ pub async fn start_rococo_parachain_node(
collator_key,
para_id,
overseer_handle,
slot_duration,
relay_chain_slot_duration,
proposer,
collator_service,
Expand Down Expand Up @@ -1435,8 +1432,6 @@ where
overseer_handle,
announce_block,
backend| {
let slot_duration = cumulus_client_consensus_aura::slot_duration(&*client)?;

let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording(
task_manager.spawn_handle(),
client.clone(),
Expand Down Expand Up @@ -1467,7 +1462,6 @@ where
collator_key,
para_id,
overseer_handle,
slot_duration,
relay_chain_slot_duration,
proposer,
collator_service,
Expand Down Expand Up @@ -1737,14 +1731,6 @@ where
}

// Move to Aura consensus.
let slot_duration = match cumulus_client_consensus_aura::slot_duration(&*client) {
Ok(d) => d,
Err(e) => {
log::error!("Could not get Aura slot duration: {e}");
return
},
};

let proposer = Proposer::new(proposer_factory);

let params = AuraParams {
Expand All @@ -1761,7 +1747,6 @@ where
collator_key,
para_id,
overseer_handle,
slot_duration,
relay_chain_slot_duration,
proposer,
collator_service,
Expand Down Expand Up @@ -1832,8 +1817,6 @@ where
overseer_handle,
announce_block,
backend| {
let slot_duration = cumulus_client_consensus_aura::slot_duration(&*client)?;

let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording(
task_manager.spawn_handle(),
client.clone(),
Expand Down Expand Up @@ -1864,7 +1847,6 @@ where
collator_key,
para_id,
overseer_handle,
slot_duration,
relay_chain_slot_duration,
proposer,
collator_service,
Expand Down Expand Up @@ -2141,8 +2123,6 @@ pub async fn start_contracts_rococo_node(
overseer_handle,
announce_block,
backend| {
let slot_duration = cumulus_client_consensus_aura::slot_duration(&*client)?;

let proposer_factory = sc_basic_authorship::ProposerFactory::with_proof_recording(
task_manager.spawn_handle(),
client.clone(),
Expand Down Expand Up @@ -2173,7 +2153,6 @@ pub async fn start_contracts_rococo_node(
collator_key,
para_id,
overseer_handle,
slot_duration,
relay_chain_slot_duration,
proposer,
collator_service,
Expand Down

0 comments on commit ec6bf5d

Please sign in to comment.