From 01af6307b794cbf4a7d3bc02299414edd8ac3203 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 25 Oct 2023 11:50:45 +0300 Subject: [PATCH 01/52] skeleton Signed-off-by: Andrei Sandu --- Cargo.lock | 25 +++++++++++++ Cargo.toml | 1 + polkadot/node/subsystem-bench/Cargo.toml | 46 ++++++++++++++++++++++++ polkadot/node/subsystem-bench/README.md | 6 ++++ polkadot/node/subsystem-bench/build.rs | 22 ++++++++++++ 5 files changed, 100 insertions(+) create mode 100644 polkadot/node/subsystem-bench/Cargo.toml create mode 100644 polkadot/node/subsystem-bench/README.md create mode 100644 polkadot/node/subsystem-bench/build.rs diff --git a/Cargo.lock b/Cargo.lock index a8d679c6ce8b..e846ae53a543 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12990,6 +12990,31 @@ dependencies = [ "sp-core", ] +[[package]] +name = "polkadot-subsystem-bench" +version = "1.0.0" +dependencies = [ + "assert_matches", + "async-trait", + "clap 4.4.6", + "color-eyre", + "futures", + "futures-timer", + "polkadot-erasure-coding", + "polkadot-node-core-backing", + "polkadot-node-primitives", + "polkadot-node-subsystem", + "polkadot-node-subsystem-test-helpers", + "polkadot-node-subsystem-types", + "polkadot-node-subsystem-util", + "polkadot-primitives", + "rand 0.8.5", + "sp-core", + "sp-keystore", + "substrate-build-script-utils", + "tracing-gum", +] + [[package]] name = "polkadot-test-client" version = "1.0.0" diff --git a/Cargo.toml b/Cargo.toml index c98fe6d1a3ac..2c5acccd5cfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -125,6 +125,7 @@ members = [ "polkadot/node/gum/proc-macro", "polkadot/node/jaeger", "polkadot/node/malus", + "polkadot/node/subsystem-bench", "polkadot/node/metrics", "polkadot/node/network/approval-distribution", "polkadot/node/network/availability-distribution", diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml new file mode 100644 index 000000000000..b2cc88ff057d --- /dev/null +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -0,0 
+1,46 @@ +[package] +name = "polkadot-subsystem-bench" +description = "Subsystem performance benchmark client" +version = "1.0.0" +authors.workspace = true +edition.workspace = true +license.workspace = true +readme = "README.md" +publish = false + +[[bin]] +name = "subsystem-bench" +path = "src/subsystem-bench.rs" + +# Prevent rustdoc error. Already documented from top-level Cargo.toml. +doc = false + +[dependencies] +polkadot-node-subsystem = { path = "../subsystem" } +polkadot-node-subsystem-util = { path = "../subsystem-util" } +polkadot-node-subsystem-types = { path = "../subsystem-types" } +polkadot-node-core-backing = { path = "../core/backing" } +polkadot-node-primitives = { path = "../primitives" } +polkadot-primitives = { path = "../../primitives" } +color-eyre = { version = "0.6.1", default-features = false } +assert_matches = "1.5" +async-trait = "0.1.57" +sp-keystore = { path = "../../../substrate/primitives/keystore" } +sp-core = { path = "../../../substrate/primitives/core" } +clap = { version = "4.4.6", features = ["derive"] } +futures = "0.3.21" +futures-timer = "3.0.2" +gum = { package = "tracing-gum", path = "../gum" } +erasure = { package = "polkadot-erasure-coding", path = "../../erasure-coding" } +rand = "0.8.5" + +[dev-dependencies] +polkadot-node-subsystem-test-helpers = { path = "../subsystem-test-helpers" } +sp-core = { path = "../../../substrate/primitives/core" } +futures = { version = "0.3.21", features = ["thread-pool"] } + +[build-dependencies] +substrate-build-script-utils = { path = "../../../substrate/utils/build-script-utils" } + +[features] +default = [] diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md new file mode 100644 index 000000000000..8843f9883116 --- /dev/null +++ b/polkadot/node/subsystem-bench/README.md @@ -0,0 +1,6 @@ +# Subsystem benchmark client + +Run subsystem performance tests in isolation. 
+ +Currently implemented benchmarks: +* `availability-recovery` diff --git a/polkadot/node/subsystem-bench/build.rs b/polkadot/node/subsystem-bench/build.rs new file mode 100644 index 000000000000..84fe22e23ed6 --- /dev/null +++ b/polkadot/node/subsystem-bench/build.rs @@ -0,0 +1,22 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see <http://www.gnu.org/licenses/>. + +fn main() { + substrate_build_script_utils::generate_cargo_keys(); + // For the node/worker version check, make sure we always rebuild the node and binary workers + // when the version changes. 
+ substrate_build_script_utils::rerun_if_git_head_changed(); +} From 7c22abeb76ed0db47f7df9ac475c2ccab7743b67 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 6 Nov 2023 19:15:38 +0200 Subject: [PATCH 02/52] wip Signed-off-by: Andrei Sandu --- Cargo.lock | 18 +- .../network/availability-recovery/src/lib.rs | 13 +- polkadot/node/subsystem-bench/Cargo.toml | 21 +- .../node/subsystem-bench/src/availability.rs | 501 ++++++++++++++++++ .../subsystem-bench/src/subsystem-bench.rs | 133 +++++ 5 files changed, 674 insertions(+), 12 deletions(-) create mode 100644 polkadot/node/subsystem-bench/src/availability.rs create mode 100644 polkadot/node/subsystem-bench/src/subsystem-bench.rs diff --git a/Cargo.lock b/Cargo.lock index e846ae53a543..d113fd7e43cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12998,20 +12998,32 @@ dependencies = [ "async-trait", "clap 4.4.6", "color-eyre", + "env_logger 0.9.3", "futures", "futures-timer", + "log", + "parity-scale-codec", + "polkadot-availability-recovery", "polkadot-erasure-coding", - "polkadot-node-core-backing", + "polkadot-node-metrics", + "polkadot-node-network-protocol", "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-node-subsystem-test-helpers", "polkadot-node-subsystem-types", "polkadot-node-subsystem-util", "polkadot-primitives", + "polkadot-primitives-test-helpers", + "prometheus", "rand 0.8.5", + "sc-network", + "sc-service", + "sp-application-crypto", "sp-core", + "sp-keyring", "sp-keystore", "substrate-build-script-utils", + "tokio", "tracing-gum", ] @@ -18675,9 +18687,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.32.0" +version = "1.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" dependencies = [ "backtrace", "bytes", diff --git 
a/polkadot/node/network/availability-recovery/src/lib.rs b/polkadot/node/network/availability-recovery/src/lib.rs index e2146981da92..156a8cbbc82e 100644 --- a/polkadot/node/network/availability-recovery/src/lib.rs +++ b/polkadot/node/network/availability-recovery/src/lib.rs @@ -65,7 +65,7 @@ mod error; mod futures_undead; mod metrics; mod task; -use metrics::Metrics; +pub use metrics::Metrics; #[cfg(test)] mod tests; @@ -582,7 +582,7 @@ impl AvailabilityRecoverySubsystem { } } - async fn run(self, mut ctx: Context) -> SubsystemResult<()> { + pub async fn run(self, mut ctx: Context) -> SubsystemResult<()> { let mut state = State::default(); let Self { mut req_receiver, metrics, recovery_strategy_kind, bypass_availability_store } = self; @@ -617,9 +617,12 @@ impl AvailabilityRecoverySubsystem { .into_iter() .cycle(); + gum::debug!("Subsystem running"); loop { let recv_req = req_receiver.recv(|| vec![COST_INVALID_REQUEST]).fuse(); pin_mut!(recv_req); + gum::debug!("waiting for message"); + futures::select! { erasure_task = erasure_task_rx.next() => { match erasure_task { @@ -640,7 +643,7 @@ impl AvailabilityRecoverySubsystem { } }, None => { - gum::debug!( + gum::trace!( target: LOG_TARGET, "Erasure task channel closed", ); @@ -655,6 +658,7 @@ impl AvailabilityRecoverySubsystem { &mut state, signal, ).await? { + gum::debug!(target: LOG_TARGET, "subsystem concluded"); return Ok(()); } FromOrchestra::Communication { msg } => { @@ -818,10 +822,11 @@ async fn erasure_task_thread( let _ = sender.send(maybe_data); }, None => { - gum::debug!( + gum::trace!( target: LOG_TARGET, "Erasure task channel closed. 
Node shutting down ?", ); + break }, } } diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index b2cc88ff057d..729749ab153b 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -19,9 +19,10 @@ doc = false polkadot-node-subsystem = { path = "../subsystem" } polkadot-node-subsystem-util = { path = "../subsystem-util" } polkadot-node-subsystem-types = { path = "../subsystem-types" } -polkadot-node-core-backing = { path = "../core/backing" } polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } +polkadot-node-network-protocol = { path = "../network/protocol" } +polkadot-availability-recovery = { path = "../network/availability-recovery" } color-eyre = { version = "0.6.1", default-features = false } assert_matches = "1.5" async-trait = "0.1.57" @@ -31,13 +32,23 @@ clap = { version = "4.4.6", features = ["derive"] } futures = "0.3.21" futures-timer = "3.0.2" gum = { package = "tracing-gum", path = "../gum" } -erasure = { package = "polkadot-erasure-coding", path = "../../erasure-coding" } +polkadot-erasure-coding = { package = "polkadot-erasure-coding", path = "../../erasure-coding" } +log = "0.4.17" +env_logger = "0.9.0" rand = "0.8.5" +parity-scale-codec = { version = "3.6.1", features = ["std", "derive"] } +tokio = "1.24.2" -[dev-dependencies] polkadot-node-subsystem-test-helpers = { path = "../subsystem-test-helpers" } -sp-core = { path = "../../../substrate/primitives/core" } -futures = { version = "0.3.21", features = ["thread-pool"] } +sp-keyring = { path = "../../../substrate/primitives/keyring" } +sp-application-crypto = { path = "../../../substrate/primitives/application-crypto" } +sc-network = { path = "../../../substrate/client/network" } +sc-service = { path = "../../../substrate/client/service" } +polkadot-node-metrics = { path = "../metrics" } + +polkadot-primitives-test-helpers = { path = 
"../../primitives/test-helpers" } +# prometheus = { package = "substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" } +prometheus = { version = "0.13.0", default-features = false } [build-dependencies] substrate-build-script-utils = { path = "../../../substrate/utils/build-script-utils" } diff --git a/polkadot/node/subsystem-bench/src/availability.rs b/polkadot/node/subsystem-bench/src/availability.rs new file mode 100644 index 000000000000..d5cb9515ca68 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/availability.rs @@ -0,0 +1,501 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . 
+ +use std::{sync::Arc, time::Duration}; + +use assert_matches::assert_matches; +use env_logger::Env; +use futures::{ + channel::{mpsc, oneshot}, + executor, future, Future, FutureExt, SinkExt, +}; +use futures_timer::Delay; +use polkadot_node_metrics::metrics::Metrics; + +use polkadot_availability_recovery::{AvailabilityRecoverySubsystem, Metrics as SubsystemMetrics}; + +use parity_scale_codec::Encode; +use polkadot_node_network_protocol::request_response::{ + self as req_res, v1::ChunkResponse, IncomingRequest, Recipient, ReqProtocolNames, Requests, +}; + +use prometheus::Registry; +use sc_network::{config::RequestResponseConfig, IfDisconnected, OutboundFailure, RequestFailure}; + +use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; +use polkadot_node_primitives::{BlockData, PoV, Proof}; +use polkadot_node_subsystem::{ + errors::RecoveryError, + jaeger, + messages::{ + AllMessages, AvailabilityRecoveryMessage, AvailabilityStoreMessage, NetworkBridgeTxMessage, + RuntimeApiMessage, RuntimeApiRequest, + }, + overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem, Subsystem, + SubsystemContext, SubsystemError, SubsystemResult, +}; + +const LOG_TARGET: &str = "subsystem-bench::availability"; + +use polkadot_erasure_coding::recovery_threshold; +use polkadot_node_primitives::{AvailableData, ErasureChunk}; +// use polkadot_node_subsystem::{ +// errors::RecoveryError, +// jaeger, +// messages::{AvailabilityRecoveryMessage, AvailabilityStoreMessage}, +// overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem, +// SubsystemContext, SubsystemError, SubsystemResult, +// }; +use polkadot_node_subsystem_test_helpers::{ + make_subsystem_context, mock::new_leaf, TestSubsystemContextHandle, +}; +use polkadot_node_subsystem_util::TimeoutExt; +use polkadot_primitives::{ + AuthorityDiscoveryId, CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, IndexedVec, + PersistedValidationData, SessionIndex, 
SessionInfo, ValidatorId, ValidatorIndex, +}; +use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; +use sc_service::{SpawnTaskHandle, TaskManager}; + +type VirtualOverseer = TestSubsystemContextHandle; + +// Deterministic genesis hash for protocol names +const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); + +struct AvailabilityRecoverySubsystemInstance { + protocol_config: RequestResponseConfig, +} + +pub struct EnvParams { + // The candidate we will recover in the benchmark. + candidate: CandidateReceipt, +} + +// Implements a mockup of NetworkBridge and AvilabilityStore to support provide state for +// `AvailabilityRecoverySubsystemInstance` +pub struct TestEnvironment { + // A tokio runtime to use in the test + runtime: tokio::runtime::Handle, + // A task manager that tracks task poll durations. + task_manager: TaskManager, + // The Prometheus metrics registry + registry: Registry, + // A test overseer. + to_subsystem: mpsc::Sender>, + // Parameters + params: EnvParams, + // Subsystem instance, currently keeps req/response protocol channel senders. + instance: AvailabilityRecoverySubsystemInstance, +} + +impl TestEnvironment { + pub fn new(runtime: tokio::runtime::Handle, mut params: EnvParams, registry: Registry) -> Self { + let task_manager: TaskManager = TaskManager::new(runtime.clone(), Some(®istry)).unwrap(); + let (instance, virtual_overseer) = AvailabilityRecoverySubsystemInstance::new( + ®istry, + task_manager.spawn_handle(), + runtime.clone(), + ); + + // TODO: support parametrization of initial test state + // n_validator, n_cores. + let state = TestState::new(params.candidate.clone()); + // Override candidate after computing erasure in `TestState::new` + params.candidate = state.candidate(); + + // Create channel to inject messages int the subsystem. + let to_subsystem = virtual_overseer.tx.clone(); + + // We need to start a receiver to process messages from the subsystem. 
+ task_manager.spawn_handle().spawn_blocking( + "test-environment", + "test-environment", + async move { Self::env_task(virtual_overseer, state).await }, + ); + + TestEnvironment { runtime, task_manager, registry, to_subsystem, params, instance } + } + + pub fn params(&self) -> &EnvParams { + &self.params + } + + async fn respond_to_send_request(state: &mut TestState, request: Requests) { + match request { + Requests::ChunkFetchingV1(outgoing_request) => { + let validator_index = outgoing_request.payload.index.0 as usize; + let chunk: ChunkResponse = state.chunks[validator_index].clone().into(); + + let _ = outgoing_request + .pending_response + .send(Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode())); + }, + _ => panic!("received an unexpected request"), + } + } + + // A task that mocks dependent subsystems based on environment configuration. + // TODO: Spawn real subsystems, user overseer builder. + async fn env_task( + mut ctx: TestSubsystemContextHandle, + mut state: TestState, + ) { + loop { + futures::select! { + message = ctx.recv().fuse() => { + gum::debug!(target: LOG_TARGET, ?message, "Env task received message"); + + match message { + AllMessages::NetworkBridgeTx( + NetworkBridgeTxMessage::SendRequests( + requests, + _if_disconnected, + ) + ) => { + for request in requests { + // TODO: add latency variance when answering requests. This should be an env parameter. + Self::respond_to_send_request(&mut state, request).await; + } + }, + AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryAvailableData(_candidate_hash, tx)) => { + // TODO: Simulate av store load by delaying the response. + state.respond_none_to_available_data_query(tx).await; + }, + AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryAllChunks(_candidate_hash, tx)) => { + // Test env: We always have our own chunk. 
+ state.respond_to_query_all_request(|index| index == state.validator_index.0 as usize, tx).await; + }, + AllMessages::AvailabilityStore( + AvailabilityStoreMessage::QueryChunkSize(_, tx) + ) => { + let chunk_size = state.chunks[0].encoded_size(); + let _ = tx.send(Some(chunk_size)); + } + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo( + session_index, + tx, + ) + )) => { + tx.send(Ok(Some(state.session_info()))).unwrap(); + } + _ => panic!("Unexpected input") + } + } + } + } + } + + // Send a message to the subsystem under test environment. + pub async fn send_message(&mut self, msg: AvailabilityRecoveryMessage) { + gum::trace!(msg = ?msg, "sending message"); + self.to_subsystem + .send(FromOrchestra::Communication { msg }) + .timeout(MAX_TIME_OF_FLIGHT) + .await + .unwrap_or_else(|| { + panic!("{}ms maximum time of flight breached", MAX_TIME_OF_FLIGHT.as_millis()) + }) + .unwrap(); + } + + // Send a signal to the subsystem under test environment. + pub async fn send_signal(&mut self, signal: OverseerSignal) { + self.to_subsystem + .send(FromOrchestra::Signal(signal)) + .timeout(MAX_TIME_OF_FLIGHT) + .await + .unwrap_or_else(|| { + panic!("{}ms is more than enough for sending signals.", TIMEOUT.as_millis()) + }) + .unwrap(); + } +} + +/// Implementation for chunks only +/// TODO: all recovery methods. 
+impl AvailabilityRecoverySubsystemInstance { + pub fn new( + registry: &Registry, + spawn_task_handle: SpawnTaskHandle, + runtime: tokio::runtime::Handle, + ) -> (Self, TestSubsystemContextHandle) { + let (context, virtual_overseer) = make_subsystem_context(spawn_task_handle.clone()); + let (collation_req_receiver, req_cfg) = + IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); + let subsystem = AvailabilityRecoverySubsystem::with_chunks_only( + collation_req_receiver, + Metrics::try_register(®istry).unwrap(), + ); + + let spawned_subsystem = subsystem.start(context); + let subsystem_future = async move { + spawned_subsystem.future.await.unwrap(); + }; + + spawn_task_handle.spawn_blocking( + spawned_subsystem.name, + spawned_subsystem.name, + subsystem_future, + ); + + (Self { protocol_config: req_cfg }, virtual_overseer) + } +} + +const TIMEOUT: Duration = Duration::from_millis(300); + +// We use this to bail out sending messages to the subsystem if it is overloaded such that +// the time of flight is breaches 5s. +// This should eventually be a test parameter. +const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); + +macro_rules! delay { + ($delay:expr) => { + Delay::new(Duration::from_millis($delay)).await; + }; +} + +use sp_keyring::Sr25519Keyring; + +#[derive(Debug)] +enum Has { + No, + Yes, + NetworkError(RequestFailure), + /// Make request not return at all, instead the sender is returned from the function. + /// + /// Note, if you use `DoesNotReturn` you have to keep the returned senders alive, otherwise the + /// subsystem will receive a cancel event and the request actually does return. + DoesNotReturn, +} + +impl Has { + fn timeout() -> Self { + Has::NetworkError(RequestFailure::Network(OutboundFailure::Timeout)) + } +} + +#[derive(Clone)] +struct TestState { + validators: Vec, + validator_public: IndexedVec, + validator_authority_id: Vec, + // The test node validator index. 
+ validator_index: ValidatorIndex, + candidate: CandidateReceipt, + session_index: SessionIndex, + + persisted_validation_data: PersistedValidationData, + + available_data: AvailableData, + chunks: Vec, + invalid_chunks: Vec, +} + +impl TestState { + fn candidate(&self) -> CandidateReceipt { + self.candidate.clone() + } + + fn threshold(&self) -> usize { + recovery_threshold(self.validators.len()).unwrap() + } + + fn impossibility_threshold(&self) -> usize { + self.validators.len() - self.threshold() + 1 + } + + async fn respond_to_available_data_query(&self, tx: oneshot::Sender>) { + let _ = tx.send(Some(self.available_data.clone())); + } + + async fn respond_none_to_available_data_query( + &self, + tx: oneshot::Sender>, + ) { + let _ = tx.send(None); + } + + fn session_info(&self) -> SessionInfo { + SessionInfo { + validators: self.validator_public.clone(), + discovery_keys: self.validator_authority_id.clone(), + // all validators in the same group. + validator_groups: IndexedVec::>::from(vec![(0..self + .validators + .len()) + .map(|i| ValidatorIndex(i as _)) + .collect()]), + assignment_keys: vec![], + n_cores: 0, + zeroth_delay_tranche_width: 0, + relay_vrf_modulo_samples: 0, + n_delay_tranches: 0, + no_show_slots: 0, + needed_approvals: 0, + active_validator_indices: vec![], + dispute_period: 6, + random_seed: [0u8; 32], + } + } + async fn respond_to_query_all_request( + &self, + send_chunk: impl Fn(usize) -> bool, + tx: oneshot::Sender>, + ) { + let v = self.chunks.iter().filter(|c| send_chunk(c.index.0 as usize)).cloned().collect(); + + let _ = tx.send(v); + } +} + +fn validator_pubkeys(val_ids: &[Sr25519Keyring]) -> IndexedVec { + val_ids.iter().map(|v| v.public().into()).collect() +} + +fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec { + val_ids.iter().map(|v| v.public().into()).collect() +} + +fn derive_erasure_chunks_with_proofs_and_root( + n_validators: usize, + available_data: &AvailableData, + alter_chunk: impl Fn(usize, &mut Vec), +) -> 
(Vec, Hash) { + let mut chunks: Vec> = obtain_chunks(n_validators, available_data).unwrap(); + + for (i, chunk) in chunks.iter_mut().enumerate() { + alter_chunk(i, chunk) + } + + // create proofs for each erasure chunk + let branches = branches(chunks.as_ref()); + + let root = branches.root(); + let erasure_chunks = branches + .enumerate() + .map(|(index, (proof, chunk))| ErasureChunk { + chunk: chunk.to_vec(), + index: ValidatorIndex(index as _), + proof: Proof::try_from(proof).unwrap(), + }) + .collect::>(); + + (erasure_chunks, root) +} + +impl TestState { + fn new(mut candidate: CandidateReceipt) -> Self { + let validators = vec![ + Sr25519Keyring::Ferdie, // <- this node, role: validator + Sr25519Keyring::Alice, + Sr25519Keyring::Bob, + Sr25519Keyring::Charlie, + Sr25519Keyring::Dave, + ]; + + let validator_public = validator_pubkeys(&validators); + let validator_authority_id = validator_authority_id(&validators); + let validator_index = ValidatorIndex(0); + + let session_index = 10; + + let persisted_validation_data = PersistedValidationData { + parent_head: HeadData(vec![7, 8, 9]), + relay_parent_number: Default::default(), + max_pov_size: 1024, + relay_parent_storage_root: Default::default(), + }; + + /// A 5MB PoV. 
+ let pov = PoV { block_data: BlockData(vec![42; 1024 * 1024 * 5]) }; + + let available_data = AvailableData { + validation_data: persisted_validation_data.clone(), + pov: Arc::new(pov), + }; + + let (chunks, erasure_root) = derive_erasure_chunks_with_proofs_and_root( + validators.len(), + &available_data, + |_, _| {}, + ); + // Mess around: + let invalid_chunks = chunks + .iter() + .cloned() + .map(|mut chunk| { + if chunk.chunk.len() >= 2 && chunk.chunk[0] != chunk.chunk[1] { + chunk.chunk[0] = chunk.chunk[1]; + } else if chunk.chunk.len() >= 1 { + chunk.chunk[0] = !chunk.chunk[0]; + } else { + chunk.proof = Proof::dummy_proof(); + } + chunk + }) + .collect(); + debug_assert_ne!(chunks, invalid_chunks); + + candidate.descriptor.erasure_root = erasure_root; + + Self { + validators, + validator_public, + validator_authority_id, + validator_index, + candidate, + session_index, + persisted_validation_data, + available_data, + chunks, + invalid_chunks, + } + } +} + +pub fn bench_chunk_recovery_params() -> EnvParams { + let mut candidate = dummy_candidate_receipt(dummy_hash()); + EnvParams { candidate } +} +pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { + env.send_signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(new_leaf( + Hash::repeat_byte(1), + 1, + )))) + .await; + + let mut candidate = env.params().candidate.clone(); + + for candidate_num in 0..10u64 { + let (tx, rx) = oneshot::channel(); + + candidate.descriptor.relay_parent = Hash::from_low_u64_be(candidate_num); + + env.send_message(AvailabilityRecoveryMessage::RecoverAvailableData( + candidate.clone(), + 1, + Some(GroupIndex(0)), + tx, + )) + .await; + + let available_data = rx.await.unwrap().unwrap(); + } + env.send_signal(OverseerSignal::Conclude).await; +} diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs new file mode 100644 index 000000000000..3acf561e0daf --- /dev/null +++ 
b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -0,0 +1,133 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see <http://www.gnu.org/licenses/>. + +//! A tool for running subsystem benchmark tests designed for development and +//! CI regression testing. + +use clap::Parser; +use color_eyre::eyre; +use prometheus::proto::LabelPair; +use sc_service::TaskManager; + +pub(crate) mod availability; + +use availability::{EnvParams, TestEnvironment}; +const LOG_TARGET: &str = "subsystem-bench"; + +/// Define the supported benchmarks targets +#[derive(Debug, Parser)] +#[command(about = "Target subsystems", version, rename_all = "kebab-case")] +enum BenchmarkTarget { + /// Benchmark availability recovery strategies. + AvailabilityRecovery, +} + +#[derive(Debug, Parser)] +#[allow(missing_docs)] +struct BenchCli { + #[command(subcommand)] + pub target: BenchmarkTarget, +} + +fn new_runtime() -> tokio::runtime::Runtime { + tokio::runtime::Builder::new_multi_thread() + .thread_name("subsystem-bench") + .enable_all() + .thread_stack_size(3 * 1024 * 1024) + .build() + .unwrap() +} + +impl BenchCli { + /// Launch a malus node. 
+ fn launch(self) -> eyre::Result<()> { + use prometheus::{proto::MetricType, Counter, Encoder, Opts, Registry, TextEncoder}; + + let encoder = TextEncoder::new(); + + println!("Preparing {:?} benchmarks", self.target); + + let runtime = new_runtime(); + let registry = Registry::new(); + + let params = availability::bench_chunk_recovery_params(); + let mut env = TestEnvironment::new(runtime.handle().clone(), params, registry.clone()); + + runtime.block_on(availability::bench_chunk_recovery(&mut env)); + + let metric_families = registry.gather(); + let total_subsystem_cpu = 0; + + for familiy in metric_families { + let metric_type = familiy.get_field_type(); + + for metric in familiy.get_metric() { + match metric_type { + MetricType::HISTOGRAM => { + let h = metric.get_histogram(); + + let mut inf_seen = false; + + let labels = metric.get_label(); + // Skip test env usage. + let mut env_label = LabelPair::default(); + env_label.set_name("task_group".into()); + env_label.set_value("test-environment".into()); + + let mut is_env_metric = false; + for label_pair in labels { + if &env_label == label_pair { + is_env_metric = true; + break + } + } + + if !is_env_metric { + println!( + "{:?} CPU seconds used: {:?}", + familiy.get_name(), + h.get_sample_sum() + ); + } + }, + _ => {}, + } + } + } + // encoder.encode(&metric_families, &mut buffer).unwrap(); + + // Output to the standard output. 
+ // println!("Metrics: {}", String::from_utf8(buffer).unwrap()); + Ok(()) + } +} + +fn main() -> eyre::Result<()> { + color_eyre::install()?; + let _ = env_logger::builder() + .is_test(true) + .filter(Some(LOG_TARGET), log::LevelFilter::Debug) + .try_init(); + + let cli: BenchCli = BenchCli::parse(); + cli.launch()?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; +} From c3adc77f2920363d0df458c26f1b9a2a70e8ad2b Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 6 Nov 2023 22:54:39 +0200 Subject: [PATCH 03/52] measure tput and fixes Signed-off-by: Andrei Sandu --- .../node/subsystem-bench/src/availability.rs | 183 +++++++++++------- .../subsystem-bench/src/subsystem-bench.rs | 10 +- 2 files changed, 118 insertions(+), 75 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability.rs b/polkadot/node/subsystem-bench/src/availability.rs index d5cb9515ca68..72c8a736217d 100644 --- a/polkadot/node/subsystem-bench/src/availability.rs +++ b/polkadot/node/subsystem-bench/src/availability.rs @@ -14,10 +14,13 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use std::{sync::Arc, time::Duration}; +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; use assert_matches::assert_matches; -use env_logger::Env; +use color_eyre::owo_colors::colors::xterm; use futures::{ channel::{mpsc, oneshot}, executor, future, Future, FutureExt, SinkExt, @@ -52,20 +55,14 @@ const LOG_TARGET: &str = "subsystem-bench::availability"; use polkadot_erasure_coding::recovery_threshold; use polkadot_node_primitives::{AvailableData, ErasureChunk}; -// use polkadot_node_subsystem::{ -// errors::RecoveryError, -// jaeger, -// messages::{AvailabilityRecoveryMessage, AvailabilityStoreMessage}, -// overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem, -// SubsystemContext, SubsystemError, SubsystemResult, -// }; + use polkadot_node_subsystem_test_helpers::{ - make_subsystem_context, mock::new_leaf, TestSubsystemContextHandle, + make_buffered_subsystem_context, mock::new_leaf, TestSubsystemContextHandle, }; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_primitives::{ - AuthorityDiscoveryId, CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, IndexedVec, - PersistedValidationData, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, + AuthorityDiscoveryId, CandidateHash, CandidateReceipt, CoreIndex, GroupIndex, Hash, HeadData, + IndexedVec, PersistedValidationData, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, }; use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::{SpawnTaskHandle, TaskManager}; @@ -97,12 +94,18 @@ pub struct TestEnvironment { to_subsystem: mpsc::Sender>, // Parameters params: EnvParams, - // Subsystem instance, currently keeps req/response protocol channel senders. + // Subsystem instance, currently keeps req/response protocol channel senders + // for the whole duration of the test. instance: AvailabilityRecoverySubsystemInstance, + // The test intial state. 
The current state is owned by the task doing the overseer/subsystem + // mockings. + state: TestState, } impl TestEnvironment { - pub fn new(runtime: tokio::runtime::Handle, mut params: EnvParams, registry: Registry) -> Self { + // Create a new test environment with specified initial state and prometheus registry. + // We use prometheus metrics to collect per job task poll time and subsystem metrics. + pub fn new(runtime: tokio::runtime::Handle, state: TestState, registry: Registry) -> Self { let task_manager: TaskManager = TaskManager::new(runtime.clone(), Some(®istry)).unwrap(); let (instance, virtual_overseer) = AvailabilityRecoverySubsystemInstance::new( ®istry, @@ -112,26 +115,29 @@ impl TestEnvironment { // TODO: support parametrization of initial test state // n_validator, n_cores. - let state = TestState::new(params.candidate.clone()); - // Override candidate after computing erasure in `TestState::new` - params.candidate = state.candidate(); + let params = EnvParams { candidate: state.candidate() }; - // Create channel to inject messages int the subsystem. + // Copy sender for later when we need to inject messages in to the subsystem. let to_subsystem = virtual_overseer.tx.clone(); + let task_state = state.clone(); // We need to start a receiver to process messages from the subsystem. 
+ // This mocks an overseer and all dependent subsystems task_manager.spawn_handle().spawn_blocking( "test-environment", "test-environment", - async move { Self::env_task(virtual_overseer, state).await }, + async move { Self::env_task(virtual_overseer, task_state).await }, ); - TestEnvironment { runtime, task_manager, registry, to_subsystem, params, instance } + TestEnvironment { runtime, task_manager, registry, to_subsystem, params, instance, state } } pub fn params(&self) -> &EnvParams { &self.params } + pub fn input(&self) -> &TestInput { + self.state.input() + } async fn respond_to_send_request(state: &mut TestState, request: Requests) { match request { @@ -234,7 +240,8 @@ impl AvailabilityRecoverySubsystemInstance { spawn_task_handle: SpawnTaskHandle, runtime: tokio::runtime::Handle, ) -> (Self, TestSubsystemContextHandle) { - let (context, virtual_overseer) = make_subsystem_context(spawn_task_handle.clone()); + let (context, virtual_overseer) = + make_buffered_subsystem_context(spawn_task_handle.clone(), 4096); let (collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); let subsystem = AvailabilityRecoverySubsystem::with_chunks_only( @@ -291,7 +298,7 @@ impl Has { } #[derive(Clone)] -struct TestState { +pub struct TestState { validators: Vec, validator_public: IndexedVec, validator_authority_id: Vec, @@ -305,9 +312,14 @@ struct TestState { available_data: AvailableData, chunks: Vec, invalid_chunks: Vec, + input: TestInput, } impl TestState { + fn input(&self) -> &TestInput { + &self.input + } + fn candidate(&self) -> CandidateReceipt { self.candidate.clone() } @@ -362,53 +374,14 @@ impl TestState { let _ = tx.send(v); } -} -fn validator_pubkeys(val_ids: &[Sr25519Keyring]) -> IndexedVec { - val_ids.iter().map(|v| v.public().into()).collect() -} - -fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec { - val_ids.iter().map(|v| v.public().into()).collect() -} - -fn 
derive_erasure_chunks_with_proofs_and_root( - n_validators: usize, - available_data: &AvailableData, - alter_chunk: impl Fn(usize, &mut Vec), -) -> (Vec, Hash) { - let mut chunks: Vec> = obtain_chunks(n_validators, available_data).unwrap(); - - for (i, chunk) in chunks.iter_mut().enumerate() { - alter_chunk(i, chunk) - } - - // create proofs for each erasure chunk - let branches = branches(chunks.as_ref()); - - let root = branches.root(); - let erasure_chunks = branches - .enumerate() - .map(|(index, (proof, chunk))| ErasureChunk { - chunk: chunk.to_vec(), - index: ValidatorIndex(index as _), - proof: Proof::try_from(proof).unwrap(), - }) - .collect::>(); - - (erasure_chunks, root) -} - -impl TestState { - fn new(mut candidate: CandidateReceipt) -> Self { - let validators = vec![ - Sr25519Keyring::Ferdie, // <- this node, role: validator - Sr25519Keyring::Alice, - Sr25519Keyring::Bob, - Sr25519Keyring::Charlie, - Sr25519Keyring::Dave, - ]; + pub fn new(input: TestInput) -> Self { + let validators = (0..input.n_validators as u64) + .into_iter() + .map(|v| Sr25519Keyring::Alice) + .collect::>(); + let mut candidate = dummy_candidate_receipt(dummy_hash()); let validator_public = validator_pubkeys(&validators); let validator_authority_id = validator_authority_id(&validators); let validator_index = ValidatorIndex(0); @@ -465,15 +438,66 @@ impl TestState { available_data, chunks, invalid_chunks, + input, } } } -pub fn bench_chunk_recovery_params() -> EnvParams { - let mut candidate = dummy_candidate_receipt(dummy_hash()); - EnvParams { candidate } +fn validator_pubkeys(val_ids: &[Sr25519Keyring]) -> IndexedVec { + val_ids.iter().map(|v| v.public().into()).collect() } + +fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec { + val_ids.iter().map(|v| v.public().into()).collect() +} + +fn derive_erasure_chunks_with_proofs_and_root( + n_validators: usize, + available_data: &AvailableData, + alter_chunk: impl Fn(usize, &mut Vec), +) -> (Vec, Hash) { + let mut chunks: 
Vec> = obtain_chunks(n_validators, available_data).unwrap(); + + for (i, chunk) in chunks.iter_mut().enumerate() { + alter_chunk(i, chunk) + } + + // create proofs for each erasure chunk + let branches = branches(chunks.as_ref()); + + let root = branches.root(); + let erasure_chunks = branches + .enumerate() + .map(|(index, (proof, chunk))| ErasureChunk { + chunk: chunk.to_vec(), + index: ValidatorIndex(index as _), + proof: Proof::try_from(proof).unwrap(), + }) + .collect::>(); + + (erasure_chunks, root) +} + +/// The test input parameters +#[derive(Clone)] +pub struct TestInput { + pub n_validators: usize, + pub n_cores: usize, + pub pov_size: usize, + // This parameter is used to determine how many recoveries we batch in parallel + // similarly to how in practice tranche0 assignments work. + pub vrf_modulo_samples: usize, +} + +impl Default for TestInput { + fn default() -> Self { + Self { n_validators: 300, n_cores: 50, pov_size: 5 * 1024 * 1024, vrf_modulo_samples: 6 } + } +} + pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { + let input = env.input().clone(); + env.send_signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(new_leaf( Hash::repeat_byte(1), 1, @@ -482,8 +506,12 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let mut candidate = env.params().candidate.clone(); - for candidate_num in 0..10u64 { + let start_marker = Instant::now(); + + let mut batch = Vec::new(); + for candidate_num in 0..input.n_cores as u64 { let (tx, rx) = oneshot::channel(); + batch.push(rx); candidate.descriptor.relay_parent = Hash::from_low_u64_be(candidate_num); @@ -495,7 +523,20 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { )) .await; + if batch.len() >= input.vrf_modulo_samples { + for rx in std::mem::take(&mut batch) { + let available_data = rx.await.unwrap().unwrap(); + } + } + } + + for rx in std::mem::take(&mut batch) { let available_data = rx.await.unwrap().unwrap(); } + 
env.send_signal(OverseerSignal::Conclude).await; + let duration = start_marker.elapsed().as_millis(); + let tput = ((input.n_cores * input.pov_size) as u128) / duration * 1000; + println!("Benchmark completed in {:?}ms", duration); + println!("Throughput: {}KiB/s", tput / 1024); } diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 3acf561e0daf..bfc0b63e86d3 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -24,7 +24,7 @@ use sc_service::TaskManager; pub(crate) mod availability; -use availability::{EnvParams, TestEnvironment}; +use availability::{EnvParams, TestEnvironment, TestInput, TestState}; const LOG_TARGET: &str = "subsystem-bench"; /// Define the supported benchmarks targets @@ -45,6 +45,7 @@ struct BenchCli { fn new_runtime() -> tokio::runtime::Runtime { tokio::runtime::Builder::new_multi_thread() .thread_name("subsystem-bench") + .max_blocking_threads(32) .enable_all() .thread_stack_size(3 * 1024 * 1024) .build() @@ -63,8 +64,9 @@ impl BenchCli { let runtime = new_runtime(); let registry = Registry::new(); - let params = availability::bench_chunk_recovery_params(); - let mut env = TestEnvironment::new(runtime.handle().clone(), params, registry.clone()); + let state = TestState::new(TestInput::default()); + + let mut env = TestEnvironment::new(runtime.handle().clone(), state, registry.clone()); runtime.block_on(availability::bench_chunk_recovery(&mut env)); @@ -119,7 +121,7 @@ fn main() -> eyre::Result<()> { color_eyre::install()?; let _ = env_logger::builder() .is_test(true) - .filter(Some(LOG_TARGET), log::LevelFilter::Debug) + .filter(Some(LOG_TARGET), log::LevelFilter::Info) .try_init(); let cli: BenchCli = BenchCli::parse(); From 31b0351eaea643f181fe3216e03aae5d4da12ed6 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 7 Nov 2023 15:37:33 +0200 Subject: [PATCH 04/52] add network emulation 
Signed-off-by: Andrei Sandu --- .../{availability.rs => availability/mod.rs} | 50 ++++- .../src/availability/network.rs | 212 ++++++++++++++++++ .../subsystem-bench/src/subsystem-bench.rs | 3 +- 3 files changed, 255 insertions(+), 10 deletions(-) rename polkadot/node/subsystem-bench/src/{availability.rs => availability/mod.rs} (92%) create mode 100644 polkadot/node/subsystem-bench/src/availability/network.rs diff --git a/polkadot/node/subsystem-bench/src/availability.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs similarity index 92% rename from polkadot/node/subsystem-bench/src/availability.rs rename to polkadot/node/subsystem-bench/src/availability/mod.rs index 72c8a736217d..cdc2bf5ce644 100644 --- a/polkadot/node/subsystem-bench/src/availability.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -16,6 +16,7 @@ use std::{ sync::Arc, + thread::sleep, time::{Duration, Instant}, }; @@ -67,6 +68,8 @@ use polkadot_primitives::{ use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::{SpawnTaskHandle, TaskManager}; +mod network; + type VirtualOverseer = TestSubsystemContextHandle; // Deterministic genesis hash for protocol names @@ -121,12 +124,13 @@ impl TestEnvironment { let to_subsystem = virtual_overseer.tx.clone(); let task_state = state.clone(); + let spawn_task_handle = task_manager.spawn_handle(); // We need to start a receiver to process messages from the subsystem. 
// This mocks an overseer and all dependent subsystems task_manager.spawn_handle().spawn_blocking( "test-environment", "test-environment", - async move { Self::env_task(virtual_overseer, task_state).await }, + async move { Self::env_task(virtual_overseer, task_state, spawn_task_handle).await }, ); TestEnvironment { runtime, task_manager, registry, to_subsystem, params, instance, state } @@ -139,15 +143,20 @@ impl TestEnvironment { self.state.input() } - async fn respond_to_send_request(state: &mut TestState, request: Requests) { + pub fn respond_to_send_request(state: &mut TestState, request: Requests) -> NetworkAction { match request { Requests::ChunkFetchingV1(outgoing_request) => { let validator_index = outgoing_request.payload.index.0 as usize; let chunk: ChunkResponse = state.chunks[validator_index].clone().into(); + let size = chunk.encoded_size(); + let future = async move { + let _ = outgoing_request + .pending_response + .send(Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode())); + } + .boxed(); - let _ = outgoing_request - .pending_response - .send(Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode())); + NetworkAction::new(validator_index, future, size) }, _ => panic!("received an unexpected request"), } @@ -158,7 +167,15 @@ impl TestEnvironment { async fn env_task( mut ctx: TestSubsystemContextHandle, mut state: TestState, + spawn_task_handle: SpawnTaskHandle, ) { + // Emulate `n_validators` each with 1MiB of bandwidth available. + let mut network = NetworkEmulator::new( + state.input().n_validators, + state.input().bandwidth, + spawn_task_handle, + ); + loop { futures::select! { message = ctx.recv().fuse() => { @@ -173,7 +190,9 @@ impl TestEnvironment { ) => { for request in requests { // TODO: add latency variance when answering requests. This should be an env parameter. 
- Self::respond_to_send_request(&mut state, request).await; + let action = Self::respond_to_send_request(&mut state, request); + // action.run().await; + network.submit_peer_action(action.index(), action); } }, AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryAvailableData(_candidate_hash, tx)) => { @@ -241,7 +260,7 @@ impl AvailabilityRecoverySubsystemInstance { runtime: tokio::runtime::Handle, ) -> (Self, TestSubsystemContextHandle) { let (context, virtual_overseer) = - make_buffered_subsystem_context(spawn_task_handle.clone(), 4096); + make_buffered_subsystem_context(spawn_task_handle.clone(), 4096 * 4); let (collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); let subsystem = AvailabilityRecoverySubsystem::with_chunks_only( @@ -279,6 +298,10 @@ macro_rules! delay { use sp_keyring::Sr25519Keyring; +use crate::availability::network::NetworkAction; + +use self::network::NetworkEmulator; + #[derive(Debug)] enum Has { No, @@ -479,7 +502,7 @@ fn derive_erasure_chunks_with_proofs_and_root( } /// The test input parameters -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct TestInput { pub n_validators: usize, pub n_cores: usize, @@ -487,11 +510,19 @@ pub struct TestInput { // This parameter is used to determine how many recoveries we batch in parallel // similarly to how in practice tranche0 assignments work. pub vrf_modulo_samples: usize, + // The amount of bandiwdht remote validators have. 
+ pub bandwidth: usize, } impl Default for TestInput { fn default() -> Self { - Self { n_validators: 300, n_cores: 50, pov_size: 5 * 1024 * 1024, vrf_modulo_samples: 6 } + Self { + n_validators: 10, + n_cores: 10, + pov_size: 5 * 1024 * 1024, + vrf_modulo_samples: 6, + bandwidth: 15 * 1024 * 1024, + } } } @@ -535,6 +566,7 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { } env.send_signal(OverseerSignal::Conclude).await; + delay!(5); let duration = start_marker.elapsed().as_millis(); let tput = ((input.n_cores * input.pov_size) as u128) / duration * 1000; println!("Benchmark completed in {:?}ms", duration); diff --git a/polkadot/node/subsystem-bench/src/availability/network.rs b/polkadot/node/subsystem-bench/src/availability/network.rs new file mode 100644 index 000000000000..268de5d828eb --- /dev/null +++ b/polkadot/node/subsystem-bench/src/availability/network.rs @@ -0,0 +1,212 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use super::*; +use futures::stream::FuturesOrdered; +use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender}; + +// An emulated node egress traffic rate_limiter. +#[derive(Debug)] +struct RateLimit { + // How often we refill credits in buckets + tick_rate: usize, + // Total ticks + total_ticks: usize, + // Max refill per tick + max_refill: usize, + // Available credit. 
We allow for bursts over 1/tick_rate of `cps` budget, but we + // account it by negative credit. + credits: isize, + // When last refilled. + last_refill: Instant, +} + +impl RateLimit { + // Create a new `RateLimit` from a `cps` (credits per second) budget and + // `tick_rate`. + pub fn new(tick_rate: usize, cps: usize) -> Self { + // Compute how much refill for each tick + let max_refill = cps / tick_rate; + RateLimit { + tick_rate, + total_ticks: 0, + max_refill, + // A fresh start + credits: max_refill as isize, + last_refill: Instant::now(), + } + } + + pub async fn refill(&mut self) { + // If this is called to early, we need to sleep until next tick. + let now = Instant::now(); + let next_tick_delta = + (self.last_refill + Duration::from_millis(1000 / self.tick_rate as u64)) - now; + + // Sleep until next tick. + if !next_tick_delta.is_zero() { + gum::trace!(target: LOG_TARGET, "need to sleep {}ms", next_tick_delta.as_millis()); + tokio::time::sleep(next_tick_delta).await; + } + + self.total_ticks += 1; + self.credits += self.max_refill as isize; + self.last_refill = Instant::now(); + } + + // Reap credits from the bucket. + // Blocks if credits budged goes negative during call. 
+ pub async fn reap(&mut self, amount: usize) { + self.credits -= amount as isize; + + if self.credits >= 0 { + return + } + + while self.credits < 0 { + gum::trace!(target: LOG_TARGET, "Before refill: {:?}", &self); + self.refill().await; + gum::trace!(target: LOG_TARGET, "After refill: {:?}", &self); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use polkadot_node_metrics::metered::CoarseDuration; + use std::time::Instant; + + use super::RateLimit; + + #[tokio::test] + async fn test_expected_rate() { + let tick_rate = 200; + let budget = 1_000_000; + // rate must not exceeed 100 credits per second + let mut rate_limiter = RateLimit::new(tick_rate, budget); + let mut total_sent = 0usize; + let start = Instant::now(); + + let mut reap_amount = 0; + while rate_limiter.total_ticks < tick_rate { + reap_amount += 1; + reap_amount = reap_amount % 100; + + rate_limiter.reap(reap_amount).await; + total_sent += reap_amount; + } + + let end = Instant::now(); + + // assert_eq!(end - start, Duration::from_secs(1)); + println!("duration: {}", (end - start).as_millis()); + + // Allow up to `budget/max_refill` error tolerance + let lower_bound = budget as u128 * ((end - start).as_millis() / 1000u128); + let upper_bound = budget as u128 * + ((end - start).as_millis() / 1000u128 + rate_limiter.max_refill as u128); + assert!(total_sent as u128 >= lower_bound); + assert!(total_sent as u128 <= upper_bound); + } +} +// A network peer emulator +struct PeerEmulator { + // The queue of requests waiting to be served by the emulator + actions_tx: UnboundedSender, +} + +impl PeerEmulator { + pub fn new(bandwidth: usize, spawn_task_handle: SpawnTaskHandle) -> Self { + let (actions_tx, mut actions_rx) = tokio::sync::mpsc::unbounded_channel(); + + spawn_task_handle.spawn("peer-emulator", "test-environment", async move { + let mut rate_limiter = RateLimit::new(20, bandwidth); + loop { + let maybe_action: Option = actions_rx.recv().await; + if let Some(action) = maybe_action { + let 
size = action.size(); + rate_limiter.reap(size).await; + action.run().await; + } else { + break + } + } + }); + + Self { actions_tx } + } + + // Queue a send request from the emulated peer. + pub fn send(&mut self, action: NetworkAction) { + self.actions_tx.send(action).expect("peer emulator task lives"); + } +} + +pub type ActionFuture = std::pin::Pin + std::marker::Send>>; +// An network action to be completed by the emulator task. +pub struct NetworkAction { + // The function that performs the action + run: ActionFuture, + // The payload size that we simulate sending from a peer + size: usize, + // Peer index + index: usize, +} + +impl NetworkAction { + pub fn new(index: usize, run: ActionFuture, size: usize) -> Self { + Self { run, size, index } + } + pub fn size(&self) -> usize { + self.size + } + + pub async fn run(self) { + self.run.await; + } + + pub fn index(&self) -> usize { + self.index + } +} + +// Mocks the network bridge and an arbitrary number of connected peer nodes. +// Implements network latency, bandwidth and error. +pub struct NetworkEmulator { + // Number of peers connected on validation protocol + n_peers: usize, + // The maximum Rx/Tx bandwidth in bytes per second. 
+ bandwidth: usize, + // Per peer network emulation + peers: Vec, +} + +impl NetworkEmulator { + pub fn new(n_peers: usize, bandwidth: usize, spawn_task_handle: SpawnTaskHandle) -> Self { + Self { + n_peers, + bandwidth, + peers: (0..n_peers) + .map(|index| PeerEmulator::new(bandwidth, spawn_task_handle.clone())) + .collect::>(), + } + } + + pub fn submit_peer_action(&mut self, index: usize, action: NetworkAction) { + let _ = self.peers[index].send(action); + } +} diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index bfc0b63e86d3..d58f0bccba9b 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -45,7 +45,6 @@ struct BenchCli { fn new_runtime() -> tokio::runtime::Runtime { tokio::runtime::Builder::new_multi_thread() .thread_name("subsystem-bench") - .max_blocking_threads(32) .enable_all() .thread_stack_size(3 * 1024 * 1024) .build() @@ -68,6 +67,8 @@ impl BenchCli { let mut env = TestEnvironment::new(runtime.handle().clone(), state, registry.clone()); + println!("{:?}", env.input()); + runtime.block_on(availability::bench_chunk_recovery(&mut env)); let metric_families = registry.gather(); From e4bb037260e1fee1f06dee1d5581f9d7763e2548 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 7 Nov 2023 15:51:50 +0200 Subject: [PATCH 05/52] cleanup Signed-off-by: Andrei Sandu --- Cargo.lock | 1 - polkadot/node/subsystem-bench/Cargo.toml | 3 - polkadot/node/subsystem-bench/build.rs | 22 ----- .../subsystem-bench/src/availability/mod.rs | 90 +++++-------------- .../src/availability/network.rs | 5 +- .../subsystem-bench/src/subsystem-bench.rs | 10 +-- 6 files changed, 24 insertions(+), 107 deletions(-) delete mode 100644 polkadot/node/subsystem-bench/build.rs diff --git a/Cargo.lock b/Cargo.lock index d113fd7e43cd..4645aeee6aab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13022,7 +13022,6 @@ dependencies = [ "sp-core", 
"sp-keyring", "sp-keystore", - "substrate-build-script-utils", "tokio", "tracing-gum", ] diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index 729749ab153b..7408397f930c 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -50,8 +50,5 @@ polkadot-primitives-test-helpers = { path = "../../primitives/test-helpers" } # prometheus = { package = "substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" } prometheus = { version = "0.13.0", default-features = false } -[build-dependencies] -substrate-build-script-utils = { path = "../../../substrate/utils/build-script-utils" } - [features] default = [] diff --git a/polkadot/node/subsystem-bench/build.rs b/polkadot/node/subsystem-bench/build.rs deleted file mode 100644 index 84fe22e23ed6..000000000000 --- a/polkadot/node/subsystem-bench/build.rs +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (C) Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see . - -fn main() { - substrate_build_script_utils::generate_cargo_keys(); - // For the node/worker version check, make sure we always rebuild the node and binary workers - // when the version changes. 
- substrate_build_script_utils::rerun_if_git_head_changed(); -} diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index cdc2bf5ce644..c6e9dead09c1 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -16,40 +16,34 @@ use std::{ sync::Arc, - thread::sleep, time::{Duration, Instant}, }; -use assert_matches::assert_matches; -use color_eyre::owo_colors::colors::xterm; use futures::{ channel::{mpsc, oneshot}, - executor, future, Future, FutureExt, SinkExt, + FutureExt, SinkExt, }; use futures_timer::Delay; use polkadot_node_metrics::metrics::Metrics; -use polkadot_availability_recovery::{AvailabilityRecoverySubsystem, Metrics as SubsystemMetrics}; +use polkadot_availability_recovery::AvailabilityRecoverySubsystem; use parity_scale_codec::Encode; use polkadot_node_network_protocol::request_response::{ - self as req_res, v1::ChunkResponse, IncomingRequest, Recipient, ReqProtocolNames, Requests, + self as req_res, v1::ChunkResponse, IncomingRequest, ReqProtocolNames, Requests, }; use prometheus::Registry; -use sc_network::{config::RequestResponseConfig, IfDisconnected, OutboundFailure, RequestFailure}; +use sc_network::{config::RequestResponseConfig, OutboundFailure, RequestFailure}; use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; use polkadot_node_primitives::{BlockData, PoV, Proof}; use polkadot_node_subsystem::{ - errors::RecoveryError, - jaeger, messages::{ AllMessages, AvailabilityRecoveryMessage, AvailabilityStoreMessage, NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest, }, - overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem, Subsystem, - SubsystemContext, SubsystemError, SubsystemResult, + ActiveLeavesUpdate, FromOrchestra, OverseerSignal, Subsystem, }; const LOG_TARGET: &str = "subsystem-bench::availability"; @@ -62,41 +56,30 @@ use 
polkadot_node_subsystem_test_helpers::{ }; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_primitives::{ - AuthorityDiscoveryId, CandidateHash, CandidateReceipt, CoreIndex, GroupIndex, Hash, HeadData, - IndexedVec, PersistedValidationData, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, + AuthorityDiscoveryId, CandidateReceipt, GroupIndex, Hash, HeadData, IndexedVec, + PersistedValidationData, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, }; use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::{SpawnTaskHandle, TaskManager}; mod network; -type VirtualOverseer = TestSubsystemContextHandle; - // Deterministic genesis hash for protocol names const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); struct AvailabilityRecoverySubsystemInstance { - protocol_config: RequestResponseConfig, -} - -pub struct EnvParams { - // The candidate we will recover in the benchmark. - candidate: CandidateReceipt, + _protocol_config: RequestResponseConfig, } // Implements a mockup of NetworkBridge and AvilabilityStore to support provide state for // `AvailabilityRecoverySubsystemInstance` pub struct TestEnvironment { - // A tokio runtime to use in the test - runtime: tokio::runtime::Handle, // A task manager that tracks task poll durations. task_manager: TaskManager, // The Prometheus metrics registry registry: Registry, // A test overseer. to_subsystem: mpsc::Sender>, - // Parameters - params: EnvParams, // Subsystem instance, currently keeps req/response protocol channel senders // for the whole duration of the test. instance: AvailabilityRecoverySubsystemInstance, @@ -110,15 +93,8 @@ impl TestEnvironment { // We use prometheus metrics to collect per job task poll time and subsystem metrics. 
pub fn new(runtime: tokio::runtime::Handle, state: TestState, registry: Registry) -> Self { let task_manager: TaskManager = TaskManager::new(runtime.clone(), Some(®istry)).unwrap(); - let (instance, virtual_overseer) = AvailabilityRecoverySubsystemInstance::new( - ®istry, - task_manager.spawn_handle(), - runtime.clone(), - ); - - // TODO: support parametrization of initial test state - // n_validator, n_cores. - let params = EnvParams { candidate: state.candidate() }; + let (instance, virtual_overseer) = + AvailabilityRecoverySubsystemInstance::new(®istry, task_manager.spawn_handle()); // Copy sender for later when we need to inject messages in to the subsystem. let to_subsystem = virtual_overseer.tx.clone(); @@ -133,12 +109,9 @@ impl TestEnvironment { async move { Self::env_task(virtual_overseer, task_state, spawn_task_handle).await }, ); - TestEnvironment { runtime, task_manager, registry, to_subsystem, params, instance, state } + TestEnvironment { task_manager, registry, to_subsystem, instance, state } } - pub fn params(&self) -> &EnvParams { - &self.params - } pub fn input(&self) -> &TestInput { self.state.input() } @@ -189,9 +162,7 @@ impl TestEnvironment { ) ) => { for request in requests { - // TODO: add latency variance when answering requests. This should be an env parameter. 
let action = Self::respond_to_send_request(&mut state, request); - // action.run().await; network.submit_peer_action(action.index(), action); } }, @@ -210,9 +181,9 @@ impl TestEnvironment { let _ = tx.send(Some(chunk_size)); } AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, + _relay_parent, RuntimeApiRequest::SessionInfo( - session_index, + _session_index, tx, ) )) => { @@ -257,7 +228,6 @@ impl AvailabilityRecoverySubsystemInstance { pub fn new( registry: &Registry, spawn_task_handle: SpawnTaskHandle, - runtime: tokio::runtime::Handle, ) -> (Self, TestSubsystemContextHandle) { let (context, virtual_overseer) = make_buffered_subsystem_context(spawn_task_handle.clone(), 4096 * 4); @@ -279,7 +249,7 @@ impl AvailabilityRecoverySubsystemInstance { subsystem_future, ); - (Self { protocol_config: req_cfg }, virtual_overseer) + (Self { _protocol_config: req_cfg }, virtual_overseer) } } @@ -302,24 +272,6 @@ use crate::availability::network::NetworkAction; use self::network::NetworkEmulator; -#[derive(Debug)] -enum Has { - No, - Yes, - NetworkError(RequestFailure), - /// Make request not return at all, instead the sender is returned from the function. - /// - /// Note, if you use `DoesNotReturn` you have to keep the returned senders alive, otherwise the - /// subsystem will receive a cancel event and the request actually does return. - DoesNotReturn, -} - -impl Has { - fn timeout() -> Self { - Has::NetworkError(RequestFailure::Network(OutboundFailure::Timeout)) - } -} - #[derive(Clone)] pub struct TestState { validators: Vec, @@ -401,7 +353,7 @@ impl TestState { pub fn new(input: TestInput) -> Self { let validators = (0..input.n_validators as u64) .into_iter() - .map(|v| Sr25519Keyring::Alice) + .map(|_v| Sr25519Keyring::Alice) .collect::>(); let mut candidate = dummy_candidate_receipt(dummy_hash()); @@ -418,8 +370,8 @@ impl TestState { relay_parent_storage_root: Default::default(), }; - /// A 5MB PoV. 
- let pov = PoV { block_data: BlockData(vec![42; 1024 * 1024 * 5]) }; + // A 5MB PoV. + let pov = PoV { block_data: BlockData(vec![42; input.pov_size]) }; let available_data = AvailableData { validation_data: persisted_validation_data.clone(), @@ -535,10 +487,8 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { )))) .await; - let mut candidate = env.params().candidate.clone(); - let start_marker = Instant::now(); - + let mut candidate = env.state.candidate(); let mut batch = Vec::new(); for candidate_num in 0..input.n_cores as u64 { let (tx, rx) = oneshot::channel(); @@ -556,13 +506,13 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { if batch.len() >= input.vrf_modulo_samples { for rx in std::mem::take(&mut batch) { - let available_data = rx.await.unwrap().unwrap(); + let _available_data = rx.await.unwrap().unwrap(); } } } for rx in std::mem::take(&mut batch) { - let available_data = rx.await.unwrap().unwrap(); + let _available_data = rx.await.unwrap().unwrap(); } env.send_signal(OverseerSignal::Conclude).await; diff --git a/polkadot/node/subsystem-bench/src/availability/network.rs b/polkadot/node/subsystem-bench/src/availability/network.rs index 268de5d828eb..1889e971cc1e 100644 --- a/polkadot/node/subsystem-bench/src/availability/network.rs +++ b/polkadot/node/subsystem-bench/src/availability/network.rs @@ -15,8 +15,7 @@ // along with Polkadot. If not, see . use super::*; -use futures::stream::FuturesOrdered; -use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender}; +use tokio::sync::mpsc::UnboundedSender; // An emulated node egress traffic rate_limiter. 
#[derive(Debug)] @@ -201,7 +200,7 @@ impl NetworkEmulator { n_peers, bandwidth, peers: (0..n_peers) - .map(|index| PeerEmulator::new(bandwidth, spawn_task_handle.clone())) + .map(|_index| PeerEmulator::new(bandwidth, spawn_task_handle.clone())) .collect::>(), } } diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index d58f0bccba9b..30a9dff02757 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -20,11 +20,10 @@ use clap::Parser; use color_eyre::eyre; use prometheus::proto::LabelPair; -use sc_service::TaskManager; pub(crate) mod availability; -use availability::{EnvParams, TestEnvironment, TestInput, TestState}; +use availability::{TestEnvironment, TestInput, TestState}; const LOG_TARGET: &str = "subsystem-bench"; /// Define the supported benchmarks targets @@ -54,9 +53,7 @@ fn new_runtime() -> tokio::runtime::Runtime { impl BenchCli { /// Launch a malus node. fn launch(self) -> eyre::Result<()> { - use prometheus::{proto::MetricType, Counter, Encoder, Opts, Registry, TextEncoder}; - - let encoder = TextEncoder::new(); + use prometheus::{proto::MetricType, Registry, TextEncoder}; println!("Preparing {:?} benchmarks", self.target); @@ -72,7 +69,6 @@ impl BenchCli { runtime.block_on(availability::bench_chunk_recovery(&mut env)); let metric_families = registry.gather(); - let total_subsystem_cpu = 0; for familiy in metric_families { let metric_type = familiy.get_field_type(); @@ -82,8 +78,6 @@ impl BenchCli { MetricType::HISTOGRAM => { let h = metric.get_histogram(); - let mut inf_seen = false; - let labels = metric.get_label(); // Skip test env usage. 
let mut env_label = LabelPair::default(); From a69492481061bebdfbcfe9bb834a5c24a1160a33 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 7 Nov 2023 18:31:15 +0200 Subject: [PATCH 06/52] Add latency emulation Signed-off-by: Andrei Sandu --- .../src/availability/configuration.rs | 107 ++++++++++++++++++ .../subsystem-bench/src/availability/mod.rs | 89 +++++++-------- .../src/availability/network.rs | 50 ++++---- .../subsystem-bench/src/subsystem-bench.rs | 9 +- 4 files changed, 182 insertions(+), 73 deletions(-) create mode 100644 polkadot/node/subsystem-bench/src/availability/configuration.rs diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs new file mode 100644 index 000000000000..14e8f55128d9 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -0,0 +1,107 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use super::*; + +/// Peer response latency configuration. +#[derive(Clone, Debug)] +pub struct PeerLatency { + /// Min latency for `NetworkAction` completion. + pub min_latency: Duration, + /// Max latency or `NetworkAction` completion. 
+ pub max_latency: Duration, +} + +/// The test input parameters +#[derive(Clone, Debug)] +pub struct TestConfiguration { + /// Number of validators + pub n_validators: usize, + /// Number of cores + pub n_cores: usize, + /// The PoV size + pub pov_size: usize, + /// This parameter is used to determine how many recoveries we batch in parallel + /// similarly to how in practice tranche0 assignments work. + pub vrf_modulo_samples: usize, + /// The amount of bandiwdht remote validators have. + pub bandwidth: usize, + /// Optional peer emulation latency + pub latency: Option, +} + +impl Default for TestConfiguration { + fn default() -> Self { + Self { + n_validators: 10, + n_cores: 10, + pov_size: 5 * 1024 * 1024, + vrf_modulo_samples: 6, + bandwidth: 15 * 1024 * 1024, + latency: None, + } + } +} + +impl TestConfiguration { + /// An unconstrained standard configuration matching Polkadot/Kusama + pub fn unconstrained_300_validators_60_cores(pov_size: usize) -> TestConfiguration { + Self { + n_validators: 300, + n_cores: 60, + pov_size, + vrf_modulo_samples: 6, + // HW specs node bandwidth + bandwidth: 60 * 1024 * 1024, + // No latency + latency: None, + } + } + + /// Polkadot/Kusama configuration with typical latency constraints. + pub fn healthy_network_300_validators_60_cores(pov_size: usize) -> TestConfiguration { + Self { + n_validators: 300, + n_cores: 60, + pov_size, + vrf_modulo_samples: 6, + // HW specs node bandwidth + bandwidth: 60 * 1024 * 1024, + latency: Some(PeerLatency { + min_latency: Duration::from_millis(1), + max_latency: Duration::from_millis(50), + }), + } + } + + /// Polkadot/Kusama configuration with degraded due to latencies. + /// TODO: implement errors. 
+ pub fn degraded_network_300_validators_60_cores(pov_size: usize) -> TestConfiguration { + Self { + n_validators: 300, + n_cores: 60, + pov_size, + vrf_modulo_samples: 6, + // HW specs node bandwidth + bandwidth: 60 * 1024 * 1024, + // A range of latencies to expect in a degraded network + latency: Some(PeerLatency { + min_latency: Duration::from_millis(1), + max_latency: Duration::from_millis(1000), + }), + } + } +} diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index c6e9dead09c1..6c0c41c86c0f 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -32,6 +32,7 @@ use parity_scale_codec::Encode; use polkadot_node_network_protocol::request_response::{ self as req_res, v1::ChunkResponse, IncomingRequest, ReqProtocolNames, Requests, }; +use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; use prometheus::Registry; use sc_network::{config::RequestResponseConfig, OutboundFailure, RequestFailure}; @@ -62,8 +63,11 @@ use polkadot_primitives::{ use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::{SpawnTaskHandle, TaskManager}; +mod configuration; mod network; +pub use configuration::TestConfiguration; + // Deterministic genesis hash for protocol names const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); @@ -112,8 +116,20 @@ impl TestEnvironment { TestEnvironment { task_manager, registry, to_subsystem, instance, state } } - pub fn input(&self) -> &TestInput { - self.state.input() + pub fn config(&self) -> &TestConfiguration { + self.state.config() + } + + /// Produce a randomized duration between `min` and `max`. 
+ fn random_latency(maybe_peer_latency: Option<&PeerLatency>) -> Option { + if let Some(peer_latency) = maybe_peer_latency { + Some( + Uniform::from(peer_latency.min_latency..=peer_latency.max_latency) + .sample(&mut thread_rng()), + ) + } else { + None + } } pub fn respond_to_send_request(state: &mut TestState, request: Requests) -> NetworkAction { @@ -129,7 +145,13 @@ impl TestEnvironment { } .boxed(); - NetworkAction::new(validator_index, future, size) + NetworkAction::new( + validator_index, + future, + size, + // Generate a random latency based on configuration. + Self::random_latency(state.config().latency.as_ref()), + ) }, _ => panic!("received an unexpected request"), } @@ -144,8 +166,8 @@ impl TestEnvironment { ) { // Emulate `n_validators` each with 1MiB of bandwidth available. let mut network = NetworkEmulator::new( - state.input().n_validators, - state.input().bandwidth, + state.config().n_validators, + state.config().bandwidth, spawn_task_handle, ); @@ -270,7 +292,7 @@ use sp_keyring::Sr25519Keyring; use crate::availability::network::NetworkAction; -use self::network::NetworkEmulator; +use self::{configuration::PeerLatency, network::NetworkEmulator}; #[derive(Clone)] pub struct TestState { @@ -287,26 +309,18 @@ pub struct TestState { available_data: AvailableData, chunks: Vec, invalid_chunks: Vec, - input: TestInput, + config: TestConfiguration, } impl TestState { - fn input(&self) -> &TestInput { - &self.input + fn config(&self) -> &TestConfiguration { + &self.config } fn candidate(&self) -> CandidateReceipt { self.candidate.clone() } - fn threshold(&self) -> usize { - recovery_threshold(self.validators.len()).unwrap() - } - - fn impossibility_threshold(&self) -> usize { - self.validators.len() - self.threshold() + 1 - } - async fn respond_to_available_data_query(&self, tx: oneshot::Sender>) { let _ = tx.send(Some(self.available_data.clone())); } @@ -350,8 +364,8 @@ impl TestState { let _ = tx.send(v); } - pub fn new(input: TestInput) -> Self { - let 
validators = (0..input.n_validators as u64) + pub fn new(config: TestConfiguration) -> Self { + let validators = (0..config.n_validators as u64) .into_iter() .map(|_v| Sr25519Keyring::Alice) .collect::>(); @@ -371,7 +385,7 @@ impl TestState { }; // A 5MB PoV. - let pov = PoV { block_data: BlockData(vec![42; input.pov_size]) }; + let pov = PoV { block_data: BlockData(vec![42; config.pov_size]) }; let available_data = AvailableData { validation_data: persisted_validation_data.clone(), @@ -413,7 +427,7 @@ impl TestState { available_data, chunks, invalid_chunks, - input, + config, } } } @@ -453,33 +467,8 @@ fn derive_erasure_chunks_with_proofs_and_root( (erasure_chunks, root) } -/// The test input parameters -#[derive(Clone, Debug)] -pub struct TestInput { - pub n_validators: usize, - pub n_cores: usize, - pub pov_size: usize, - // This parameter is used to determine how many recoveries we batch in parallel - // similarly to how in practice tranche0 assignments work. - pub vrf_modulo_samples: usize, - // The amount of bandiwdht remote validators have. 
- pub bandwidth: usize, -} - -impl Default for TestInput { - fn default() -> Self { - Self { - n_validators: 10, - n_cores: 10, - pov_size: 5 * 1024 * 1024, - vrf_modulo_samples: 6, - bandwidth: 15 * 1024 * 1024, - } - } -} - pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { - let input = env.input().clone(); + let config = env.config().clone(); env.send_signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(new_leaf( Hash::repeat_byte(1), @@ -490,7 +479,7 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let start_marker = Instant::now(); let mut candidate = env.state.candidate(); let mut batch = Vec::new(); - for candidate_num in 0..input.n_cores as u64 { + for candidate_num in 0..config.n_cores as u64 { let (tx, rx) = oneshot::channel(); batch.push(rx); @@ -504,7 +493,7 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { )) .await; - if batch.len() >= input.vrf_modulo_samples { + if batch.len() >= config.vrf_modulo_samples { for rx in std::mem::take(&mut batch) { let _available_data = rx.await.unwrap().unwrap(); } @@ -518,7 +507,7 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { env.send_signal(OverseerSignal::Conclude).await; delay!(5); let duration = start_marker.elapsed().as_millis(); - let tput = ((input.n_cores * input.pov_size) as u128) / duration * 1000; + let tput = ((config.n_cores * config.pov_size) as u128) / duration * 1000; println!("Benchmark completed in {:?}ms", duration); println!("Throughput: {}KiB/s", tput / 1024); } diff --git a/polkadot/node/subsystem-bench/src/availability/network.rs b/polkadot/node/subsystem-bench/src/availability/network.rs index 1889e971cc1e..d6fc175c859b 100644 --- a/polkadot/node/subsystem-bench/src/availability/network.rs +++ b/polkadot/node/subsystem-bench/src/availability/network.rs @@ -122,6 +122,7 @@ mod tests { assert!(total_sent as u128 <= upper_bound); } } + // A network peer emulator struct PeerEmulator { // The queue of requests waiting 
to be served by the emulator @@ -132,19 +133,32 @@ impl PeerEmulator { pub fn new(bandwidth: usize, spawn_task_handle: SpawnTaskHandle) -> Self { let (actions_tx, mut actions_rx) = tokio::sync::mpsc::unbounded_channel(); - spawn_task_handle.spawn("peer-emulator", "test-environment", async move { - let mut rate_limiter = RateLimit::new(20, bandwidth); - loop { - let maybe_action: Option = actions_rx.recv().await; - if let Some(action) = maybe_action { - let size = action.size(); - rate_limiter.reap(size).await; - action.run().await; - } else { - break + spawn_task_handle + .clone() + .spawn("peer-emulator", "test-environment", async move { + let mut rate_limiter = RateLimit::new(20, bandwidth); + loop { + let maybe_action: Option = actions_rx.recv().await; + if let Some(action) = maybe_action { + let size = action.size(); + rate_limiter.reap(size).await; + if let Some(latency) = action.latency { + spawn_task_handle.spawn( + "peer-emulator-latency", + "test-environment", + async move { + tokio::time::sleep(latency).await; + action.run().await; + }, + ) + } else { + action.run().await; + } + } else { + break + } } - } - }); + }); Self { actions_tx } } @@ -164,11 +178,13 @@ pub struct NetworkAction { size: usize, // Peer index index: usize, + // The amount of time to delay the polling `run` + latency: Option, } impl NetworkAction { - pub fn new(index: usize, run: ActionFuture, size: usize) -> Self { - Self { run, size, index } + pub fn new(index: usize, run: ActionFuture, size: usize, latency: Option) -> Self { + Self { run, size, index, latency } } pub fn size(&self) -> usize { self.size @@ -186,10 +202,6 @@ impl NetworkAction { // Mocks the network bridge and an arbitrary number of connected peer nodes. // Implements network latency, bandwidth and error. pub struct NetworkEmulator { - // Number of peers connected on validation protocol - n_peers: usize, - // The maximum Rx/Tx bandwidth in bytes per second. 
- bandwidth: usize, // Per peer network emulation peers: Vec, } @@ -197,8 +209,6 @@ pub struct NetworkEmulator { impl NetworkEmulator { pub fn new(n_peers: usize, bandwidth: usize, spawn_task_handle: SpawnTaskHandle) -> Self { Self { - n_peers, - bandwidth, peers: (0..n_peers) .map(|_index| PeerEmulator::new(bandwidth, spawn_task_handle.clone())) .collect::>(), diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 30a9dff02757..52c522726799 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -23,7 +23,7 @@ use prometheus::proto::LabelPair; pub(crate) mod availability; -use availability::{TestEnvironment, TestInput, TestState}; +use availability::{TestConfiguration, TestEnvironment, TestState}; const LOG_TARGET: &str = "subsystem-bench"; /// Define the supported benchmarks targets @@ -60,11 +60,14 @@ impl BenchCli { let runtime = new_runtime(); let registry = Registry::new(); - let state = TestState::new(TestInput::default()); + let test_config = + TestConfiguration::degraded_network_300_validators_60_cores(1024 * 1024); + + let state = TestState::new(test_config); let mut env = TestEnvironment::new(runtime.handle().clone(), state, registry.clone()); - println!("{:?}", env.input()); + println!("{:?}", env.config()); runtime.block_on(availability::bench_chunk_recovery(&mut env)); From 7ca4dbadf6d24bd94abb0a06bf25d3aaacea8e9f Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 8 Nov 2023 13:15:02 +0200 Subject: [PATCH 07/52] support multiple pov sizes Signed-off-by: Andrei Sandu --- .../src/availability/configuration.rs | 44 ++-- .../subsystem-bench/src/availability/mod.rs | 222 ++++++++++++------ .../subsystem-bench/src/subsystem-bench.rs | 6 +- 3 files changed, 176 insertions(+), 96 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs 
b/polkadot/node/subsystem-bench/src/availability/configuration.rs index 14e8f55128d9..3df496ad0428 100644 --- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -28,80 +28,92 @@ pub struct PeerLatency { /// The test input parameters #[derive(Clone, Debug)] pub struct TestConfiguration { + /// Configuration for the `availability-recovery` subsystem. + pub use_fast_path: bool, /// Number of validators pub n_validators: usize, /// Number of cores pub n_cores: usize, /// The PoV size - pub pov_size: usize, + pub pov_sizes: Vec, /// This parameter is used to determine how many recoveries we batch in parallel - /// similarly to how in practice tranche0 assignments work. - pub vrf_modulo_samples: usize, + /// to simulate tranche0 recoveries. + pub max_parallel_recoveries: usize, /// The amount of bandiwdht remote validators have. pub bandwidth: usize, /// Optional peer emulation latency pub latency: Option, + /// Error probability + pub error: usize, } impl Default for TestConfiguration { fn default() -> Self { Self { + use_fast_path: false, n_validators: 10, n_cores: 10, - pov_size: 5 * 1024 * 1024, - vrf_modulo_samples: 6, - bandwidth: 15 * 1024 * 1024, + pov_sizes: vec![5 * 1024 * 1024], + max_parallel_recoveries: 6, + bandwidth: 60 * 1024 * 1024, latency: None, + error: 0, } } } impl TestConfiguration { /// An unconstrained standard configuration matching Polkadot/Kusama - pub fn unconstrained_300_validators_60_cores(pov_size: usize) -> TestConfiguration { + pub fn unconstrained_300_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { Self { + use_fast_path: false, n_validators: 300, n_cores: 60, - pov_size, - vrf_modulo_samples: 6, + pov_sizes, + max_parallel_recoveries: 20, // HW specs node bandwidth bandwidth: 60 * 1024 * 1024, // No latency latency: None, + error: 0, } } /// Polkadot/Kusama configuration with typical latency constraints. 
- pub fn healthy_network_300_validators_60_cores(pov_size: usize) -> TestConfiguration { + pub fn healthy_network_300_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { Self { + use_fast_path: true, n_validators: 300, n_cores: 60, - pov_size, - vrf_modulo_samples: 6, + pov_sizes, + max_parallel_recoveries: 6, // HW specs node bandwidth bandwidth: 60 * 1024 * 1024, latency: Some(PeerLatency { min_latency: Duration::from_millis(1), max_latency: Duration::from_millis(50), }), + error: 5, } } /// Polkadot/Kusama configuration with degraded due to latencies. /// TODO: implement errors. - pub fn degraded_network_300_validators_60_cores(pov_size: usize) -> TestConfiguration { + pub fn degraded_network_300_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { Self { + use_fast_path: true, n_validators: 300, n_cores: 60, - pov_size, - vrf_modulo_samples: 6, + pov_sizes, + max_parallel_recoveries: 6, // HW specs node bandwidth bandwidth: 60 * 1024 * 1024, // A range of latencies to expect in a degraded network latency: Some(PeerLatency { min_latency: Duration::from_millis(1), - max_latency: Duration::from_millis(1000), + max_latency: Duration::from_millis(500), }), + error: 30, } } } diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 6c0c41c86c0f..dcfeb4287780 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -15,6 +15,7 @@ // along with Polkadot. If not, see . 
use std::{ + collections::HashMap, sync::Arc, time::{Duration, Instant}, }; @@ -23,7 +24,6 @@ use futures::{ channel::{mpsc, oneshot}, FutureExt, SinkExt, }; -use futures_timer::Delay; use polkadot_node_metrics::metrics::Metrics; use polkadot_availability_recovery::AvailabilityRecoverySubsystem; @@ -49,7 +49,6 @@ use polkadot_node_subsystem::{ const LOG_TARGET: &str = "subsystem-bench::availability"; -use polkadot_erasure_coding::recovery_threshold; use polkadot_node_primitives::{AvailableData, ErasureChunk}; use polkadot_node_subsystem_test_helpers::{ @@ -57,7 +56,7 @@ use polkadot_node_subsystem_test_helpers::{ }; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_primitives::{ - AuthorityDiscoveryId, CandidateReceipt, GroupIndex, Hash, HeadData, IndexedVec, + AuthorityDiscoveryId, CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, IndexedVec, PersistedValidationData, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, }; use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; @@ -97,8 +96,11 @@ impl TestEnvironment { // We use prometheus metrics to collect per job task poll time and subsystem metrics. pub fn new(runtime: tokio::runtime::Handle, state: TestState, registry: Registry) -> Self { let task_manager: TaskManager = TaskManager::new(runtime.clone(), Some(®istry)).unwrap(); - let (instance, virtual_overseer) = - AvailabilityRecoverySubsystemInstance::new(®istry, task_manager.spawn_handle()); + let (instance, virtual_overseer) = AvailabilityRecoverySubsystemInstance::new( + ®istry, + task_manager.spawn_handle(), + state.config().use_fast_path, + ); // Copy sender for later when we need to inject messages in to the subsystem. let to_subsystem = virtual_overseer.tx.clone(); @@ -132,16 +134,60 @@ impl TestEnvironment { } } + /// Generate a random error based on `probability`. + /// `probability` should be a number between 0 and 100. 
+ fn random_error(probability: usize) -> bool { + Uniform::from(0..=99).sample(&mut thread_rng()) < probability + } + pub fn respond_to_send_request(state: &mut TestState, request: Requests) -> NetworkAction { match request { Requests::ChunkFetchingV1(outgoing_request) => { let validator_index = outgoing_request.payload.index.0 as usize; - let chunk: ChunkResponse = state.chunks[validator_index].clone().into(); + let chunk: ChunkResponse = + state.chunks.get(&outgoing_request.payload.candidate_hash).unwrap() + [validator_index] + .clone() + .into(); let size = chunk.encoded_size(); + + let response = if Self::random_error(state.config().error) { + Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) + } else { + Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode()) + }; + let future = async move { - let _ = outgoing_request - .pending_response - .send(Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode())); + let _ = outgoing_request.pending_response.send(response); + } + .boxed(); + + NetworkAction::new( + validator_index, + future, + size, + // Generate a random latency based on configuration. 
+ Self::random_latency(state.config().latency.as_ref()), + ) + }, + Requests::AvailableDataFetchingV1(outgoing_request) => { + // TODO: do better, by implementing diff authority ids and mapping network actions + // to authority id, + let validator_index = + Uniform::from(0..state.config().n_validators).sample(&mut thread_rng()); + let available_data = + state.candidates.get(&outgoing_request.payload.candidate_hash).unwrap().clone(); + let size = available_data.encoded_size(); + + let response = if Self::random_error(state.config().error) { + Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) + } else { + Ok(req_res::v1::AvailableDataFetchingResponse::from(Some(available_data)) + .encode()) + }; + + let future = async move { + let _ = outgoing_request.pending_response.send(response); } .boxed(); @@ -192,14 +238,14 @@ impl TestEnvironment { // TODO: Simulate av store load by delaying the response. state.respond_none_to_available_data_query(tx).await; }, - AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryAllChunks(_candidate_hash, tx)) => { + AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryAllChunks(candidate_hash, tx)) => { // Test env: We always have our own chunk. 
- state.respond_to_query_all_request(|index| index == state.validator_index.0 as usize, tx).await; + state.respond_to_query_all_request(candidate_hash, |index| index == state.validator_index.0 as usize, tx).await; }, AllMessages::AvailabilityStore( - AvailabilityStoreMessage::QueryChunkSize(_, tx) + AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx) ) => { - let chunk_size = state.chunks[0].encoded_size(); + let chunk_size = state.chunks.get(&candidate_hash).unwrap()[0].encoded_size(); let _ = tx.send(Some(chunk_size)); } AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -250,15 +296,24 @@ impl AvailabilityRecoverySubsystemInstance { pub fn new( registry: &Registry, spawn_task_handle: SpawnTaskHandle, + use_fast_path: bool, ) -> (Self, TestSubsystemContextHandle) { let (context, virtual_overseer) = make_buffered_subsystem_context(spawn_task_handle.clone(), 4096 * 4); let (collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); - let subsystem = AvailabilityRecoverySubsystem::with_chunks_only( - collation_req_receiver, - Metrics::try_register(®istry).unwrap(), - ); + + let subsystem = if use_fast_path { + AvailabilityRecoverySubsystem::with_fast_path( + collation_req_receiver, + Metrics::try_register(®istry).unwrap(), + ) + } else { + AvailabilityRecoverySubsystem::with_chunks_only( + collation_req_receiver, + Metrics::try_register(®istry).unwrap(), + ) + }; let spawned_subsystem = subsystem.start(context); let subsystem_future = async move { @@ -282,12 +337,6 @@ const TIMEOUT: Duration = Duration::from_millis(300); // This should eventually be a test parameter. const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); -macro_rules! 
delay { - ($delay:expr) => { - Delay::new(Duration::from_millis($delay)).await; - }; -} - use sp_keyring::Sr25519Keyring; use crate::availability::network::NetworkAction; @@ -301,14 +350,15 @@ pub struct TestState { validator_authority_id: Vec, // The test node validator index. validator_index: ValidatorIndex, - candidate: CandidateReceipt, + // Per core candidates receipts. + candidate_receipts: Vec, session_index: SessionIndex, persisted_validation_data: PersistedValidationData, + /// A per size pov mapping to available data. + candidates: HashMap, - available_data: AvailableData, - chunks: Vec, - invalid_chunks: Vec, + chunks: HashMap>, config: TestConfiguration, } @@ -317,12 +367,8 @@ impl TestState { &self.config } - fn candidate(&self) -> CandidateReceipt { - self.candidate.clone() - } - - async fn respond_to_available_data_query(&self, tx: oneshot::Sender>) { - let _ = tx.send(Some(self.available_data.clone())); + fn candidate(&self, candidate_index: usize) -> CandidateReceipt { + self.candidate_receipts.get(candidate_index).unwrap().clone() } async fn respond_none_to_available_data_query( @@ -337,9 +383,7 @@ impl TestState { validators: self.validator_public.clone(), discovery_keys: self.validator_authority_id.clone(), // all validators in the same group. 
- validator_groups: IndexedVec::>::from(vec![(0..self - .validators - .len()) + validator_groups: IndexedVec::>::from(vec![(0..5) .map(|i| ValidatorIndex(i as _)) .collect()]), assignment_keys: vec![], @@ -356,10 +400,18 @@ impl TestState { } async fn respond_to_query_all_request( &self, + candidate_hash: CandidateHash, send_chunk: impl Fn(usize) -> bool, tx: oneshot::Sender>, ) { - let v = self.chunks.iter().filter(|c| send_chunk(c.index.0 as usize)).cloned().collect(); + let v = self + .chunks + .get(&candidate_hash) + .unwrap() + .iter() + .filter(|c| send_chunk(c.index.0 as usize)) + .cloned() + .collect(); let _ = tx.send(v); } @@ -370,13 +422,15 @@ impl TestState { .map(|_v| Sr25519Keyring::Alice) .collect::>(); - let mut candidate = dummy_candidate_receipt(dummy_hash()); let validator_public = validator_pubkeys(&validators); let validator_authority_id = validator_authority_id(&validators); let validator_index = ValidatorIndex(0); - + let mut pov_size_to_candidate = HashMap::new(); + let mut chunks = HashMap::new(); + let mut candidates = HashMap::new(); let session_index = 10; + // we use it for all candidates. let persisted_validation_data = PersistedValidationData { parent_head: HeadData(vec![7, 8, 9]), relay_parent_number: Default::default(), @@ -384,49 +438,57 @@ impl TestState { relay_parent_storage_root: Default::default(), }; - // A 5MB PoV. - let pov = PoV { block_data: BlockData(vec![42; config.pov_size]) }; + // Create initial candidate receipts + let mut candidate_receipts = config + .pov_sizes + .iter() + .map(|_index| dummy_candidate_receipt(dummy_hash())) + .collect::>(); - let available_data = AvailableData { - validation_data: persisted_validation_data.clone(), - pov: Arc::new(pov), - }; + for (index, pov_size) in config.pov_sizes.iter().enumerate() { + let mut candidate = &mut candidate_receipts[index]; + // a hack to make candidate unique. 
+ candidate.descriptor.relay_parent = Hash::from_low_u64_be(index as u64); - let (chunks, erasure_root) = derive_erasure_chunks_with_proofs_and_root( - validators.len(), - &available_data, - |_, _| {}, - ); - // Mess around: - let invalid_chunks = chunks - .iter() - .cloned() - .map(|mut chunk| { - if chunk.chunk.len() >= 2 && chunk.chunk[0] != chunk.chunk[1] { - chunk.chunk[0] = chunk.chunk[1]; - } else if chunk.chunk.len() >= 1 { - chunk.chunk[0] = !chunk.chunk[0]; - } else { - chunk.proof = Proof::dummy_proof(); - } - chunk - }) - .collect(); - debug_assert_ne!(chunks, invalid_chunks); + // We reuse candidates of same size, to speed up the test startup. + let (erasure_root, available_data, new_chunks) = + pov_size_to_candidate.entry(pov_size).or_insert_with(|| { + let pov = PoV { block_data: BlockData(vec![index as u8; *pov_size]) }; + + let available_data = AvailableData { + validation_data: persisted_validation_data.clone(), + pov: Arc::new(pov), + }; + + let (new_chunks, erasure_root) = derive_erasure_chunks_with_proofs_and_root( + validators.len(), + &available_data, + |_, _| {}, + ); + + candidate.descriptor.erasure_root = erasure_root; - candidate.descriptor.erasure_root = erasure_root; + chunks.insert(candidate.hash(), new_chunks.clone()); + candidates.insert(candidate.hash(), available_data.clone()); + + (erasure_root, available_data, new_chunks) + }); + + candidate.descriptor.erasure_root = *erasure_root; + candidates.insert(candidate.hash(), available_data.clone()); + chunks.insert(candidate.hash(), new_chunks.clone()); + } Self { validators, validator_public, validator_authority_id, validator_index, - candidate, + candidate_receipts, session_index, persisted_validation_data, - available_data, + candidates, chunks, - invalid_chunks, config, } } @@ -467,6 +529,9 @@ fn derive_erasure_chunks_with_proofs_and_root( (erasure_chunks, root) } +pub async fn bench_with_chunks_if_pov_large(env: &mut TestEnvironment) {} + +pub async fn bench_inner(env: &mut 
TestEnvironment) {} pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let config = env.config().clone(); @@ -477,14 +542,14 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { .await; let start_marker = Instant::now(); - let mut candidate = env.state.candidate(); let mut batch = Vec::new(); + let mut availability_bytes = 0; for candidate_num in 0..config.n_cores as u64 { + let candidate = env.state.candidate_receipts[candidate_num as usize].clone(); + let (tx, rx) = oneshot::channel(); batch.push(rx); - candidate.descriptor.relay_parent = Hash::from_low_u64_be(candidate_num); - env.send_message(AvailabilityRecoveryMessage::RecoverAvailableData( candidate.clone(), 1, @@ -493,21 +558,22 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { )) .await; - if batch.len() >= config.vrf_modulo_samples { + if batch.len() >= config.max_parallel_recoveries { for rx in std::mem::take(&mut batch) { - let _available_data = rx.await.unwrap().unwrap(); + let available_data = rx.await.unwrap().unwrap(); + availability_bytes += available_data.encoded_size(); } } } for rx in std::mem::take(&mut batch) { - let _available_data = rx.await.unwrap().unwrap(); + let available_data = rx.await.unwrap().unwrap(); + availability_bytes += available_data.encoded_size(); } env.send_signal(OverseerSignal::Conclude).await; - delay!(5); let duration = start_marker.elapsed().as_millis(); - let tput = ((config.n_cores * config.pov_size) as u128) / duration * 1000; + let tput = (availability_bytes as u128) / duration * 1000; println!("Benchmark completed in {:?}ms", duration); println!("Throughput: {}KiB/s", tput / 1024); } diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 52c522726799..2a5edf7cf197 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -60,8 +60,10 @@ impl BenchCli { let runtime = new_runtime(); let 
registry = Registry::new(); - let test_config = - TestConfiguration::degraded_network_300_validators_60_cores(1024 * 1024); + let mut pov_sizes = Vec::new(); + pov_sizes.append(&mut vec![1024 * 1024 * 5; 60]); + + let test_config = TestConfiguration::unconstrained_300_validators_60_cores(pov_sizes); let state = TestState::new(test_config); From 0430b5b909b84abc5bb4c078924ce46e944dc18a Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 8 Nov 2023 15:07:22 +0200 Subject: [PATCH 08/52] new metric in recovery and more testing Signed-off-by: Andrei Sandu --- Cargo.lock | 1 + .../availability-recovery/src/metrics.rs | 17 +++++++++++--- .../network/availability-recovery/src/task.rs | 3 ++- polkadot/node/subsystem-bench/Cargo.toml | 2 +- .../src/availability/configuration.rs | 22 +++++++++++++++---- .../subsystem-bench/src/availability/mod.rs | 10 +++++++-- .../subsystem-bench/src/subsystem-bench.rs | 14 ++++++++++-- .../node/subsystem-test-helpers/src/lib.rs | 12 ++++++---- 8 files changed, 64 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4645aeee6aab..5b54745cdcc3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13022,6 +13022,7 @@ dependencies = [ "sp-core", "sp-keyring", "sp-keystore", + "substrate-prometheus-endpoint", "tokio", "tracing-gum", ] diff --git a/polkadot/node/network/availability-recovery/src/metrics.rs b/polkadot/node/network/availability-recovery/src/metrics.rs index aa7216739507..d82a8f9ae5fa 100644 --- a/polkadot/node/network/availability-recovery/src/metrics.rs +++ b/polkadot/node/network/availability-recovery/src/metrics.rs @@ -29,7 +29,10 @@ struct MetricsInner { /// /// Gets incremented on each sent chunk requests. chunk_requests_issued: Counter, - + /// Total number of bytes recovered + /// + /// Gets incremented on each succesful recovery + recovered_bytes_total: Counter, /// A counter for finished chunk requests. /// /// Split by result: @@ -133,9 +136,10 @@ impl Metrics { } /// A full recovery succeeded. 
- pub fn on_recovery_succeeded(&self) { + pub fn on_recovery_succeeded(&self, bytes: usize) { if let Some(metrics) = &self.0 { - metrics.full_recoveries_finished.with_label_values(&["success"]).inc() + metrics.full_recoveries_finished.with_label_values(&["success"]).inc(); + metrics.recovered_bytes_total.inc_by(bytes as u64) } } @@ -171,6 +175,13 @@ impl metrics::Metrics for Metrics { )?, registry, )?, + recovered_bytes_total: prometheus::register( + Counter::new( + "polkadot_parachain_availability_recovery_bytes_total", + "Total number of bytes recovered", + )?, + registry, + )?, chunk_requests_finished: prometheus::register( CounterVec::new( Opts::new( diff --git a/polkadot/node/network/availability-recovery/src/task.rs b/polkadot/node/network/availability-recovery/src/task.rs index d5bc2da84944..9ed911f3b5a7 100644 --- a/polkadot/node/network/availability-recovery/src/task.rs +++ b/polkadot/node/network/availability-recovery/src/task.rs @@ -23,6 +23,7 @@ use crate::{ LOG_TARGET, }; use futures::{channel::oneshot, SinkExt}; +use parity_scale_codec::Encode; #[cfg(not(test))] use polkadot_node_network_protocol::request_response::CHUNK_REQUEST_TIMEOUT; use polkadot_node_network_protocol::request_response::{ @@ -426,7 +427,7 @@ where return Err(err) }, Ok(data) => { - self.params.metrics.on_recovery_succeeded(); + self.params.metrics.on_recovery_succeeded(data.encoded_size()); return Ok(data) }, } diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index 7408397f930c..2de978234a63 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -47,7 +47,7 @@ sc-service = { path = "../../../substrate/client/service" } polkadot-node-metrics = { path = "../metrics" } polkadot-primitives-test-helpers = { path = "../../primitives/test-helpers" } -# prometheus = { package = "substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" } +prometheus_endpoint = { package = 
"substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" } prometheus = { version = "0.13.0", default-features = false } [features] diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs index 3df496ad0428..9a93bf12e114 100644 --- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -65,12 +65,26 @@ impl Default for TestConfiguration { impl TestConfiguration { /// An unconstrained standard configuration matching Polkadot/Kusama pub fn unconstrained_300_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { + Self { + use_fast_path: false, + n_validators: 300, + n_cores: 100, + pov_sizes, + max_parallel_recoveries: 100, + // HW specs node bandwidth + bandwidth: 60 * 1024 * 1024, + // No latency + latency: None, + error: 0, + } + } + pub fn unconstrained_1000_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { Self { use_fast_path: false, n_validators: 300, n_cores: 60, pov_sizes, - max_parallel_recoveries: 20, + max_parallel_recoveries: 30, // HW specs node bandwidth bandwidth: 60 * 1024 * 1024, // No latency @@ -101,11 +115,11 @@ impl TestConfiguration { /// TODO: implement errors. 
pub fn degraded_network_300_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { Self { - use_fast_path: true, + use_fast_path: false, n_validators: 300, - n_cores: 60, + n_cores: 100, pov_sizes, - max_parallel_recoveries: 6, + max_parallel_recoveries: 20, // HW specs node bandwidth bandwidth: 60 * 1024 * 1024, // A range of latencies to expect in a degraded network diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index dcfeb4287780..8f8eca104385 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -298,8 +298,11 @@ impl AvailabilityRecoverySubsystemInstance { spawn_task_handle: SpawnTaskHandle, use_fast_path: bool, ) -> (Self, TestSubsystemContextHandle) { - let (context, virtual_overseer) = - make_buffered_subsystem_context(spawn_task_handle.clone(), 4096 * 4); + let (context, virtual_overseer) = make_buffered_subsystem_context( + spawn_task_handle.clone(), + 4096 * 4, + "availability-recovery", + ); let (collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); @@ -558,6 +561,7 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { )) .await; + // TODO: select between futures unordered of rx await and timer to send next request. 
if batch.len() >= config.max_parallel_recoveries { for rx in std::mem::take(&mut batch) { let available_data = rx.await.unwrap().unwrap(); @@ -576,4 +580,6 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let tput = (availability_bytes as u128) / duration * 1000; println!("Benchmark completed in {:?}ms", duration); println!("Throughput: {}KiB/s", tput / 1024); + + tokio::time::sleep(Duration::from_secs(1)).await; } diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 2a5edf7cf197..4dc0936291b1 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -20,6 +20,7 @@ use clap::Parser; use color_eyre::eyre; use prometheus::proto::LabelPair; +use std::net::{Ipv4Addr, SocketAddr}; pub(crate) mod availability; @@ -59,16 +60,25 @@ impl BenchCli { let runtime = new_runtime(); let registry = Registry::new(); + let registry_clone = registry.clone(); let mut pov_sizes = Vec::new(); - pov_sizes.append(&mut vec![1024 * 1024 * 5; 60]); + pov_sizes.append(&mut vec![1024 * 1024; 100]); - let test_config = TestConfiguration::unconstrained_300_validators_60_cores(pov_sizes); + let test_config = TestConfiguration::unconstrained_1000_validators_60_cores(pov_sizes); let state = TestState::new(test_config); let mut env = TestEnvironment::new(runtime.handle().clone(), state, registry.clone()); + let handle = runtime.spawn(async move { + prometheus_endpoint::init_prometheus( + SocketAddr::new(std::net::IpAddr::V4(Ipv4Addr::LOCALHOST), 9999), + registry_clone, + ) + .await + }); + println!("{:?}", env.config()); runtime.block_on(availability::bench_chunk_recovery(&mut env)); diff --git a/polkadot/node/subsystem-test-helpers/src/lib.rs b/polkadot/node/subsystem-test-helpers/src/lib.rs index 3f92513498c4..5393ccafa6f3 100644 --- a/polkadot/node/subsystem-test-helpers/src/lib.rs +++ b/polkadot/node/subsystem-test-helpers/src/lib.rs @@ 
-187,6 +187,7 @@ pub struct TestSubsystemContext { tx: TestSubsystemSender, rx: mpsc::Receiver>, spawn: S, + name: &'static str, } #[async_trait::async_trait] @@ -223,7 +224,7 @@ where name: &'static str, s: Pin + Send>>, ) -> SubsystemResult<()> { - self.spawn.spawn(name, None, s); + self.spawn.spawn(name, Some(self.name), s); Ok(()) } @@ -232,7 +233,7 @@ where name: &'static str, s: Pin + Send>>, ) -> SubsystemResult<()> { - self.spawn.spawn_blocking(name, None, s); + self.spawn.spawn_blocking(name, Some(self.name), s); Ok(()) } @@ -292,8 +293,9 @@ impl TestSubsystemContextHandle { /// of the tests. pub fn make_subsystem_context( spawner: S, + name: &'static str, ) -> (TestSubsystemContext>, TestSubsystemContextHandle) { - make_buffered_subsystem_context(spawner, 0) + make_buffered_subsystem_context(spawner, 0, name) } /// Make a test subsystem context with buffered overseer channel. Some tests (e.g. @@ -302,6 +304,7 @@ pub fn make_subsystem_context( pub fn make_buffered_subsystem_context( spawner: S, buffer_size: usize, + name: &'static str, ) -> (TestSubsystemContext>, TestSubsystemContextHandle) { let (overseer_tx, overseer_rx) = mpsc::channel(buffer_size); let (all_messages_tx, all_messages_rx) = mpsc::unbounded(); @@ -311,6 +314,7 @@ pub fn make_buffered_subsystem_context( tx: TestSubsystemSender { tx: all_messages_tx }, rx: overseer_rx, spawn: SpawnGlue(spawner), + name, }, TestSubsystemContextHandle { tx: overseer_tx, rx: all_messages_rx }, ) @@ -332,7 +336,7 @@ pub fn subsystem_test_harness( Test: Future, { let pool = TaskExecutor::new(); - let (context, handle) = make_subsystem_context(pool); + let (context, handle) = make_subsystem_context(pool, "default"); let overseer = overseer_factory(handle); let test = test_factory(context); From 027bcd862eef7d2f776ceb0d2bf3dc11ef490b5c Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Thu, 9 Nov 2023 23:15:01 +0200 Subject: [PATCH 09/52] CLI update and fixes Signed-off-by: Andrei Sandu --- Cargo.lock | 11 ++ 
cumulus/pallets/xcmp-queue/src/tests.rs | 22 ++- .../network/availability-recovery/Cargo.toml | 4 + .../network/availability-recovery/src/lib.rs | 10 +- polkadot/node/subsystem-bench/Cargo.toml | 4 +- .../src/availability/configuration.rs | 94 ++++++------ .../subsystem-bench/src/availability/mod.rs | 140 ++++++++++++------ .../subsystem-bench/src/subsystem-bench.rs | 125 ++++++++++++++-- 8 files changed, 292 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b54745cdcc3..05355cad0e2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2561,6 +2561,15 @@ dependencies = [ "clap_derive 4.4.2", ] +[[package]] +name = "clap-num" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488557e97528174edaa2ee268b23a809e0c598213a4bbcb4f34575a46fda147e" +dependencies = [ + "num-traits", +] + [[package]] name = "clap_builder" version = "4.4.6" @@ -11715,6 +11724,7 @@ dependencies = [ "sp-core", "sp-keyring", "thiserror", + "tokio", "tracing-gum", ] @@ -12997,6 +13007,7 @@ dependencies = [ "assert_matches", "async-trait", "clap 4.4.6", + "clap-num", "color-eyre", "env_logger 0.9.3", "futures", diff --git a/cumulus/pallets/xcmp-queue/src/tests.rs b/cumulus/pallets/xcmp-queue/src/tests.rs index cf6d947609d2..bab7e92ca2de 100644 --- a/cumulus/pallets/xcmp-queue/src/tests.rs +++ b/cumulus/pallets/xcmp-queue/src/tests.rs @@ -410,9 +410,11 @@ fn verify_fee_factor_increase_and_decrease() { assert_eq!(DeliveryFeeFactor::::get(sibling_para_id), initial); // Sending the message right now is cheap - let (_, delivery_fees) = validate_send::(destination, xcm.clone()) - .expect("message can be sent; qed"); - let Fungible(delivery_fee_amount) = delivery_fees.inner()[0].fun else { unreachable!("asset is fungible; qed"); }; + let (_, delivery_fees) = + validate_send::(destination, xcm.clone()).expect("message can be sent; qed"); + let Fungible(delivery_fee_amount) = delivery_fees.inner()[0].fun else { + unreachable!("asset is 
fungible; qed"); + }; assert_eq!(delivery_fee_amount, 402_000_000); let smaller_xcm = Xcm(vec![ClearOrigin; 30]); @@ -422,19 +424,23 @@ fn verify_fee_factor_increase_and_decrease() { assert_ok!(send_xcm::(destination, xcm.clone())); // Size 520 assert_eq!(DeliveryFeeFactor::::get(sibling_para_id), FixedU128::from_float(1.05)); - for _ in 0..12 { // We finish at size 929 + for _ in 0..12 { + // We finish at size 929 assert_ok!(send_xcm::(destination, smaller_xcm.clone())); } assert!(DeliveryFeeFactor::::get(sibling_para_id) > FixedU128::from_float(1.88)); // Sending the message right now is expensive - let (_, delivery_fees) = validate_send::(destination, xcm.clone()) - .expect("message can be sent; qed"); - let Fungible(delivery_fee_amount) = delivery_fees.inner()[0].fun else { unreachable!("asset is fungible; qed"); }; + let (_, delivery_fees) = + validate_send::(destination, xcm.clone()).expect("message can be sent; qed"); + let Fungible(delivery_fee_amount) = delivery_fees.inner()[0].fun else { + unreachable!("asset is fungible; qed"); + }; assert_eq!(delivery_fee_amount, 758_030_955); // Fee factor only decreases in `take_outbound_messages` - for _ in 0..5 { // We take 5 100 byte pages + for _ in 0..5 { + // We take 5 100 byte pages XcmpQueue::take_outbound_messages(1); } assert!(DeliveryFeeFactor::::get(sibling_para_id) < FixedU128::from_float(1.72)); diff --git a/polkadot/node/network/availability-recovery/Cargo.toml b/polkadot/node/network/availability-recovery/Cargo.toml index 42c3abef547b..5f3df09c2bd9 100644 --- a/polkadot/node/network/availability-recovery/Cargo.toml +++ b/polkadot/node/network/availability-recovery/Cargo.toml @@ -7,6 +7,7 @@ license.workspace = true [dependencies] futures = "0.3.21" +tokio = "1.24.2" schnellru = "0.2.1" rand = "0.8.5" fatality = "0.0.6" @@ -36,3 +37,6 @@ sc-network = { path = "../../../../substrate/client/network" } polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" } 
polkadot-primitives-test-helpers = { path = "../../../primitives/test-helpers" } + +[features] +subsystem-benchmarks = [] \ No newline at end of file diff --git a/polkadot/node/network/availability-recovery/src/lib.rs b/polkadot/node/network/availability-recovery/src/lib.rs index 156a8cbbc82e..ffb634ad76e2 100644 --- a/polkadot/node/network/availability-recovery/src/lib.rs +++ b/polkadot/node/network/availability-recovery/src/lib.rs @@ -617,12 +617,9 @@ impl AvailabilityRecoverySubsystem { .into_iter() .cycle(); - gum::debug!("Subsystem running"); loop { let recv_req = req_receiver.recv(|| vec![COST_INVALID_REQUEST]).fuse(); pin_mut!(recv_req); - gum::debug!("waiting for message"); - futures::select! { erasure_task = erasure_task_rx.next() => { match erasure_task { @@ -729,6 +726,8 @@ impl AvailabilityRecoverySubsystem { } } output = state.ongoing_recoveries.select_next_some() => { + // No caching for benchmark. + #[cfg(not(feature = "subsystem-benchmarks"))] if let Some((candidate_hash, result)) = output { if let Ok(recovery) = CachedRecovery::try_from(result) { state.availability_lru.insert(candidate_hash, recovery); @@ -829,5 +828,10 @@ async fn erasure_task_thread( break }, } + + // In benchmarks this is a very hot loop not yielding at all. + // To update promehteus metrics for the task we need to yield. 
+ #[cfg(feature = "subsystem-benchmarks")] + tokio::task::yield_now().await; } } diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index 2de978234a63..01b992d15fc6 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -22,7 +22,7 @@ polkadot-node-subsystem-types = { path = "../subsystem-types" } polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } polkadot-node-network-protocol = { path = "../network/protocol" } -polkadot-availability-recovery = { path = "../network/availability-recovery" } +polkadot-availability-recovery = { path = "../network/availability-recovery", features=["subsystem-benchmarks"]} color-eyre = { version = "0.6.1", default-features = false } assert_matches = "1.5" async-trait = "0.1.57" @@ -38,7 +38,7 @@ env_logger = "0.9.0" rand = "0.8.5" parity-scale-codec = { version = "3.6.1", features = ["std", "derive"] } tokio = "1.24.2" - +clap-num = "1.0.2" polkadot-node-subsystem-test-helpers = { path = "../subsystem-test-helpers" } sp-keyring = { path = "../../../substrate/primitives/keyring" } sp-application-crypto = { path = "../../../substrate/primitives/application-crypto" } diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs index 9a93bf12e114..1355c67edea0 100644 --- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -17,7 +17,7 @@ use super::*; /// Peer response latency configuration. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct PeerLatency { /// Min latency for `NetworkAction` completion. 
pub min_latency: Duration, @@ -36,15 +36,15 @@ pub struct TestConfiguration { pub n_cores: usize, /// The PoV size pub pov_sizes: Vec, - /// This parameter is used to determine how many recoveries we batch in parallel - /// to simulate tranche0 recoveries. - pub max_parallel_recoveries: usize, - /// The amount of bandiwdht remote validators have. + /// The amount of bandiwdth remote validators have. pub bandwidth: usize, /// Optional peer emulation latency pub latency: Option, /// Error probability pub error: usize, + /// Number of loops + /// In one loop `n_cores` candidates are recovered + pub num_loops: usize, } impl Default for TestConfiguration { @@ -54,80 +54,78 @@ impl Default for TestConfiguration { n_validators: 10, n_cores: 10, pov_sizes: vec![5 * 1024 * 1024], - max_parallel_recoveries: 6, bandwidth: 60 * 1024 * 1024, latency: None, error: 0, + num_loops: 1, } } } impl TestConfiguration { /// An unconstrained standard configuration matching Polkadot/Kusama - pub fn unconstrained_300_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { + pub fn ideal_network( + num_loops: usize, + use_fast_path: bool, + n_validators: usize, + n_cores: usize, + pov_sizes: Vec, + ) -> TestConfiguration { Self { - use_fast_path: false, - n_validators: 300, - n_cores: 100, + use_fast_path, + n_cores, + n_validators, pov_sizes, - max_parallel_recoveries: 100, // HW specs node bandwidth - bandwidth: 60 * 1024 * 1024, - // No latency - latency: None, - error: 0, - } - } - pub fn unconstrained_1000_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { - Self { - use_fast_path: false, - n_validators: 300, - n_cores: 60, - pov_sizes, - max_parallel_recoveries: 30, - // HW specs node bandwidth - bandwidth: 60 * 1024 * 1024, + bandwidth: 50 * 1024 * 1024, // No latency latency: None, error: 0, + num_loops, } } - /// Polkadot/Kusama configuration with typical latency constraints. 
- pub fn healthy_network_300_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { + pub fn healthy_network( + num_loops: usize, + use_fast_path: bool, + n_validators: usize, + n_cores: usize, + pov_sizes: Vec, + ) -> TestConfiguration { Self { - use_fast_path: true, - n_validators: 300, - n_cores: 60, + use_fast_path, + n_cores, + n_validators, pov_sizes, - max_parallel_recoveries: 6, - // HW specs node bandwidth - bandwidth: 60 * 1024 * 1024, + bandwidth: 50 * 1024 * 1024, latency: Some(PeerLatency { min_latency: Duration::from_millis(1), - max_latency: Duration::from_millis(50), + max_latency: Duration::from_millis(100), }), - error: 5, + error: 3, + num_loops, } } - /// Polkadot/Kusama configuration with degraded due to latencies. - /// TODO: implement errors. - pub fn degraded_network_300_validators_60_cores(pov_sizes: Vec) -> TestConfiguration { + pub fn degraded_network( + num_loops: usize, + use_fast_path: bool, + n_validators: usize, + n_cores: usize, + pov_sizes: Vec, + ) -> TestConfiguration { Self { - use_fast_path: false, - n_validators: 300, - n_cores: 100, + use_fast_path, + n_cores, + n_validators, pov_sizes, - max_parallel_recoveries: 20, - // HW specs node bandwidth - bandwidth: 60 * 1024 * 1024, - // A range of latencies to expect in a degraded network + bandwidth: 50 * 1024 * 1024, latency: Some(PeerLatency { - min_latency: Duration::from_millis(1), + min_latency: Duration::from_millis(10), max_latency: Duration::from_millis(500), }), - error: 30, + error: 33, + num_loops, } } } diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 8f8eca104385..7b9b64c07096 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -22,7 +22,8 @@ use std::{ use futures::{ channel::{mpsc, oneshot}, - FutureExt, SinkExt, + stream::FuturesUnordered, + FutureExt, SinkExt, StreamExt, }; use 
polkadot_node_metrics::metrics::Metrics; @@ -32,7 +33,7 @@ use parity_scale_codec::Encode; use polkadot_node_network_protocol::request_response::{ self as req_res, v1::ChunkResponse, IncomingRequest, ReqProtocolNames, Requests, }; -use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; +use rand::{distributions::Uniform, prelude::Distribution, seq::IteratorRandom, thread_rng}; use prometheus::Registry; use sc_network::{config::RequestResponseConfig, OutboundFailure, RequestFailure}; @@ -46,6 +47,7 @@ use polkadot_node_subsystem::{ }, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, Subsystem, }; +use std::net::{Ipv4Addr, SocketAddr}; const LOG_TARGET: &str = "subsystem-bench::availability"; @@ -74,20 +76,47 @@ struct AvailabilityRecoverySubsystemInstance { _protocol_config: RequestResponseConfig, } -// Implements a mockup of NetworkBridge and AvilabilityStore to support provide state for -// `AvailabilityRecoverySubsystemInstance` +/// The test environment is responsible for creating an instance of the availability recovery +/// subsystem and connecting it to an emulated overseer. +/// +/// ## Mockups +/// We emulate the following subsystems: +/// - runtime api +/// - network bridge +/// - availability store +/// +/// As the subsystem's performance depends on network connectivity, the test environment +/// emulates validator nodes on the network, see `NetworkEmulator`. The network emulation +/// is configurable in terms of peer bandwidth, latency and connection error rate using +/// uniform distribution sampling. +/// +/// The mockup logic is implemented in `env_task` which owns and advances the `TestState`. +/// +/// ## Usage +/// `TestEnvironment` is used in tests to send `Overseer` messages or signals to the subsystem +/// under test. +/// +/// ## Collecting test metrics +/// +/// ### Prometheus +/// A prometheus endpoint is exposed while the test is running. 
A local Prometheus instance +/// can scrape it every 1s and a Grafana dashboard is the preferred way of visualizing +/// the performance characteristics of the subsystem. +/// +/// ### CLI +/// A subset of the Prometheus metrics are printed at the end of the test. pub struct TestEnvironment { - // A task manager that tracks task poll durations. + // A task manager that tracks task poll durations allows us to measure + // per task CPU usage as we do in the Polkadot node. task_manager: TaskManager, // The Prometheus metrics registry registry: Registry, - // A test overseer. + // A channel to the availability recovery subsystem to_subsystem: mpsc::Sender>, // Subsystem instance, currently keeps req/response protocol channel senders // for the whole duration of the test. instance: AvailabilityRecoverySubsystemInstance, - // The test intial state. The current state is owned by the task doing the overseer/subsystem - // mockings. + // The test intial state. The current state is owned by `env_task`. 
state: TestState, } @@ -115,6 +144,18 @@ impl TestEnvironment { async move { Self::env_task(virtual_overseer, task_state, spawn_task_handle).await }, ); + let registry_clone = registry.clone(); + task_manager + .spawn_handle() + .spawn_blocking("prometheus", "test-environment", async move { + prometheus_endpoint::init_prometheus( + SocketAddr::new(std::net::IpAddr::V4(Ipv4Addr::LOCALHOST), 9999), + registry_clone, + ) + .await + .unwrap(); + }); + TestEnvironment { task_manager, registry, to_subsystem, instance, state } } @@ -284,7 +325,10 @@ impl TestEnvironment { .timeout(MAX_TIME_OF_FLIGHT) .await .unwrap_or_else(|| { - panic!("{}ms is more than enough for sending signals.", TIMEOUT.as_millis()) + panic!( + "{}ms is more than enough for sending signals.", + MAX_TIME_OF_FLIGHT.as_millis() + ) }) .unwrap(); } @@ -382,15 +426,18 @@ impl TestState { } fn session_info(&self) -> SessionInfo { + let my_vec = (0..self.config().n_validators) + .map(|i| ValidatorIndex(i as _)) + .collect::>(); + + let validator_groups = my_vec.chunks(5).map(|x| Vec::from(x)).collect::>(); + SessionInfo { validators: self.validator_public.clone(), discovery_keys: self.validator_authority_id.clone(), - // all validators in the same group. - validator_groups: IndexedVec::>::from(vec![(0..5) - .map(|i| ValidatorIndex(i as _)) - .collect()]), + validator_groups: IndexedVec::>::from(validator_groups), assignment_keys: vec![], - n_cores: 0, + n_cores: self.config().n_cores as u32, zeroth_delay_tranche_width: 0, relay_vrf_modulo_samples: 0, n_delay_tranches: 0, @@ -449,7 +496,7 @@ impl TestState { .collect::>(); for (index, pov_size) in config.pov_sizes.iter().enumerate() { - let mut candidate = &mut candidate_receipts[index]; + let candidate = &mut candidate_receipts[index]; // a hack to make candidate unique. 
candidate.descriptor.relay_parent = Hash::from_low_u64_be(index as u64); @@ -532,9 +579,6 @@ fn derive_erasure_chunks_with_proofs_and_root( (erasure_chunks, root) } -pub async fn bench_with_chunks_if_pov_large(env: &mut TestEnvironment) {} - -pub async fn bench_inner(env: &mut TestEnvironment) {} pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let config = env.config().clone(); @@ -545,39 +589,45 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { .await; let start_marker = Instant::now(); - let mut batch = Vec::new(); - let mut availability_bytes = 0; - for candidate_num in 0..config.n_cores as u64 { - let candidate = env.state.candidate_receipts[candidate_num as usize].clone(); - - let (tx, rx) = oneshot::channel(); - batch.push(rx); - - env.send_message(AvailabilityRecoveryMessage::RecoverAvailableData( - candidate.clone(), - 1, - Some(GroupIndex(0)), - tx, - )) - .await; - - // TODO: select between futures unordered of rx await and timer to send next request. - if batch.len() >= config.max_parallel_recoveries { - for rx in std::mem::take(&mut batch) { - let available_data = rx.await.unwrap().unwrap(); - availability_bytes += available_data.encoded_size(); - } + let mut batch = FuturesUnordered::new(); + let mut availability_bytes = 0u128; + + for loop_num in 0..env.config().num_loops { + gum::info!(target: LOG_TARGET, loop_num, "Starting loop"); + + for candidate_num in 0..config.n_cores as u64 { + let candidate = env.state.candidate(candidate_num as usize); + + let (tx, rx) = oneshot::channel(); + batch.push(rx); + + env.send_message(AvailabilityRecoveryMessage::RecoverAvailableData( + candidate.clone(), + 1, + Some(GroupIndex(candidate_num as u32 % (config.n_cores / 5) as u32)), + tx, + )) + .await; + + // // TODO: select between futures unordered of rx await and timer to send next request. 
+ // if batch.len() >= config.max_parallel_recoveries { + // for rx in std::mem::take(&mut batch) { + // let available_data = rx.await.unwrap().unwrap(); + // availability_bytes += available_data.encoded_size() as u128; + // } + // } } - } - for rx in std::mem::take(&mut batch) { - let available_data = rx.await.unwrap().unwrap(); - availability_bytes += available_data.encoded_size(); + while let Some(completed) = batch.next().await { + let available_data = completed.unwrap().unwrap(); + availability_bytes += available_data.encoded_size() as u128; + } } + println!("Waiting for subsystem to complete work... {} requests ", batch.len()); env.send_signal(OverseerSignal::Conclude).await; let duration = start_marker.elapsed().as_millis(); - let tput = (availability_bytes as u128) / duration * 1000; + let tput = ((availability_bytes) / duration) * 1000; println!("Benchmark completed in {:?}ms", duration); println!("Throughput: {}KiB/s", tput / 1024); diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 4dc0936291b1..f5180004840c 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -20,24 +20,89 @@ use clap::Parser; use color_eyre::eyre; use prometheus::proto::LabelPair; -use std::net::{Ipv4Addr, SocketAddr}; +use std::time::Duration; pub(crate) mod availability; use availability::{TestConfiguration, TestEnvironment, TestState}; const LOG_TARGET: &str = "subsystem-bench"; +use clap_num::number_range; + +fn le_100(s: &str) -> Result { + number_range(s, 0, 100) +} + +fn le_5000(s: &str) -> Result { + number_range(s, 0, 5000) +} + +#[derive(Debug, clap::Parser, Clone)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct NetworkOptions {} + +#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] +#[value(rename_all = "kebab-case")] +#[non_exhaustive] +pub enum NetworkEmulation { + Ideal, + Healthy, + 
Degraded, +} + +#[derive(Debug, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct DataAvailabilityReadOptions { + #[clap(long, ignore_case = true, default_value_t = 100)] + /// Number of cores to fetch availability for. + pub n_cores: usize, + + #[clap(long, ignore_case = true, default_value_t = 500)] + /// Number of validators to fetch chunks from. + pub n_validators: usize, + + #[clap(short, long, default_value_t = false)] + /// Turbo boost AD Read by fetching from backers first. Tipically this is only faster if nodes + /// have enough bandwidth. + pub fetch_from_backers: bool, + + #[clap(short, long, ignore_case = true, default_value_t = 1)] + /// Number of times to loop fetching for each core. + pub num_loops: usize, +} /// Define the supported benchmarks targets #[derive(Debug, Parser)] #[command(about = "Target subsystems", version, rename_all = "kebab-case")] enum BenchmarkTarget { /// Benchmark availability recovery strategies. - AvailabilityRecovery, + DataAvailabilityRead(DataAvailabilityReadOptions), } #[derive(Debug, Parser)] #[allow(missing_docs)] struct BenchCli { + #[arg(long, value_enum, ignore_case = true, default_value_t = NetworkEmulation::Ideal)] + /// The type of network to be emulated + pub network: NetworkEmulation, + + #[clap(short, long)] + /// The bandwidth of simulated remote peers in KiB + pub peer_bandwidth: Option, + + #[clap(long, value_parser=le_100)] + /// Simulated connection error rate [0-100]. + pub peer_error: Option, + + #[clap(long, value_parser=le_5000)] + /// Minimum remote peer latency in milliseconds [0-5000]. + pub peer_min_latency: Option, + + #[clap(long, value_parser=le_5000)] + /// Maximum remote peer latency in milliseconds [0-5000]. 
+ pub peer_max_latency: Option, + #[command(subcommand)] pub target: BenchmarkTarget, } @@ -63,21 +128,57 @@ impl BenchCli { let registry_clone = registry.clone(); let mut pov_sizes = Vec::new(); - pov_sizes.append(&mut vec![1024 * 1024; 100]); + pov_sizes.append(&mut vec![5 * 1024 * 1024; 200]); + + let mut test_config = match self.target { + BenchmarkTarget::DataAvailabilityRead(options) => match self.network { + NetworkEmulation::Healthy => TestConfiguration::healthy_network( + options.num_loops, + options.fetch_from_backers, + options.n_validators, + options.n_cores, + pov_sizes, + ), + NetworkEmulation::Degraded => TestConfiguration::degraded_network( + options.num_loops, + options.fetch_from_backers, + options.n_validators, + options.n_cores, + pov_sizes, + ), + NetworkEmulation::Ideal => TestConfiguration::ideal_network( + options.num_loops, + options.fetch_from_backers, + options.n_validators, + options.n_cores, + pov_sizes, + ), + }, + }; + + let mut latency_config = test_config.latency.clone().unwrap_or_default(); + + if let Some(latency) = self.peer_min_latency { + latency_config.min_latency = Duration::from_millis(latency); + } - let test_config = TestConfiguration::unconstrained_1000_validators_60_cores(pov_sizes); + if let Some(latency) = self.peer_max_latency { + latency_config.max_latency = Duration::from_millis(latency); + } - let state = TestState::new(test_config); + if let Some(error) = self.peer_error { + test_config.error = error; + } + if let Some(bandwidth) = self.peer_bandwidth { + // CLI expects bw in KiB + test_config.bandwidth = bandwidth * 1024; + } + + let state = TestState::new(test_config); let mut env = TestEnvironment::new(runtime.handle().clone(), state, registry.clone()); - let handle = runtime.spawn(async move { - prometheus_endpoint::init_prometheus( - SocketAddr::new(std::net::IpAddr::V4(Ipv4Addr::LOCALHOST), 9999), - registry_clone, - ) - .await - }); + let runtime_handle = runtime.handle().clone(); println!("{:?}", 
env.config()); From 5a05da0f6c87e7e19ff1940d4b9f035cbb4cf7e9 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Thu, 9 Nov 2023 23:51:58 +0200 Subject: [PATCH 10/52] peer stats Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 28 ++++++----- .../src/availability/network.rs | 49 +++++++++++++++++-- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 7b9b64c07096..a4980ffc5fdd 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -118,6 +118,8 @@ pub struct TestEnvironment { instance: AvailabilityRecoverySubsystemInstance, // The test intial state. The current state is owned by `env_task`. state: TestState, + // A handle to the network emulator. + network: NetworkEmulator, } impl TestEnvironment { @@ -131,17 +133,24 @@ impl TestEnvironment { state.config().use_fast_path, ); + let mut network = NetworkEmulator::new( + state.config().n_validators, + state.config().bandwidth, + task_manager.spawn_handle(), + ); + // Copy sender for later when we need to inject messages in to the subsystem. let to_subsystem = virtual_overseer.tx.clone(); let task_state = state.clone(); - let spawn_task_handle = task_manager.spawn_handle(); + let task_network = network.clone(); + // We need to start a receiver to process messages from the subsystem. 
// This mocks an overseer and all dependent subsystems task_manager.spawn_handle().spawn_blocking( "test-environment", "test-environment", - async move { Self::env_task(virtual_overseer, task_state, spawn_task_handle).await }, + async move { Self::env_task(virtual_overseer, task_state, task_network).await }, ); let registry_clone = registry.clone(); @@ -156,7 +165,7 @@ impl TestEnvironment { .unwrap(); }); - TestEnvironment { task_manager, registry, to_subsystem, instance, state } + TestEnvironment { task_manager, registry, to_subsystem, instance, state, network } } pub fn config(&self) -> &TestConfiguration { @@ -249,15 +258,8 @@ impl TestEnvironment { async fn env_task( mut ctx: TestSubsystemContextHandle, mut state: TestState, - spawn_task_handle: SpawnTaskHandle, + mut network: NetworkEmulator, ) { - // Emulate `n_validators` each with 1MiB of bandwidth available. - let mut network = NetworkEmulator::new( - state.config().n_validators, - state.config().bandwidth, - spawn_task_handle, - ); - loop { futures::select! 
{ message = ctx.recv().fuse() => { @@ -631,5 +633,9 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { println!("Benchmark completed in {:?}ms", duration); println!("Throughput: {}KiB/s", tput / 1024); + let stats = env.network.stats().await; + for (index, stat) in stats.iter().enumerate() { + println!("Validator #{} : {:?}", index, stat); + } tokio::time::sleep(Duration::from_secs(1)).await; } diff --git a/polkadot/node/subsystem-bench/src/availability/network.rs b/polkadot/node/subsystem-bench/src/availability/network.rs index d6fc175c859b..544ecf06372a 100644 --- a/polkadot/node/subsystem-bench/src/availability/network.rs +++ b/polkadot/node/subsystem-bench/src/availability/network.rs @@ -124,6 +124,7 @@ mod tests { } // A network peer emulator +#[derive(Clone)] struct PeerEmulator { // The queue of requests waiting to be served by the emulator actions_tx: UnboundedSender, @@ -137,11 +138,15 @@ impl PeerEmulator { .clone() .spawn("peer-emulator", "test-environment", async move { let mut rate_limiter = RateLimit::new(20, bandwidth); + let rx_bytes_total = 0; + let mut tx_bytes_total = 0u128; + loop { let maybe_action: Option = actions_rx.recv().await; if let Some(action) = maybe_action { let size = action.size(); rate_limiter.reap(size).await; + tx_bytes_total += size as u128; if let Some(latency) = action.latency { spawn_task_handle.spawn( "peer-emulator-latency", @@ -152,7 +157,12 @@ impl PeerEmulator { }, ) } else { - action.run().await; + // Send stats if requested + if let Some(stats_sender) = action.stats { + stats_sender.send(PeerEmulatorStats { rx_bytes_total, tx_bytes_total }).unwrap(); + } else { + action.run().await; + } } } else { break @@ -170,7 +180,7 @@ impl PeerEmulator { } pub type ActionFuture = std::pin::Pin + std::marker::Send>>; -// An network action to be completed by the emulator task. +/// An network action to be completed by the emulator task. 
pub struct NetworkAction { // The function that performs the action run: ActionFuture, @@ -180,12 +190,28 @@ pub struct NetworkAction { index: usize, // The amount of time to delay the polling `run` latency: Option, + // An optional request of rx/tx statistics for the peer at `index` + stats: Option>, +} + +/// Book keeping of sent and received bytes. +#[derive(Debug, Clone)] +pub struct PeerEmulatorStats { + pub rx_bytes_total: u128, + pub tx_bytes_total: u128, } impl NetworkAction { pub fn new(index: usize, run: ActionFuture, size: usize, latency: Option) -> Self { - Self { run, size, index, latency } + Self { run, size, index, latency, stats: None } + } + + pub fn stats(index: usize, stats_sender:oneshot::Sender) -> Self { + let run = async move {}.boxed(); + + Self { run, size: 0, index, latency: None, stats: Some(stats_sender) } } + pub fn size(&self) -> usize { self.size } @@ -201,6 +227,7 @@ impl NetworkAction { // Mocks the network bridge and an arbitrary number of connected peer nodes. // Implements network latency, bandwidth and error. +#[derive(Clone)] pub struct NetworkEmulator { // Per peer network emulation peers: Vec, @@ -218,4 +245,20 @@ impl NetworkEmulator { pub fn submit_peer_action(&mut self, index: usize, action: NetworkAction) { let _ = self.peers[index].send(action); } + + // Returns the sent/received stats for all peers. 
+ pub async fn stats(&mut self) -> Vec { + let receivers = (0..self.peers.len()).map(|peer_index| { + let (stats_tx, stats_rx) = oneshot::channel(); + self.submit_peer_action(peer_index, NetworkAction::stats(peer_index, stats_tx)); + stats_rx + }).collect::>(); + + let mut stats = Vec::new(); + for receiver in receivers { + stats.push(receiver.await.unwrap()); + } + + stats + } } From 895e8d6a627334b46025a212242e31684eb8a9fc Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 10 Nov 2023 12:42:51 +0200 Subject: [PATCH 11/52] Switch stats to atomics Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 1 - .../src/availability/network.rs | 83 ++++++++++--------- .../subsystem-bench/src/subsystem-bench.rs | 1 - 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index a4980ffc5fdd..7903ba08b616 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -13,7 +13,6 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . - use std::{ collections::HashMap, sync::Arc, diff --git a/polkadot/node/subsystem-bench/src/availability/network.rs b/polkadot/node/subsystem-bench/src/availability/network.rs index 544ecf06372a..02af817e691f 100644 --- a/polkadot/node/subsystem-bench/src/availability/network.rs +++ b/polkadot/node/subsystem-bench/src/availability/network.rs @@ -13,10 +13,11 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . - use super::*; +use prometheus_endpoint::U64; +use sc_network::network_state::Peer; +use std::sync::atomic::{AtomicU64, Ordering}; use tokio::sync::mpsc::UnboundedSender; - // An emulated node egress traffic rate_limiter. 
#[derive(Debug)] struct RateLimit { @@ -131,7 +132,11 @@ struct PeerEmulator { } impl PeerEmulator { - pub fn new(bandwidth: usize, spawn_task_handle: SpawnTaskHandle) -> Self { + pub fn new( + bandwidth: usize, + spawn_task_handle: SpawnTaskHandle, + stats: Arc, + ) -> Self { let (actions_tx, mut actions_rx) = tokio::sync::mpsc::unbounded_channel(); spawn_task_handle @@ -140,13 +145,12 @@ impl PeerEmulator { let mut rate_limiter = RateLimit::new(20, bandwidth); let rx_bytes_total = 0; let mut tx_bytes_total = 0u128; - loop { + let stats_clone = stats.clone(); let maybe_action: Option = actions_rx.recv().await; if let Some(action) = maybe_action { let size = action.size(); rate_limiter.reap(size).await; - tx_bytes_total += size as u128; if let Some(latency) = action.latency { spawn_task_handle.spawn( "peer-emulator-latency", @@ -154,15 +158,14 @@ impl PeerEmulator { async move { tokio::time::sleep(latency).await; action.run().await; + stats_clone + .tx_bytes_total + .fetch_add(size as u64, Ordering::Relaxed); }, ) } else { - // Send stats if requested - if let Some(stats_sender) = action.stats { - stats_sender.send(PeerEmulatorStats { rx_bytes_total, tx_bytes_total }).unwrap(); - } else { - action.run().await; - } + action.run().await; + stats_clone.tx_bytes_total.fetch_add(size as u64, Ordering::Relaxed); } } else { break @@ -190,26 +193,23 @@ pub struct NetworkAction { index: usize, // The amount of time to delay the polling `run` latency: Option, - // An optional request of rx/tx statistics for the peer at `index` - stats: Option>, } /// Book keeping of sent and received bytes. 
-#[derive(Debug, Clone)] +#[derive(Debug, Default)] pub struct PeerEmulatorStats { - pub rx_bytes_total: u128, - pub tx_bytes_total: u128, + pub rx_bytes_total: AtomicU64, + pub tx_bytes_total: AtomicU64, } +#[derive(Debug, Default)] +pub struct PeerStats { + pub rx_bytes_total: u64, + pub tx_bytes_total: u64, +} impl NetworkAction { pub fn new(index: usize, run: ActionFuture, size: usize, latency: Option) -> Self { - Self { run, size, index, latency, stats: None } - } - - pub fn stats(index: usize, stats_sender:oneshot::Sender) -> Self { - let run = async move {}.boxed(); - - Self { run, size: 0, index, latency: None, stats: Some(stats_sender) } + Self { run, size, index, latency } } pub fn size(&self) -> usize { @@ -231,15 +231,19 @@ impl NetworkAction { pub struct NetworkEmulator { // Per peer network emulation peers: Vec, + stats: Vec>, } impl NetworkEmulator { pub fn new(n_peers: usize, bandwidth: usize, spawn_task_handle: SpawnTaskHandle) -> Self { - Self { - peers: (0..n_peers) - .map(|_index| PeerEmulator::new(bandwidth, spawn_task_handle.clone())) - .collect::>(), - } + let (stats, peers) = (0..n_peers) + .map(|_index| { + let stats = Arc::new(PeerEmulatorStats::default()); + (stats.clone(), PeerEmulator::new(bandwidth, spawn_task_handle.clone(), stats)) + }) + .unzip(); + + Self { peers, stats } } pub fn submit_peer_action(&mut self, index: usize, action: NetworkAction) { @@ -247,18 +251,15 @@ impl NetworkEmulator { } // Returns the sent/received stats for all peers. 
- pub async fn stats(&mut self) -> Vec { - let receivers = (0..self.peers.len()).map(|peer_index| { - let (stats_tx, stats_rx) = oneshot::channel(); - self.submit_peer_action(peer_index, NetworkAction::stats(peer_index, stats_tx)); - stats_rx - }).collect::>(); - - let mut stats = Vec::new(); - for receiver in receivers { - stats.push(receiver.await.unwrap()); - } - - stats + pub async fn stats(&mut self) -> Vec { + let r = self + .stats + .iter() + .map(|stats| PeerStats { + rx_bytes_total: stats.rx_bytes_total.load(Ordering::Relaxed), + tx_bytes_total: stats.tx_bytes_total.load(Ordering::Relaxed), + }) + .collect::>(); + r } } diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index f5180004840c..ba66d06fe320 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -16,7 +16,6 @@ //! A tool for running subsystem benchmark tests designed for development and //! CI regression testing. 
- use clap::Parser; use color_eyre::eyre; use prometheus::proto::LabelPair; From a2fb0c95d08c17ad5647c904cb48854ec30ba470 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Sun, 12 Nov 2023 03:14:34 +0200 Subject: [PATCH 12/52] add more network metrics, new load generator Signed-off-by: Andrei Sandu --- Cargo.lock | 1 + polkadot/node/subsystem-bench/Cargo.toml | 2 +- .../src/availability/configuration.rs | 7 +- .../subsystem-bench/src/availability/mod.rs | 335 +++++++++++++----- .../src/availability/network.rs | 149 +++++++- .../src/availability/test_env.rs | 63 ++++ .../subsystem-bench/src/subsystem-bench.rs | 25 +- 7 files changed, 461 insertions(+), 121 deletions(-) create mode 100644 polkadot/node/subsystem-bench/src/availability/test_env.rs diff --git a/Cargo.lock b/Cargo.lock index 05355cad0e2c..ee80ffb6e815 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13012,6 +13012,7 @@ dependencies = [ "env_logger 0.9.3", "futures", "futures-timer", + "itertools 0.11.0", "log", "parity-scale-codec", "polkadot-availability-recovery", diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index 01b992d15fc6..c5d62d3aa74f 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -45,7 +45,7 @@ sp-application-crypto = { path = "../../../substrate/primitives/application-cryp sc-network = { path = "../../../substrate/client/network" } sc-service = { path = "../../../substrate/client/service" } polkadot-node-metrics = { path = "../metrics" } - +itertools = "0.11.0" polkadot-primitives-test-helpers = { path = "../../primitives/test-helpers" } prometheus_endpoint = { package = "substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" } prometheus = { version = "0.13.0", default-features = false } diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs index 1355c67edea0..cf142de06634 100644 
--- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -37,6 +37,8 @@ pub struct TestConfiguration { /// The PoV size pub pov_sizes: Vec, /// The amount of bandiwdth remote validators have. + pub peer_bandwidth: usize, + /// The amount of bandiwdth our node has. pub bandwidth: usize, /// Optional peer emulation latency pub latency: Option, @@ -55,6 +57,7 @@ impl Default for TestConfiguration { n_cores: 10, pov_sizes: vec![5 * 1024 * 1024], bandwidth: 60 * 1024 * 1024, + peer_bandwidth: 60 * 1024 * 1024, latency: None, error: 0, num_loops: 1, @@ -76,8 +79,8 @@ impl TestConfiguration { n_cores, n_validators, pov_sizes, - // HW specs node bandwidth bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, // No latency latency: None, error: 0, @@ -98,6 +101,7 @@ impl TestConfiguration { n_validators, pov_sizes, bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, latency: Some(PeerLatency { min_latency: Duration::from_millis(1), max_latency: Duration::from_millis(100), @@ -120,6 +124,7 @@ impl TestConfiguration { n_validators, pov_sizes, bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, latency: Some(PeerLatency { min_latency: Duration::from_millis(10), max_latency: Duration::from_millis(500), diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 7903ba08b616..4f821f819908 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -13,8 +13,11 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
+use itertools::Itertools; use std::{ collections::HashMap, + iter::Cycle, + ops::{Div, Sub}, sync::Arc, time::{Duration, Instant}, }; @@ -24,6 +27,8 @@ use futures::{ stream::FuturesUnordered, FutureExt, SinkExt, StreamExt, }; +use futures_timer::Delay; + use polkadot_node_metrics::metrics::Metrics; use polkadot_availability_recovery::AvailabilityRecoverySubsystem; @@ -48,6 +53,8 @@ use polkadot_node_subsystem::{ }; use std::net::{Ipv4Addr, SocketAddr}; +mod test_env; + const LOG_TARGET: &str = "subsystem-bench::availability"; use polkadot_node_primitives::{AvailableData, ErasureChunk}; @@ -119,6 +126,8 @@ pub struct TestEnvironment { state: TestState, // A handle to the network emulator. network: NetworkEmulator, + // Configuration/env metrics + metrics: TestEnvironmentMetrics, } impl TestEnvironment { @@ -131,11 +140,13 @@ impl TestEnvironment { task_manager.spawn_handle(), state.config().use_fast_path, ); - + let metrics = + TestEnvironmentMetrics::new(®istry).expect("Metrics need to be registered"); let mut network = NetworkEmulator::new( state.config().n_validators, - state.config().bandwidth, + state.config().peer_bandwidth, task_manager.spawn_handle(), + ®istry, ); // Copy sender for later when we need to inject messages in to the subsystem. @@ -143,13 +154,31 @@ impl TestEnvironment { let task_state = state.clone(); let task_network = network.clone(); + let spawn_handle = task_manager.spawn_handle(); + + // Our node rate limiting + let mut rx_limiter = RateLimit::new(10, state.config.bandwidth); + let (ingress_tx, mut ingress_rx) = tokio::sync::mpsc::unbounded_channel::(); + let our_network_stats = network.peer_stats(0); + + spawn_handle.spawn_blocking("our-node-rx", "test-environment", async move { + while let Some(action) = ingress_rx.recv().await { + let size = action.size(); + + // account for our node receiving the data. 
+ our_network_stats.inc_received(size); + + rx_limiter.reap(size).await; + action.run().await; + } + }); // We need to start a receiver to process messages from the subsystem. // This mocks an overseer and all dependent subsystems task_manager.spawn_handle().spawn_blocking( "test-environment", "test-environment", - async move { Self::env_task(virtual_overseer, task_state, task_network).await }, + async move { Self::env_task(virtual_overseer, task_state, task_network, ingress_tx).await }, ); let registry_clone = registry.clone(); @@ -164,13 +193,17 @@ impl TestEnvironment { .unwrap(); }); - TestEnvironment { task_manager, registry, to_subsystem, instance, state, network } + TestEnvironment { task_manager, registry, to_subsystem, instance, state, network, metrics } } pub fn config(&self) -> &TestConfiguration { self.state.config() } + pub fn network(&self) -> &NetworkEmulator { + &self.network + } + /// Produce a randomized duration between `min` and `max`. fn random_latency(maybe_peer_latency: Option<&PeerLatency>) -> Option { if let Some(peer_latency) = maybe_peer_latency { @@ -183,24 +216,51 @@ impl TestEnvironment { } } + pub fn metrics(&self) -> &TestEnvironmentMetrics { + &self.metrics + } + /// Generate a random error based on `probability`. /// `probability` should be a number between 0 and 100. 
fn random_error(probability: usize) -> bool { Uniform::from(0..=99).sample(&mut thread_rng()) < probability } - pub fn respond_to_send_request(state: &mut TestState, request: Requests) -> NetworkAction { + pub fn request_size(request: &Requests) -> u64 { + match request { + Requests::ChunkFetchingV1(outgoing_request) => + outgoing_request.payload.encoded_size() as u64, + Requests::AvailableDataFetchingV1(outgoing_request) => + outgoing_request.payload.encoded_size() as u64, + _ => panic!("received an unexpected request"), + } + } + + pub fn respond_to_send_request( + state: &mut TestState, + request: Requests, + ingress_tx: tokio::sync::mpsc::UnboundedSender, + ) -> NetworkAction { match request { Requests::ChunkFetchingV1(outgoing_request) => { let validator_index = outgoing_request.payload.index.0 as usize; - let chunk: ChunkResponse = - state.chunks.get(&outgoing_request.payload.candidate_hash).unwrap() - [validator_index] - .clone() - .into(); - let size = chunk.encoded_size(); + let candidate_hash = outgoing_request.payload.candidate_hash; + + let candidate_index = state + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let chunk: ChunkResponse = state.chunks.get(*candidate_index as usize).unwrap() + [validator_index] + .clone() + .into(); + let mut size = chunk.encoded_size(); let response = if Self::random_error(state.config().error) { + // Error will not account to any bandwidth used. + size = 0; Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) } else { Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode()) @@ -211,21 +271,39 @@ impl TestEnvironment { } .boxed(); + let future_wrapper = async move { + // Forward the response to the ingress channel of our node. + // On receive side we apply our node receiving rate limit. 
+ let action = NetworkAction::new(validator_index, future, size, None); + ingress_tx.send(action).unwrap(); + } + .boxed(); + NetworkAction::new( validator_index, - future, + future_wrapper, size, // Generate a random latency based on configuration. Self::random_latency(state.config().latency.as_ref()), ) }, Requests::AvailableDataFetchingV1(outgoing_request) => { + println!("{:?}", outgoing_request); // TODO: do better, by implementing diff authority ids and mapping network actions // to authority id, let validator_index = Uniform::from(0..state.config().n_validators).sample(&mut thread_rng()); + + let candidate_hash = outgoing_request.payload.candidate_hash; + let candidate_index = state + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + let available_data = - state.candidates.get(&outgoing_request.payload.candidate_hash).unwrap().clone(); + state.available_data.get(*candidate_index as usize).unwrap().clone(); + let size = available_data.encoded_size(); let response = if Self::random_error(state.config().error) { @@ -240,9 +318,17 @@ impl TestEnvironment { } .boxed(); + let future_wrapper = async move { + // Forward the response to the ingress channel of our node. + // On receive side we apply our node receiving rate limit. + let action = NetworkAction::new(validator_index, future, size, None); + ingress_tx.send(action).unwrap(); + } + .boxed(); + NetworkAction::new( validator_index, - future, + future_wrapper, size, // Generate a random latency based on configuration. Self::random_latency(state.config().latency.as_ref()), @@ -258,11 +344,12 @@ impl TestEnvironment { mut ctx: TestSubsystemContextHandle, mut state: TestState, mut network: NetworkEmulator, + ingress_tx: tokio::sync::mpsc::UnboundedSender, ) { loop { futures::select! 
{ message = ctx.recv().fuse() => { - gum::debug!(target: LOG_TARGET, ?message, "Env task received message"); + gum::trace!(target: LOG_TARGET, ?message, "Env task received message"); match message { AllMessages::NetworkBridgeTx( @@ -272,7 +359,9 @@ impl TestEnvironment { ) ) => { for request in requests { - let action = Self::respond_to_send_request(&mut state, request); + network.inc_sent(Self::request_size(&request)); + let action = Self::respond_to_send_request(&mut state, request, ingress_tx.clone()); + // Account for our node sending the request over the emulated network. network.submit_peer_action(action.index(), action); } }, @@ -287,7 +376,10 @@ impl TestEnvironment { AllMessages::AvailabilityStore( AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx) ) => { - let chunk_size = state.chunks.get(&candidate_hash).unwrap()[0].encoded_size(); + let candidate_index = state.candidate_hashes.get(&candidate_hash).expect("candidate was generated previously; qed"); + gum::info!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let chunk_size = state.chunks.get(*candidate_index as usize).unwrap()[0].encoded_size(); let _ = tx.send(Some(chunk_size)); } AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -345,8 +437,8 @@ impl AvailabilityRecoverySubsystemInstance { ) -> (Self, TestSubsystemContextHandle) { let (context, virtual_overseer) = make_buffered_subsystem_context( spawn_task_handle.clone(), - 4096 * 4, - "availability-recovery", + 128, + "availability-recovery-subsystem", ); let (collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); @@ -378,8 +470,6 @@ impl AvailabilityRecoverySubsystemInstance { } } -const TIMEOUT: Duration = Duration::from_millis(300); - // We use this to bail out sending messages to the subsystem if it is overloaded such that // the time of flight is breaches 5s. // This should eventually be a test parameter. 
@@ -387,9 +477,13 @@ const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); use sp_keyring::Sr25519Keyring; -use crate::availability::network::NetworkAction; +use crate::availability::network::{ActionFuture, NetworkAction}; -use self::{configuration::PeerLatency, network::NetworkEmulator}; +use self::{ + configuration::PeerLatency, + network::{NetworkEmulator, RateLimit}, + test_env::TestEnvironmentMetrics, +}; #[derive(Clone)] pub struct TestState { @@ -398,15 +492,22 @@ pub struct TestState { validator_authority_id: Vec, // The test node validator index. validator_index: ValidatorIndex, - // Per core candidates receipts. - candidate_receipts: Vec, session_index: SessionIndex, - + pov_sizes: Cycle>, + // Generated candidate receipts to be used in the test + candidates: Cycle>, + candidates_generated: usize, + // Map from pov size to candidate index + pov_size_to_candidate: HashMap, + // Map from generated candidate hashes to candidate index in `available_data` + // and `chunks`. + candidate_hashes: HashMap, persisted_validation_data: PersistedValidationData, - /// A per size pov mapping to available data. 
- candidates: HashMap, - chunks: HashMap>, + candidate_receipts: Vec, + available_data: Vec, + chunks: Vec>, + /// Next candidate index in config: TestConfiguration, } @@ -415,10 +516,6 @@ impl TestState { &self.config } - fn candidate(&self, candidate_index: usize) -> CandidateReceipt { - self.candidate_receipts.get(candidate_index).unwrap().clone() - } - async fn respond_none_to_available_data_query( &self, tx: oneshot::Sender>, @@ -455,9 +552,17 @@ impl TestState { send_chunk: impl Fn(usize) -> bool, tx: oneshot::Sender>, ) { + gum::info!(target: LOG_TARGET, ?candidate_hash, "respond_to_query_all_request"); + + let candidate_index = self + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::info!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + let v = self .chunks - .get(&candidate_hash) + .get(*candidate_index as usize) .unwrap() .iter() .filter(|c| send_chunk(c.index.0 as usize)) @@ -467,6 +572,41 @@ impl TestState { let _ = tx.send(v); } + pub fn next_candidate(&mut self) -> Option { + let candidate = self.candidates.next(); + let candidate_hash = candidate.as_ref().unwrap().hash(); + gum::trace!(target: LOG_TARGET, "Next candidate selected {:?}", candidate_hash); + candidate + } + + /// Generate candidates to be used in the test. + pub fn generate_candidates(&mut self, count: usize) { + gum::info!(target: LOG_TARGET, "Pre-generating {} candidates.", count); + + // Generate all candidates + self.candidates = (0..count) + .map(|index| { + let pov_size = self.pov_sizes.next().expect("This is a cycle; qed"); + let candidate_index = *self + .pov_size_to_candidate + .get(&pov_size) + .expect("pov_size always exists; qed"); + let mut candidate_receipt = self.candidate_receipts[candidate_index].clone(); + + // Make it unique. 
+ candidate_receipt.descriptor.relay_parent = Hash::from_low_u64_be(index as u64); + // Store the new candidate in the state + self.candidate_hashes.insert(candidate_receipt.hash(), candidate_index); + + gum::info!(target: LOG_TARGET, candidate_hash = ?candidate_receipt.hash(), "new candidate"); + + candidate_receipt + }) + .collect::>() + .into_iter() + .cycle(); + } + pub fn new(config: TestConfiguration) -> Self { let validators = (0..config.n_validators as u64) .into_iter() @@ -476,9 +616,10 @@ impl TestState { let validator_public = validator_pubkeys(&validators); let validator_authority_id = validator_authority_id(&validators); let validator_index = ValidatorIndex(0); + let mut chunks = Vec::new(); + let mut available_data = Vec::new(); + let mut candidate_receipts = Vec::new(); let mut pov_size_to_candidate = HashMap::new(); - let mut chunks = HashMap::new(); - let mut candidates = HashMap::new(); let session_index = 10; // we use it for all candidates. @@ -489,59 +630,54 @@ impl TestState { relay_parent_storage_root: Default::default(), }; - // Create initial candidate receipts - let mut candidate_receipts = config - .pov_sizes - .iter() - .map(|_index| dummy_candidate_receipt(dummy_hash())) - .collect::>(); - - for (index, pov_size) in config.pov_sizes.iter().enumerate() { - let candidate = &mut candidate_receipts[index]; - // a hack to make candidate unique. - candidate.descriptor.relay_parent = Hash::from_low_u64_be(index as u64); - - // We reuse candidates of same size, to speed up the test startup. - let (erasure_root, available_data, new_chunks) = - pov_size_to_candidate.entry(pov_size).or_insert_with(|| { - let pov = PoV { block_data: BlockData(vec![index as u8; *pov_size]) }; - - let available_data = AvailableData { - validation_data: persisted_validation_data.clone(), - pov: Arc::new(pov), - }; + // For each unique pov we create a candidate receipt. 
+ for (index, pov_size) in config.pov_sizes.iter().cloned().unique().enumerate() { + gum::info!(target: LOG_TARGET, index, pov_size, "Generating template candidates"); - let (new_chunks, erasure_root) = derive_erasure_chunks_with_proofs_and_root( - validators.len(), - &available_data, - |_, _| {}, - ); + let mut candidate_receipt = dummy_candidate_receipt(dummy_hash()); + let pov = PoV { block_data: BlockData(vec![index as u8; pov_size]) }; - candidate.descriptor.erasure_root = erasure_root; + let new_available_data = AvailableData { + validation_data: persisted_validation_data.clone(), + pov: Arc::new(pov), + }; - chunks.insert(candidate.hash(), new_chunks.clone()); - candidates.insert(candidate.hash(), available_data.clone()); + let (new_chunks, erasure_root) = derive_erasure_chunks_with_proofs_and_root( + validators.len(), + &new_available_data, + |_, _| {}, + ); - (erasure_root, available_data, new_chunks) - }); + candidate_receipt.descriptor.erasure_root = erasure_root; - candidate.descriptor.erasure_root = *erasure_root; - candidates.insert(candidate.hash(), available_data.clone()); - chunks.insert(candidate.hash(), new_chunks.clone()); + chunks.push(new_chunks); + available_data.push(new_available_data); + pov_size_to_candidate.insert(pov_size, index); + candidate_receipts.push(candidate_receipt); } - Self { + let pov_sizes = config.pov_sizes.clone().into_iter().cycle(); + let mut state = Self { validators, validator_public, validator_authority_id, validator_index, - candidate_receipts, session_index, persisted_validation_data, - candidates, + available_data, + candidate_receipts, chunks, config, - } + pov_size_to_candidate, + pov_sizes, + candidates_generated: 0, + candidate_hashes: HashMap::new(), + candidates: Vec::new().into_iter().cycle(), + }; + + gum::info!(target: LOG_TARGET, "Created test environment."); + + state } } @@ -593,12 +729,19 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let mut batch = FuturesUnordered::new(); let mut 
availability_bytes = 0u128; + env.metrics().set_n_validators(config.n_validators); + env.metrics().set_n_cores(config.n_cores); + env.metrics().set_pov_size(config.pov_sizes[0]); + let mut completed_count = 0; + for loop_num in 0..env.config().num_loops { gum::info!(target: LOG_TARGET, loop_num, "Starting loop"); + env.metrics().set_current_loop(loop_num); + let loop_start_ts = Instant::now(); for candidate_num in 0..config.n_cores as u64 { - let candidate = env.state.candidate(candidate_num as usize); - + let candidate = + env.state.next_candidate().expect("We always send up to n_cores*num_loops; qed"); let (tx, rx) = oneshot::channel(); batch.push(rx); @@ -609,32 +752,40 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { tx, )) .await; - - // // TODO: select between futures unordered of rx await and timer to send next request. - // if batch.len() >= config.max_parallel_recoveries { - // for rx in std::mem::take(&mut batch) { - // let available_data = rx.await.unwrap().unwrap(); - // availability_bytes += available_data.encoded_size() as u128; - // } - // } } + gum::info!("{} requests pending, {} completed", batch.len(), completed_count); while let Some(completed) = batch.next().await { let available_data = completed.unwrap().unwrap(); availability_bytes += available_data.encoded_size() as u128; } + + let block_time_delta = + Duration::from_secs(6).saturating_sub(Instant::now().sub(loop_start_ts)); + gum::info!(target: LOG_TARGET, "Sleeping till end of block {}ms", block_time_delta.as_millis()); + tokio::time::sleep(block_time_delta).await; } - println!("Waiting for subsystem to complete work... 
{} requests ", batch.len()); env.send_signal(OverseerSignal::Conclude).await; let duration = start_marker.elapsed().as_millis(); - let tput = ((availability_bytes) / duration) * 1000; - println!("Benchmark completed in {:?}ms", duration); - println!("Throughput: {}KiB/s", tput / 1024); + let availability_bytes = availability_bytes / 1024; + gum::info!("Benchmark completed in {:?}ms", duration); + gum::info!("Throughput: {} KiB/block", availability_bytes / env.config().num_loops as u128); + gum::info!( + "Block time: {} ms", + start_marker.elapsed().as_millis() / env.config().num_loops as u128 + ); + + let stats = env.network.stats(); + gum::info!( + "Total received from network: {} MiB", + stats + .iter() + .enumerate() + .map(|(index, stats)| stats.tx_bytes_total as u128) + .sum::() / + (1024 * 1024) + ); - let stats = env.network.stats().await; - for (index, stat) in stats.iter().enumerate() { - println!("Validator #{} : {:?}", index, stat); - } tokio::time::sleep(Duration::from_secs(1)).await; } diff --git a/polkadot/node/subsystem-bench/src/availability/network.rs b/polkadot/node/subsystem-bench/src/availability/network.rs index 02af817e691f..948fbae445e1 100644 --- a/polkadot/node/subsystem-bench/src/availability/network.rs +++ b/polkadot/node/subsystem-bench/src/availability/network.rs @@ -20,7 +20,7 @@ use std::sync::atomic::{AtomicU64, Ordering}; use tokio::sync::mpsc::UnboundedSender; // An emulated node egress traffic rate_limiter. #[derive(Debug)] -struct RateLimit { +pub struct RateLimit { // How often we refill credits in buckets tick_rate: usize, // Total ticks @@ -142,9 +142,8 @@ impl PeerEmulator { spawn_task_handle .clone() .spawn("peer-emulator", "test-environment", async move { - let mut rate_limiter = RateLimit::new(20, bandwidth); - let rx_bytes_total = 0; - let mut tx_bytes_total = 0u128; + // Rate limit peer send. 
+ let mut rate_limiter = RateLimit::new(10, bandwidth); loop { let stats_clone = stats.clone(); let maybe_action: Option = actions_rx.recv().await; @@ -158,14 +157,12 @@ impl PeerEmulator { async move { tokio::time::sleep(latency).await; action.run().await; - stats_clone - .tx_bytes_total - .fetch_add(size as u64, Ordering::Relaxed); + stats_clone.inc_sent(size); }, ) } else { action.run().await; - stats_clone.tx_bytes_total.fetch_add(size as u64, Ordering::Relaxed); + stats_clone.inc_sent(size); } } else { break @@ -195,11 +192,43 @@ pub struct NetworkAction { latency: Option, } +unsafe impl Send for NetworkAction {} + /// Book keeping of sent and received bytes. -#[derive(Debug, Default)] pub struct PeerEmulatorStats { - pub rx_bytes_total: AtomicU64, - pub tx_bytes_total: AtomicU64, + rx_bytes_total: AtomicU64, + tx_bytes_total: AtomicU64, + metrics: Metrics, + peer_index: usize, +} + +impl PeerEmulatorStats { + pub(crate) fn new(peer_index: usize, metrics: Metrics) -> Self { + Self { + metrics, + rx_bytes_total: AtomicU64::from(0), + tx_bytes_total: AtomicU64::from(0), + peer_index, + } + } + + pub fn inc_sent(&self, bytes: usize) { + self.tx_bytes_total.fetch_add(bytes as u64, Ordering::Relaxed); + self.metrics.on_peer_sent(self.peer_index, bytes as u64); + } + + pub fn inc_received(&self, bytes: usize) { + self.rx_bytes_total.fetch_add(bytes as u64, Ordering::Relaxed); + self.metrics.on_peer_received(self.peer_index, bytes as u64); + } + + pub fn sent(&self) -> u64 { + self.tx_bytes_total.load(Ordering::Relaxed) + } + + pub fn received(&self) -> u64 { + self.rx_bytes_total.load(Ordering::Relaxed) + } } #[derive(Debug, Default)] @@ -229,21 +258,31 @@ impl NetworkAction { // Implements network latency, bandwidth and error. #[derive(Clone)] pub struct NetworkEmulator { - // Per peer network emulation + // Per peer network emulation. peers: Vec, + // Per peer stats. 
stats: Vec>, + // Metrics + metrics: Metrics, } impl NetworkEmulator { - pub fn new(n_peers: usize, bandwidth: usize, spawn_task_handle: SpawnTaskHandle) -> Self { + pub fn new( + n_peers: usize, + bandwidth: usize, + spawn_task_handle: SpawnTaskHandle, + registry: &Registry, + ) -> Self { + let metrics = Metrics::new(®istry).expect("Metrics always register succesfully"); + let (stats, peers) = (0..n_peers) - .map(|_index| { - let stats = Arc::new(PeerEmulatorStats::default()); + .map(|peer_index| { + let stats = Arc::new(PeerEmulatorStats::new(peer_index, metrics.clone())); (stats.clone(), PeerEmulator::new(bandwidth, spawn_task_handle.clone(), stats)) }) .unzip(); - Self { peers, stats } + Self { peers, stats, metrics } } pub fn submit_peer_action(&mut self, index: usize, action: NetworkAction) { @@ -251,15 +290,87 @@ impl NetworkEmulator { } // Returns the sent/received stats for all peers. - pub async fn stats(&mut self) -> Vec { + pub fn peer_stats(&mut self, peer_index: usize) -> Arc { + self.stats[peer_index].clone() + } + + // Returns the sent/received stats for all peers. + pub fn stats(&mut self) -> Vec { let r = self .stats .iter() .map(|stats| PeerStats { - rx_bytes_total: stats.rx_bytes_total.load(Ordering::Relaxed), - tx_bytes_total: stats.tx_bytes_total.load(Ordering::Relaxed), + rx_bytes_total: stats.received(), + tx_bytes_total: stats.sent(), }) .collect::>(); r } + + // Increment bytes sent by our node (the node that contains the subsystem under test) + pub fn inc_sent(&self, bytes: u64) { + // Our node always is peer 0. + self.metrics.on_peer_sent(0, bytes); + } + + // Increment bytes received by our node (the node that contains the subsystem under test) + pub fn inc_received(&self, bytes: u64) { + // Our node always is peer 0. 
+ self.metrics.on_peer_received(0, bytes); + } +} + +use polkadot_node_subsystem_util::metrics::{ + self, + prometheus::{self, Counter, CounterVec, Histogram, Opts, PrometheusError, Registry}, +}; + +/// Emulated network metrics. +#[derive(Clone)] +pub(crate) struct Metrics { + /// Number of bytes sent per peer. + peer_total_sent: CounterVec, + /// Number of received sent per peer. + peer_total_received: CounterVec, +} + +impl Metrics { + pub fn new(registry: &Registry) -> Result { + Ok(Self { + peer_total_sent: prometheus::register( + CounterVec::new( + Opts::new( + "subsystem_benchmark_network_peer_total_bytes_sent", + "Total number of bytes a peer has sent.", + ), + &["peer"], + )?, + registry, + )?, + peer_total_received: prometheus::register( + CounterVec::new( + Opts::new( + "subsystem_benchmark_network_peer_total_bytes_received", + "Total number of bytes a peer has received.", + ), + &["peer"], + )?, + registry, + )?, + }) + } + + /// Increment total sent for a peer. + pub fn on_peer_sent(&self, peer_index: usize, bytes: u64) { + self.peer_total_sent + .with_label_values(vec![format!("node{}", peer_index).as_str()].as_slice()) + .inc_by(bytes); + } + + /// Increment total receioved for a peer. + pub fn on_peer_received(&self, peer_index: usize, bytes: u64) { + self.peer_total_received + .with_label_values(vec![format!("node{}", peer_index).as_str()].as_slice()) + .inc_by(bytes); + } } diff --git a/polkadot/node/subsystem-bench/src/availability/test_env.rs b/polkadot/node/subsystem-bench/src/availability/test_env.rs new file mode 100644 index 000000000000..f67c132f4eb4 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/availability/test_env.rs @@ -0,0 +1,63 @@ +use super::*; +use polkadot_node_subsystem_util::metrics::{ + self, + prometheus::{self, Counter, Gauge, Histogram, Opts, PrometheusError, Registry, U64}, +}; + +/// Test environment/configuration metrics +#[derive(Clone)] +pub struct TestEnvironmentMetrics { + /// Number of bytes sent per peer. 
+ n_validators: Gauge, + /// Number of received sent per peer. + n_cores: Gauge, + /// PoV size + pov_size: Gauge, + /// Current loop + current_loop: Gauge, +} + +impl TestEnvironmentMetrics { + pub fn new(registry: &Registry) -> Result { + Ok(Self { + n_validators: prometheus::register( + Gauge::new( + "subsystem_benchmark_n_validators", + "Total number of validators in the test", + )?, + registry, + )?, + n_cores: prometheus::register( + Gauge::new( + "subsystem_benchmark_n_cores", + "Number of cores we fetch availability for each loop", + )?, + registry, + )?, + pov_size: prometheus::register( + Gauge::new("subsystem_benchmark_pov_size", "The pov size")?, + registry, + )?, + current_loop: prometheus::register( + Gauge::new("subsystem_benchmark_current_loop", "The current test loop")?, + registry, + )?, + }) + } + + pub fn set_n_validators(&self, n_validators: usize) { + self.n_validators.set(n_validators as u64); + } + + pub fn set_n_cores(&self, n_cores: usize) { + self.n_cores.set(n_cores as u64); + } + + pub fn set_current_loop(&self, current_loop: usize) { + self.current_loop.set(current_loop as u64); + } + + pub fn set_pov_size(&self, pov_size: usize) { + self.pov_size.set(pov_size as u64); + } +} diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index ba66d06fe320..bdd8d93313bb 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -90,6 +90,10 @@ struct BenchCli { /// The bandwidth of simulated remote peers in KiB pub peer_bandwidth: Option, + #[clap(short, long)] + /// The bandwidth of our simulated node in KiB + pub bandwidth: Option, + #[clap(long, value_parser=le_100)] /// Simulated connection error rate [0-100]. 
pub peer_error: Option, @@ -124,10 +128,9 @@ impl BenchCli { let runtime = new_runtime(); let registry = Registry::new(); - let registry_clone = registry.clone(); let mut pov_sizes = Vec::new(); - pov_sizes.append(&mut vec![5 * 1024 * 1024; 200]); + pov_sizes.append(&mut vec![10 * 1024 * 1024; 200]); let mut test_config = match self.target { BenchmarkTarget::DataAvailabilityRead(options) => match self.network { @@ -170,14 +173,20 @@ impl BenchCli { } if let Some(bandwidth) = self.peer_bandwidth { + // CLI expects bw in KiB + test_config.peer_bandwidth = bandwidth * 1024; + } + + if let Some(bandwidth) = self.bandwidth { // CLI expects bw in KiB test_config.bandwidth = bandwidth * 1024; } - let state = TestState::new(test_config); - let mut env = TestEnvironment::new(runtime.handle().clone(), state, registry.clone()); + let candidate_count = test_config.n_cores * test_config.num_loops; - let runtime_handle = runtime.handle().clone(); + let mut state = TestState::new(test_config); + state.generate_candidates(candidate_count); + let mut env = TestEnvironment::new(runtime.handle().clone(), state, registry.clone()); println!("{:?}", env.config()); @@ -230,9 +239,9 @@ impl BenchCli { fn main() -> eyre::Result<()> { color_eyre::install()?; let _ = env_logger::builder() - .is_test(true) - .filter(Some(LOG_TARGET), log::LevelFilter::Info) - .try_init(); + .filter(Some("hyper"), log::LevelFilter::Info) + .try_init() + .unwrap(); let cli: BenchCli = BenchCli::parse(); cli.launch()?; From d1b9fa39aaa98cf7e20b2108399a887780255d3b Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 14 Nov 2023 12:20:24 +0200 Subject: [PATCH 13/52] refactor Signed-off-by: Andrei Sandu --- Cargo.lock | 1 + polkadot/node/subsystem-bench/Cargo.toml | 1 + .../subsystem-bench/src/availability/mod.rs | 105 ++++++++++-------- .../src/availability/test_env.rs | 63 ----------- .../node/subsystem-bench/src/core/display.rs | 15 +++ .../node/subsystem-bench/src/core/keyring.rs | 46 ++++++++ 
polkadot/node/subsystem-bench/src/core/mod.rs | 80 +++++++++++++ .../src/{availability => core}/network.rs | 48 +++++--- .../node/subsystem-bench/src/core/test_env.rs | 102 +++++++++++++++++ .../subsystem-bench/src/subsystem-bench.rs | 41 +++++-- 10 files changed, 371 insertions(+), 131 deletions(-) delete mode 100644 polkadot/node/subsystem-bench/src/availability/test_env.rs create mode 100644 polkadot/node/subsystem-bench/src/core/display.rs create mode 100644 polkadot/node/subsystem-bench/src/core/keyring.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mod.rs rename polkadot/node/subsystem-bench/src/{availability => core}/network.rs (86%) create mode 100644 polkadot/node/subsystem-bench/src/core/test_env.rs diff --git a/Cargo.lock b/Cargo.lock index ee80ffb6e815..9e93536d4f32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13028,6 +13028,7 @@ dependencies = [ "polkadot-primitives-test-helpers", "prometheus", "rand 0.8.5", + "sc-keystore", "sc-network", "sc-service", "sp-application-crypto", diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index c5d62d3aa74f..72c8c3ac3c4d 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -27,6 +27,7 @@ color-eyre = { version = "0.6.1", default-features = false } assert_matches = "1.5" async-trait = "0.1.57" sp-keystore = { path = "../../../substrate/primitives/keystore" } +sc-keystore = { path = "../../../substrate/client/keystore" } sp-core = { path = "../../../substrate/primitives/core" } clap = { version = "4.4.6", features = ["derive"] } futures = "0.3.21" diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 4f821f819908..5f856ec1780f 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -22,6 +22,10 @@ use std::{ time::{Duration, Instant}, }; +use 
sc_keystore::LocalKeystore; +use sp_application_crypto::AppCrypto; +use sp_keystore::{Keystore, KeystorePtr}; + use futures::{ channel::{mpsc, oneshot}, stream::FuturesUnordered, @@ -53,7 +57,7 @@ use polkadot_node_subsystem::{ }; use std::net::{Ipv4Addr, SocketAddr}; -mod test_env; +use super::core::{keyring::Keyring, network::*, test_env::TestEnvironmentMetrics}; const LOG_TARGET: &str = "subsystem-bench::availability"; @@ -71,9 +75,8 @@ use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::{SpawnTaskHandle, TaskManager}; mod configuration; -mod network; -pub use configuration::TestConfiguration; +pub use configuration::{PeerLatency, TestConfiguration}; // Deterministic genesis hash for protocol names const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); @@ -140,10 +143,12 @@ impl TestEnvironment { task_manager.spawn_handle(), state.config().use_fast_path, ); + let metrics = TestEnvironmentMetrics::new(®istry).expect("Metrics need to be registered"); let mut network = NetworkEmulator::new( state.config().n_validators, + state.validator_authority_id.clone(), state.config().peer_bandwidth, task_manager.spawn_handle(), ®istry, @@ -243,7 +248,7 @@ impl TestEnvironment { ) -> NetworkAction { match request { Requests::ChunkFetchingV1(outgoing_request) => { - let validator_index = outgoing_request.payload.index.0 as usize; + let validator_index: usize = outgoing_request.payload.index.0 as usize; let candidate_hash = outgoing_request.payload.candidate_hash; let candidate_index = state @@ -266,6 +271,12 @@ impl TestEnvironment { Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode()) }; + let authority_discovery_id = match outgoing_request.peer { + req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, + _ => panic!("Peer recipient not supported yet"), + }; + let authority_discovery_id_clone = authority_discovery_id.clone(); + let future = async move { let _ = 
outgoing_request.pending_response.send(response); } @@ -274,13 +285,14 @@ impl TestEnvironment { let future_wrapper = async move { // Forward the response to the ingress channel of our node. // On receive side we apply our node receiving rate limit. - let action = NetworkAction::new(validator_index, future, size, None); + let action = + NetworkAction::new(authority_discovery_id_clone, future, size, None); ingress_tx.send(action).unwrap(); } .boxed(); NetworkAction::new( - validator_index, + authority_discovery_id, future_wrapper, size, // Generate a random latency based on configuration. @@ -288,12 +300,6 @@ impl TestEnvironment { ) }, Requests::AvailableDataFetchingV1(outgoing_request) => { - println!("{:?}", outgoing_request); - // TODO: do better, by implementing diff authority ids and mapping network actions - // to authority id, - let validator_index = - Uniform::from(0..state.config().n_validators).sample(&mut thread_rng()); - let candidate_hash = outgoing_request.payload.candidate_hash; let candidate_index = state .candidate_hashes @@ -318,16 +324,23 @@ impl TestEnvironment { } .boxed(); + let authority_discovery_id = match outgoing_request.peer { + req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, + _ => panic!("Peer recipient not supported yet"), + }; + let authority_discovery_id_clone = authority_discovery_id.clone(); + let future_wrapper = async move { // Forward the response to the ingress channel of our node. // On receive side we apply our node receiving rate limit. - let action = NetworkAction::new(validator_index, future, size, None); + let action = + NetworkAction::new(authority_discovery_id_clone, future, size, None); ingress_tx.send(action).unwrap(); } .boxed(); NetworkAction::new( - validator_index, + authority_discovery_id, future_wrapper, size, // Generate a random latency based on configuration. 
@@ -362,7 +375,7 @@ impl TestEnvironment { network.inc_sent(Self::request_size(&request)); let action = Self::respond_to_send_request(&mut state, request, ingress_tx.clone()); // Account for our node sending the request over the emulated network. - network.submit_peer_action(action.index(), action); + network.submit_peer_action(action.peer(), action); } }, AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryAvailableData(_candidate_hash, tx)) => { @@ -470,25 +483,24 @@ impl AvailabilityRecoverySubsystemInstance { } } +pub fn random_pov_size(min_pov_size: usize, max_pov_size: usize) -> usize { + random_uniform_sample(min_pov_size, max_pov_size) +} + +fn random_uniform_sample + From>(min_value: T, max_value: T) -> T { + Uniform::from(min_value.into()..=max_value.into()) + .sample(&mut thread_rng()) + .into() +} + // We use this to bail out sending messages to the subsystem if it is overloaded such that // the time of flight is breaches 5s. // This should eventually be a test parameter. const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); -use sp_keyring::Sr25519Keyring; - -use crate::availability::network::{ActionFuture, NetworkAction}; - -use self::{ - configuration::PeerLatency, - network::{NetworkEmulator, RateLimit}, - test_env::TestEnvironmentMetrics, -}; - #[derive(Clone)] pub struct TestState { - validators: Vec, - validator_public: IndexedVec, + validator_public: Vec, validator_authority_id: Vec, // The test node validator index. 
validator_index: ValidatorIndex, @@ -531,7 +543,7 @@ impl TestState { let validator_groups = my_vec.chunks(5).map(|x| Vec::from(x)).collect::>(); SessionInfo { - validators: self.validator_public.clone(), + validators: self.validator_public.clone().into(), discovery_keys: self.validator_authority_id.clone(), validator_groups: IndexedVec::>::from(validator_groups), assignment_keys: vec![], @@ -608,13 +620,24 @@ impl TestState { } pub fn new(config: TestConfiguration) -> Self { - let validators = (0..config.n_validators as u64) - .into_iter() - .map(|_v| Sr25519Keyring::Alice) + let keystore: KeystorePtr = Arc::new(LocalKeystore::in_memory()); + + let keyrings = (0..config.n_validators) + .map(|peer_index| Keyring::new(format!("Node{}", peer_index).into())) .collect::>(); - let validator_public = validator_pubkeys(&validators); - let validator_authority_id = validator_authority_id(&validators); + // Generate `AuthorityDiscoveryId`` for each peer + let validator_public: Vec = keyrings + .iter() + .map(|keyring: &Keyring| keyring.clone().public().into()) + .collect::>(); + + let validator_authority_id: Vec = keyrings + .iter() + .map({ |keyring| keyring.clone().public().into() }) + .collect::>() + .into(); + let validator_index = ValidatorIndex(0); let mut chunks = Vec::new(); let mut available_data = Vec::new(); @@ -643,7 +666,7 @@ impl TestState { }; let (new_chunks, erasure_root) = derive_erasure_chunks_with_proofs_and_root( - validators.len(), + config.n_validators, &new_available_data, |_, _| {}, ); @@ -658,7 +681,6 @@ impl TestState { let pov_sizes = config.pov_sizes.clone().into_iter().cycle(); let mut state = Self { - validators, validator_public, validator_authority_id, validator_index, @@ -681,14 +703,6 @@ impl TestState { } } -fn validator_pubkeys(val_ids: &[Sr25519Keyring]) -> IndexedVec { - val_ids.iter().map(|v| v.public().into()).collect() -} - -fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec { - val_ids.iter().map(|v| 
v.public().into()).collect() -} - fn derive_erasure_chunks_with_proofs_and_root( n_validators: usize, available_data: &AvailableData, @@ -731,8 +745,6 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { env.metrics().set_n_validators(config.n_validators); env.metrics().set_n_cores(config.n_cores); - env.metrics().set_pov_size(config.pov_sizes[0]); - let mut completed_count = 0; for loop_num in 0..env.config().num_loops { gum::info!(target: LOG_TARGET, loop_num, "Starting loop"); @@ -754,9 +766,10 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { .await; } - gum::info!("{} requests pending, {} completed", batch.len(), completed_count); + gum::info!("{} requests pending", batch.len()); while let Some(completed) = batch.next().await { let available_data = completed.unwrap().unwrap(); + env.metrics().on_pov_size(available_data.encoded_size()); availability_bytes += available_data.encoded_size() as u128; } diff --git a/polkadot/node/subsystem-bench/src/availability/test_env.rs b/polkadot/node/subsystem-bench/src/availability/test_env.rs deleted file mode 100644 index f67c132f4eb4..000000000000 --- a/polkadot/node/subsystem-bench/src/availability/test_env.rs +++ /dev/null @@ -1,63 +0,0 @@ -use super::*; -use polkadot_node_subsystem_util::metrics::{ - self, - prometheus::{self, Counter, Gauge, Histogram, Opts, PrometheusError, Registry, U64}, -}; - -/// Test environment/configuration metrics -#[derive(Clone)] -pub struct TestEnvironmentMetrics { - /// Number of bytes sent per peer. - n_validators: Gauge, - /// Number of received sent per peer. 
- n_cores: Gauge, - /// PoV size - pov_size: Gauge, - /// Current loop - current_loop: Gauge, -} - -impl TestEnvironmentMetrics { - pub fn new(registry: &Registry) -> Result { - Ok(Self { - n_validators: prometheus::register( - Gauge::new( - "subsystem_benchmark_n_validators", - "Total number of validators in the test", - )?, - registry, - )?, - n_cores: prometheus::register( - Gauge::new( - "subsystem_benchmark_n_cores", - "Number of cores we fetch availability for each loop", - )?, - registry, - )?, - pov_size: prometheus::register( - Gauge::new("subsystem_benchmark_pov_size", "The pov size")?, - registry, - )?, - current_loop: prometheus::register( - Gauge::new("subsystem_benchmark_current_loop", "The current test loop")?, - registry, - )?, - }) - } - - pub fn set_n_validators(&self, n_validators: usize) { - self.n_validators.set(n_validators as u64); - } - - pub fn set_n_cores(&self, n_cores: usize) { - self.n_cores.set(n_cores as u64); - } - - pub fn set_current_loop(&self, current_loop: usize) { - self.current_loop.set(current_loop as u64); - } - - pub fn set_pov_size(&self, pov_size: usize) { - self.pov_size.set(pov_size as u64); - } -} diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs new file mode 100644 index 000000000000..47483d33a42a --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -0,0 +1,15 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . diff --git a/polkadot/node/subsystem-bench/src/core/keyring.rs b/polkadot/node/subsystem-bench/src/core/keyring.rs new file mode 100644 index 000000000000..40e8d60d0cd1 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/keyring.rs @@ -0,0 +1,46 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +pub use sp_core::sr25519; +use sp_core::{ + sr25519::{Pair, Public, Signature}, + ByteArray, Pair as PairT, H256, +}; +use std::{collections::HashMap, ops::Deref}; + +/// Set of test accounts. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Keyring { + name: String, +} + +impl Keyring { + pub fn new(name: String) -> Keyring { + Self { name } + } + + pub fn pair(self) -> Pair { + Pair::from_string(&format!("//{}", self.name), None).expect("input is always good; qed") + } + + pub fn public(self) -> Public { + self.pair().public() + } + + pub fn to_seed(self) -> String { + format!("//{}", self.name) + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs new file mode 100644 index 000000000000..4b9db3144f54 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -0,0 +1,80 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . 
+ +use itertools::Itertools; +use std::{ + collections::HashMap, + iter::Cycle, + ops::{Div, Sub}, + sync::Arc, + time::{Duration, Instant}, +}; + +use sc_keystore::LocalKeystore; +use sp_application_crypto::AppCrypto; +use sp_keystore::{Keystore, KeystorePtr}; + +use futures::{ + channel::{mpsc, oneshot}, + stream::FuturesUnordered, + FutureExt, SinkExt, StreamExt, +}; +use futures_timer::Delay; + +use polkadot_node_metrics::metrics::Metrics; + +use polkadot_availability_recovery::AvailabilityRecoverySubsystem; + +use parity_scale_codec::Encode; +use polkadot_node_network_protocol::request_response::{ + self as req_res, v1::ChunkResponse, IncomingRequest, ReqProtocolNames, Requests, +}; +use rand::{distributions::Uniform, prelude::Distribution, seq::IteratorRandom, thread_rng}; + +use prometheus::Registry; +use sc_network::{config::RequestResponseConfig, OutboundFailure, RequestFailure}; + +use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; +use polkadot_node_primitives::{BlockData, PoV, Proof}; +use polkadot_node_subsystem::{ + messages::{ + AllMessages, AvailabilityRecoveryMessage, AvailabilityStoreMessage, NetworkBridgeTxMessage, + RuntimeApiMessage, RuntimeApiRequest, + }, + ActiveLeavesUpdate, FromOrchestra, OverseerSignal, Subsystem, +}; +use std::net::{Ipv4Addr, SocketAddr}; + +use super::core::{keyring::Keyring, network::*, test_env::TestEnvironmentMetrics}; + +const LOG_TARGET: &str = "subsystem-bench::core"; + +use polkadot_node_primitives::{AvailableData, ErasureChunk}; + +use polkadot_node_subsystem_test_helpers::{ + make_buffered_subsystem_context, mock::new_leaf, TestSubsystemContextHandle, +}; +use polkadot_node_subsystem_util::TimeoutExt; +use polkadot_primitives::{ + AuthorityDiscoveryId, CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, IndexedVec, + PersistedValidationData, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, +}; +use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; 
+use sc_service::{SpawnTaskHandle, TaskManager}; + +pub mod keyring; +pub mod network; +pub mod test_env; diff --git a/polkadot/node/subsystem-bench/src/availability/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs similarity index 86% rename from polkadot/node/subsystem-bench/src/availability/network.rs rename to polkadot/node/subsystem-bench/src/core/network.rs index 948fbae445e1..170ab45e35a3 100644 --- a/polkadot/node/subsystem-bench/src/availability/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -124,7 +124,9 @@ mod tests { } } -// A network peer emulator +// A network peer emulator. It spawns a task that accepts `NetworkActions` and +// executes them with a configurable delay and bandwidth constraints. Tipically +// these actions wrap a future that performs a channel send to the subsystem(s) under test. #[derive(Clone)] struct PeerEmulator { // The queue of requests waiting to be served by the emulator @@ -186,8 +188,8 @@ pub struct NetworkAction { run: ActionFuture, // The payload size that we simulate sending from a peer size: usize, - // Peer index - index: usize, + // Peer which should run the action. + peer: AuthorityDiscoveryId, // The amount of time to delay the polling `run` latency: Option, } @@ -237,8 +239,13 @@ pub struct PeerStats { pub tx_bytes_total: u64, } impl NetworkAction { - pub fn new(index: usize, run: ActionFuture, size: usize, latency: Option) -> Self { - Self { run, size, index, latency } + pub fn new( + peer: AuthorityDiscoveryId, + run: ActionFuture, + size: usize, + latency: Option, + ) -> Self { + Self { run, size, peer, latency } } pub fn size(&self) -> usize { @@ -249,44 +256,55 @@ impl NetworkAction { self.run.await; } - pub fn index(&self) -> usize { - self.index + pub fn peer(&self) -> AuthorityDiscoveryId { + self.peer.clone() } } -// Mocks the network bridge and an arbitrary number of connected peer nodes. -// Implements network latency, bandwidth and error. 
+/// Mocks the network bridge and an arbitrary number of connected peer nodes. +/// Implements network latency, bandwidth and connection errors. #[derive(Clone)] pub struct NetworkEmulator { // Per peer network emulation. peers: Vec, - // Per peer stats. + /// Per peer stats. stats: Vec>, - // Metrics + /// Network throughput metrics metrics: Metrics, + /// Each emulated peer is a validator. + validator_authority_ids: HashMap, } impl NetworkEmulator { pub fn new( n_peers: usize, + validator_authority_ids: Vec, bandwidth: usize, spawn_task_handle: SpawnTaskHandle, registry: &Registry, ) -> Self { let metrics = Metrics::new(®istry).expect("Metrics always register succesfully"); + let mut validator_authority_id_mapping = HashMap::new(); + // Create a `PeerEmulator` for each peer. let (stats, peers) = (0..n_peers) - .map(|peer_index| { + .zip(validator_authority_ids.into_iter()) + .map(|(peer_index, authority_id)| { + validator_authority_id_mapping.insert(authority_id, peer_index); let stats = Arc::new(PeerEmulatorStats::new(peer_index, metrics.clone())); (stats.clone(), PeerEmulator::new(bandwidth, spawn_task_handle.clone(), stats)) }) .unzip(); - Self { peers, stats, metrics } + Self { peers, stats, metrics, validator_authority_ids: validator_authority_id_mapping } } - pub fn submit_peer_action(&mut self, index: usize, action: NetworkAction) { - let _ = self.peers[index].send(action); + pub fn submit_peer_action(&mut self, peer: AuthorityDiscoveryId, action: NetworkAction) { + let index = self + .validator_authority_ids + .get(&peer) + .expect("all test authorities are valid; qed"); + self.peers[*index].send(action); } // Returns the sent/received stats for all peers. 
diff --git a/polkadot/node/subsystem-bench/src/core/test_env.rs b/polkadot/node/subsystem-bench/src/core/test_env.rs new file mode 100644 index 000000000000..c20b96d642af --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/test_env.rs @@ -0,0 +1,102 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use super::*; +use polkadot_node_subsystem_util::metrics::{ + self, + prometheus::{ + self, Counter, Gauge, Histogram, HistogramVec, Opts, PrometheusError, Registry, U64, + }, +}; + +const MIB: f64 = 1024.0*1024.0; + +/// Test environment/configuration metrics +#[derive(Clone)] +pub struct TestEnvironmentMetrics { + /// Number of bytes sent per peer. + n_validators: Gauge, + /// Number of received sent per peer. 
+ n_cores: Gauge, + /// PoV size + pov_size: Histogram, + /// Current loop + current_loop: Gauge, +} + +impl TestEnvironmentMetrics { + pub fn new(registry: &Registry) -> Result { + let mut buckets = prometheus::exponential_buckets(16384.0, 2.0, 9) + .expect("arguments are always valid; qed"); + buckets.extend(vec![ + 5.0 * MIB, + 6.0 * MIB, + 7.0 * MIB, + 8.0 * MIB, + 9.0 * MIB, + 10.0 * MIB, + ]); + + Ok(Self { + n_validators: prometheus::register( + Gauge::new( + "subsystem_benchmark_n_validators", + "Total number of validators in the test", + )?, + registry, + )?, + n_cores: prometheus::register( + Gauge::new( + "subsystem_benchmark_n_cores", + "Number of cores we fetch availability for each loop", + )?, + registry, + )?, + current_loop: prometheus::register( + Gauge::new("subsystem_benchmark_current_loop", "The current test loop")?, + registry, + )?, + pov_size: prometheus::register( + Histogram::with_opts( + prometheus::HistogramOpts::new( + "subsystem_benchmark_pov_size", + "The compressed size of the proof of validity of a candidate", + ) + .buckets( + buckets + ), + )?, + registry, + )?, + }) + } + + pub fn set_n_validators(&self, n_validators: usize) { + self.n_validators.set(n_validators as u64); + } + + pub fn set_n_cores(&self, n_cores: usize) { + self.n_cores.set(n_cores as u64); + } + + pub fn set_current_loop(&self, current_loop: usize) { + self.current_loop.set(current_loop as u64); + } + + pub fn on_pov_size(&self, pov_size: usize) { + self.pov_size.observe(pov_size as f64); + } +} diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index bdd8d93313bb..9e581555d761 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -22,8 +22,9 @@ use prometheus::proto::LabelPair; use std::time::Duration; pub(crate) mod availability; +pub(crate) mod core; -use availability::{TestConfiguration, TestEnvironment, TestState}; 
+use availability::{random_pov_size, TestConfiguration, TestEnvironment, TestState}; const LOG_TARGET: &str = "subsystem-bench"; use clap_num::number_range; @@ -62,6 +63,14 @@ pub struct DataAvailabilityReadOptions { /// Number of validators to fetch chunks from. pub n_validators: usize, + #[clap(long, ignore_case = true, default_value_t = 5120)] + /// The minimum pov size in KiB + pub min_pov_size: usize, + + #[clap(long, ignore_case = true, default_value_t = 5120)] + /// The maximum pov size in KiB + pub max_pov_size: usize, + #[clap(short, long, default_value_t = false)] /// Turbo boost AD Read by fetching from backers first. Tipically this is only faster if nodes /// have enough bandwidth. @@ -129,9 +138,6 @@ impl BenchCli { let runtime = new_runtime(); let registry = Registry::new(); - let mut pov_sizes = Vec::new(); - pov_sizes.append(&mut vec![10 * 1024 * 1024; 200]); - let mut test_config = match self.target { BenchmarkTarget::DataAvailabilityRead(options) => match self.network { NetworkEmulation::Healthy => TestConfiguration::healthy_network( @@ -139,21 +145,42 @@ impl BenchCli { options.fetch_from_backers, options.n_validators, options.n_cores, - pov_sizes, + (0..options.n_cores) + .map(|_| { + random_pov_size( + options.min_pov_size * 1024, + options.max_pov_size * 1024, + ) + }) + .collect(), ), NetworkEmulation::Degraded => TestConfiguration::degraded_network( options.num_loops, options.fetch_from_backers, options.n_validators, options.n_cores, - pov_sizes, + (0..options.n_cores) + .map(|_| { + random_pov_size( + options.min_pov_size * 1024, + options.max_pov_size * 1024, + ) + }) + .collect(), ), NetworkEmulation::Ideal => TestConfiguration::ideal_network( options.num_loops, options.fetch_from_backers, options.n_validators, options.n_cores, - pov_sizes, + (0..options.n_cores) + .map(|_| { + random_pov_size( + options.min_pov_size * 1024, + options.max_pov_size * 1024, + ) + }) + .collect(), ), }, }; From c5937ab840c56a812f840332fdbb295b23c10823 Mon Sep 
17 00:00:00 2001 From: Andrei Sandu Date: Wed, 15 Nov 2023 08:42:41 +0200 Subject: [PATCH 14/52] pretty cli + minor refactor + remove unused Signed-off-by: Andrei Sandu --- Cargo.lock | 61 +++- cumulus/pallets/xcmp-queue/src/bridging.rs | 4 +- polkadot/node/subsystem-bench/Cargo.toml | 3 + .../src/availability/configuration.rs | 83 +++++- .../subsystem-bench/src/availability/mod.rs | 120 ++++---- .../node/subsystem-bench/src/core/display.rs | 276 ++++++++++++++++++ .../node/subsystem-bench/src/core/keyring.rs | 10 +- polkadot/node/subsystem-bench/src/core/mod.rs | 56 +--- .../node/subsystem-bench/src/core/network.rs | 4 +- .../node/subsystem-bench/src/core/test_env.rs | 39 +-- .../subsystem-bench/src/subsystem-bench.rs | 99 +++---- .../node/subsystem-test-helpers/src/lib.rs | 7 + 12 files changed, 537 insertions(+), 225 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e93536d4f32..73fc3cbdeccc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2764,6 +2764,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "colored" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" +dependencies = [ + "is-terminal", + "lazy_static", + "windows-sys 0.48.0", +] + [[package]] name = "comfy-table" version = "7.0.1" @@ -8568,7 +8579,7 @@ dependencies = [ "itertools 0.10.5", "tar", "tempfile", - "toml_edit", + "toml_edit 0.19.14", ] [[package]] @@ -13009,6 +13020,7 @@ dependencies = [ "clap 4.4.6", "clap-num", "color-eyre", + "colored", "env_logger 0.9.3", "futures", "futures-timer", @@ -13031,12 +13043,14 @@ dependencies = [ "sc-keystore", "sc-network", "sc-service", + "serde", "sp-application-crypto", "sp-core", "sp-keyring", "sp-keystore", "substrate-prometheus-endpoint", "tokio", + "toml 0.8.8", "tracing-gum", ] @@ -13441,7 +13455,7 @@ 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" dependencies = [ "once_cell", - "toml_edit", + "toml_edit 0.19.14", ] [[package]] @@ -16276,18 +16290,18 @@ checksum = "f97841a747eef040fcd2e7b3b9a220a7205926e60488e673d9e4926d27772ce5" [[package]] name = "serde" -version = "1.0.188" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" dependencies = [ "proc-macro2", "quote", @@ -16316,9 +16330,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96426c9936fd7a0124915f9185ea1d20aa9445cc9821142f0a73bc9207a2e186" +checksum = "12022b835073e5b11e90a14f86838ceb1c8fb0325b72416845c487ac0fa95e80" dependencies = [ "serde", ] @@ -18819,14 +18833,26 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit", + "toml_edit 0.19.14", +] + +[[package]] +name = "toml" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a195ec8c9da26928f773888e0742ca3ca1040c6cd859c919c9f59c1954ab35" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit 0.21.0", ] [[package]] name = "toml_datetime" -version = "0.6.3" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" +checksum 
= "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" dependencies = [ "serde", ] @@ -18844,6 +18870,19 @@ dependencies = [ "winnow", ] +[[package]] +name = "toml_edit" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" +dependencies = [ + "indexmap 2.0.0", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "tower" version = "0.4.13" diff --git a/cumulus/pallets/xcmp-queue/src/bridging.rs b/cumulus/pallets/xcmp-queue/src/bridging.rs index 0fc3f1f39ea3..53238fe2bf7a 100644 --- a/cumulus/pallets/xcmp-queue/src/bridging.rs +++ b/cumulus/pallets/xcmp-queue/src/bridging.rs @@ -55,7 +55,9 @@ impl, Runtime: crate::Config> let sibling_bridge_hub_id: ParaId = SiblingBridgeHubParaId::get(); // let's find the channel's state with the sibling parachain, - let Some((outbound_state, queued_pages)) = pallet::Pallet::::outbound_channel_state(sibling_bridge_hub_id) else { + let Some((outbound_state, queued_pages)) = + pallet::Pallet::::outbound_channel_state(sibling_bridge_hub_id) + else { return false }; // suspended channel => it is congested diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index 72c8c3ac3c4d..3308b6fe1052 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -24,6 +24,7 @@ polkadot-primitives = { path = "../../primitives" } polkadot-node-network-protocol = { path = "../network/protocol" } polkadot-availability-recovery = { path = "../network/availability-recovery", features=["subsystem-benchmarks"]} color-eyre = { version = "0.6.1", default-features = false } +colored = "2.0.4" assert_matches = "1.5" async-trait = "0.1.57" sp-keystore = { path = "../../../substrate/primitives/keystore" } @@ -50,6 +51,8 @@ itertools = "0.11.0" polkadot-primitives-test-helpers = { path = "../../primitives/test-helpers" } 
prometheus_endpoint = { package = "substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" } prometheus = { version = "0.13.0", default-features = false } +toml = "0.8.8" +serde = "1.0.192" [features] default = [] diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs index cf142de06634..2d29d23811da 100644 --- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -14,10 +14,12 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use super::*; +use std::path::Path; +use super::*; +use serde::{Deserialize,Serialize}; /// Peer response latency configuration. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct PeerLatency { /// Min latency for `NetworkAction` completion. pub min_latency: Duration, @@ -26,7 +28,7 @@ pub struct PeerLatency { } /// The test input parameters -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct TestConfiguration { /// Configuration for the `availability-recovery` subsystem. pub use_fast_path: bool, @@ -34,8 +36,13 @@ pub struct TestConfiguration { pub n_validators: usize, /// Number of cores pub n_cores: usize, - /// The PoV size - pub pov_sizes: Vec, + /// The min PoV size + pub min_pov_size: usize, + /// The max PoV size, + pub max_pov_size: usize, + /// Randomly sampled pov_sizes + #[serde(skip)] + pov_sizes: Vec, /// The amount of bandiwdth remote validators have. pub peer_bandwidth: usize, /// The amount of bandiwdth our node has. 
@@ -44,31 +51,72 @@ pub struct TestConfiguration { pub latency: Option, /// Error probability pub error: usize, - /// Number of loops - /// In one loop `n_cores` candidates are recovered - pub num_loops: usize, + /// Number of blocks + /// In one block `n_cores` candidates are recovered + pub num_blocks: usize, } + impl Default for TestConfiguration { fn default() -> Self { Self { use_fast_path: false, - n_validators: 10, + n_validators: 100, n_cores: 10, pov_sizes: vec![5 * 1024 * 1024], bandwidth: 60 * 1024 * 1024, peer_bandwidth: 60 * 1024 * 1024, latency: None, error: 0, - num_loops: 1, + num_blocks: 1, + min_pov_size: 5*1024*1024, + max_pov_size: 5*1024*1024, } } } +fn generate_pov_sizes(count: usize, min: usize, max: usize) -> Vec { + (0..count).map(|_| random_pov_size(min, max)).collect() +} + +#[derive(Serialize,Deserialize)] +pub struct TestSequence { + #[serde(rename(serialize = "TestConfiguration", deserialize = "TestConfiguration"))] + test_configurations: Vec +} + +impl TestSequence { + pub fn to_vec(mut self) -> Vec { + // Generate Pov sizes + + for config in self.test_configurations.iter_mut() { + config.pov_sizes = generate_pov_sizes(config.n_cores, config.min_pov_size, config.max_pov_size); + } + + self.test_configurations + } +} + +impl TestSequence { + pub fn new_from_file(path: &Path) -> std::io::Result { + let string = String::from_utf8(std::fs::read(&path)?).expect("File is valid UTF8"); + Ok(toml::from_str(&string).expect("File is valid test sequence TOML")) + } +} + impl TestConfiguration { + pub fn write_to_disk(&self) { + // Serialize a slice of configurations + let toml = toml::to_string(&TestSequence{ test_configurations: vec![self.clone()] }).unwrap(); + std::fs::write("last_test.toml", toml).unwrap(); + } + + pub fn pov_sizes(&self) -> &[usize] { + &self.pov_sizes + } /// An unconstrained standard configuration matching Polkadot/Kusama pub fn ideal_network( - num_loops: usize, + num_blocks: usize, use_fast_path: bool, n_validators: 
usize, n_cores: usize, @@ -84,12 +132,13 @@ impl TestConfiguration { // No latency latency: None, error: 0, - num_loops, + num_blocks, + ..Default::default() } } pub fn healthy_network( - num_loops: usize, + num_blocks: usize, use_fast_path: bool, n_validators: usize, n_cores: usize, @@ -107,12 +156,13 @@ impl TestConfiguration { max_latency: Duration::from_millis(100), }), error: 3, - num_loops, + num_blocks, + ..Default::default() } } pub fn degraded_network( - num_loops: usize, + num_blocks: usize, use_fast_path: bool, n_validators: usize, n_cores: usize, @@ -130,7 +180,8 @@ impl TestConfiguration { max_latency: Duration::from_millis(500), }), error: 33, - num_loops, + num_blocks, + ..Default::default() } } } diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 5f856ec1780f..2c9f3e735afb 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -17,22 +17,18 @@ use itertools::Itertools; use std::{ collections::HashMap, iter::Cycle, - ops::{Div, Sub}, + ops::Sub, sync::Arc, time::{Duration, Instant}, }; -use sc_keystore::LocalKeystore; -use sp_application_crypto::AppCrypto; -use sp_keystore::{Keystore, KeystorePtr}; +use colored::Colorize; use futures::{ channel::{mpsc, oneshot}, stream::FuturesUnordered, FutureExt, SinkExt, StreamExt, }; -use futures_timer::Delay; - use polkadot_node_metrics::metrics::Metrics; use polkadot_availability_recovery::AvailabilityRecoverySubsystem; @@ -41,7 +37,7 @@ use parity_scale_codec::Encode; use polkadot_node_network_protocol::request_response::{ self as req_res, v1::ChunkResponse, IncomingRequest, ReqProtocolNames, Requests, }; -use rand::{distributions::Uniform, prelude::Distribution, seq::IteratorRandom, thread_rng}; +use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; use prometheus::Registry; use sc_network::{config::RequestResponseConfig, OutboundFailure, 
RequestFailure}; @@ -74,9 +70,9 @@ use polkadot_primitives::{ use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::{SpawnTaskHandle, TaskManager}; -mod configuration; +pub mod configuration; -pub use configuration::{PeerLatency, TestConfiguration}; +pub use configuration::{PeerLatency, TestConfiguration, TestSequence}; // Deterministic genesis hash for protocol names const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); @@ -205,8 +201,12 @@ impl TestEnvironment { self.state.config() } - pub fn network(&self) -> &NetworkEmulator { - &self.network + pub fn network(&mut self) -> &mut NetworkEmulator { + &mut self.network + } + + pub fn registry(&self) -> &Registry { + &self.registry } /// Produce a randomized duration between `min` and `max`. @@ -361,7 +361,14 @@ impl TestEnvironment { ) { loop { futures::select! { - message = ctx.recv().fuse() => { + maybe_message = ctx.maybe_recv().fuse() => { + let message = if let Some(message) = maybe_message{ + message + } else { + gum::info!("{}", "Test completed".bright_blue()); + return + }; + gum::trace!(target: LOG_TARGET, ?message, "Env task received message"); match message { @@ -390,7 +397,7 @@ impl TestEnvironment { AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx) ) => { let candidate_index = state.candidate_hashes.get(&candidate_hash).expect("candidate was generated previously; qed"); - gum::info!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); let chunk_size = state.chunks.get(*candidate_index as usize).unwrap()[0].encoded_size(); let _ = tx.send(Some(chunk_size)); @@ -564,13 +571,11 @@ impl TestState { send_chunk: impl Fn(usize) -> bool, tx: oneshot::Sender>, ) { - gum::info!(target: LOG_TARGET, ?candidate_hash, "respond_to_query_all_request"); - let candidate_index = self .candidate_hashes .get(&candidate_hash) .expect("candidate was 
generated previously; qed"); - gum::info!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); let v = self .chunks @@ -593,7 +598,7 @@ impl TestState { /// Generate candidates to be used in the test. pub fn generate_candidates(&mut self, count: usize) { - gum::info!(target: LOG_TARGET, "Pre-generating {} candidates.", count); + gum::info!(target: LOG_TARGET,"{}", format!("Pre-generating {} candidates.", count).bright_blue()); // Generate all candidates self.candidates = (0..count) @@ -610,7 +615,7 @@ impl TestState { // Store the new candidate in the state self.candidate_hashes.insert(candidate_receipt.hash(), candidate_index); - gum::info!(target: LOG_TARGET, candidate_hash = ?candidate_receipt.hash(), "new candidate"); + gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_receipt.hash(), "new candidate"); candidate_receipt }) @@ -620,8 +625,6 @@ impl TestState { } pub fn new(config: TestConfiguration) -> Self { - let keystore: KeystorePtr = Arc::new(LocalKeystore::in_memory()); - let keyrings = (0..config.n_validators) .map(|peer_index| Keyring::new(format!("Node{}", peer_index).into())) .collect::>(); @@ -634,7 +637,7 @@ impl TestState { let validator_authority_id: Vec = keyrings .iter() - .map({ |keyring| keyring.clone().public().into() }) + .map(|keyring| keyring.clone().public().into()) .collect::>() .into(); @@ -654,8 +657,8 @@ impl TestState { }; // For each unique pov we create a candidate receipt. 
- for (index, pov_size) in config.pov_sizes.iter().cloned().unique().enumerate() { - gum::info!(target: LOG_TARGET, index, pov_size, "Generating template candidates"); + for (index, pov_size) in config.pov_sizes().iter().cloned().unique().enumerate() { + gum::info!(target: LOG_TARGET, index, pov_size, "{}", "Generating template candidate".bright_blue()); let mut candidate_receipt = dummy_candidate_receipt(dummy_hash()); let pov = PoV { block_data: BlockData(vec![index as u8; pov_size]) }; @@ -679,8 +682,10 @@ impl TestState { candidate_receipts.push(candidate_receipt); } - let pov_sizes = config.pov_sizes.clone().into_iter().cycle(); - let mut state = Self { + let pov_sizes = config.pov_sizes().to_vec().into_iter().cycle(); + gum::info!(target: LOG_TARGET, "{}","Created test environment.".bright_blue()); + + Self { validator_public, validator_authority_id, validator_index, @@ -695,11 +700,7 @@ impl TestState { candidates_generated: 0, candidate_hashes: HashMap::new(), candidates: Vec::new().into_iter().cycle(), - }; - - gum::info!(target: LOG_TARGET, "Created test environment."); - - state + } } } @@ -746,27 +747,29 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { env.metrics().set_n_validators(config.n_validators); env.metrics().set_n_cores(config.n_cores); - for loop_num in 0..env.config().num_loops { - gum::info!(target: LOG_TARGET, loop_num, "Starting loop"); - env.metrics().set_current_loop(loop_num); + for block_num in 0..env.config().num_blocks { + gum::info!(target: LOG_TARGET, "Current block {}/{}", block_num, env.config().num_blocks); + env.metrics().set_current_block(block_num); - let loop_start_ts = Instant::now(); + let block_start_ts = Instant::now(); for candidate_num in 0..config.n_cores as u64 { let candidate = - env.state.next_candidate().expect("We always send up to n_cores*num_loops; qed"); + env.state.next_candidate().expect("We always send up to n_cores*num_blocks; qed"); let (tx, rx) = oneshot::channel(); batch.push(rx); 
env.send_message(AvailabilityRecoveryMessage::RecoverAvailableData( candidate.clone(), 1, - Some(GroupIndex(candidate_num as u32 % (config.n_cores / 5) as u32)), + Some(GroupIndex( + candidate_num as u32 % (std::cmp::max(5, config.n_cores) / 5) as u32, + )), tx, )) .await; } - gum::info!("{} requests pending", batch.len()); + gum::info!("{}", format!("{} requests pending", batch.len()).bright_black()); while let Some(completed) = batch.next().await { let available_data = completed.unwrap().unwrap(); env.metrics().on_pov_size(available_data.encoded_size()); @@ -774,31 +777,44 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { } let block_time_delta = - Duration::from_secs(6).saturating_sub(Instant::now().sub(loop_start_ts)); - gum::info!(target: LOG_TARGET, "Sleeping till end of block {}ms", block_time_delta.as_millis()); + Duration::from_secs(6).saturating_sub(Instant::now().sub(block_start_ts)); + gum::info!(target: LOG_TARGET,"{}", format!("Sleeping till end of block ({}ms)", block_time_delta.as_millis()).bright_black()); tokio::time::sleep(block_time_delta).await; } env.send_signal(OverseerSignal::Conclude).await; let duration = start_marker.elapsed().as_millis(); let availability_bytes = availability_bytes / 1024; - gum::info!("Benchmark completed in {:?}ms", duration); - gum::info!("Throughput: {} KiB/block", availability_bytes / env.config().num_loops as u128); + gum::info!("Benchmark completed in {}", format!("{:?}ms", duration).cyan()); gum::info!( - "Block time: {} ms", - start_marker.elapsed().as_millis() / env.config().num_loops as u128 + "Throughput: {}", + format!("{} KiB/block", availability_bytes / env.config().num_blocks as u128).bright_red() + ); + gum::info!( + "Block time: {}", + format!("{} ms", start_marker.elapsed().as_millis() / env.config().num_blocks as u128).red() ); - let stats = env.network.stats(); + let stats = env.network().stats(); gum::info!( - "Total received from network: {} MiB", - stats - .iter() - .enumerate() - 
.map(|(index, stats)| stats.tx_bytes_total as u128) - .sum::() / - (1024 * 1024) + "Total received from network: {}", + format!( + "{} MiB", + stats + .iter() + .enumerate() + .map(|(_index, stats)| stats.tx_bytes_total as u128) + .sum::() / (1024 * 1024) + ) + .cyan() ); - tokio::time::sleep(Duration::from_secs(1)).await; + let test_metrics = super::core::display::parse_metrics(&env.registry()); + let subsystem_cpu_metrics = + test_metrics.subset_with_label_value("task_group", "availability-recovery-subsystem"); + gum::info!(target: LOG_TARGET, "Total subsystem CPU usage {}", format!("{:.2}s", subsystem_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum")).bright_purple()); + + let test_env_cpu_metrics = + test_metrics.subset_with_label_value("task_group", "test-environment"); + gum::info!(target: LOG_TARGET, "Total test environment CPU usage {}", format!("{:.2}s", test_env_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum")).bright_purple()); } diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index 47483d33a42a..4b63f45c5f8a 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -13,3 +13,279 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +//! Some helper methods for parsing prometheus metrics to a format that can be +//! displayed in the CLI. +//! +//! Currently histogram buckets are skipped. 
+use super::LOG_TARGET; +use colored::Colorize; +use prometheus::{ + proto::{MetricFamily, MetricType}, + Registry, +}; +use std::fmt::Display; + +#[derive(Default)] +pub struct MetricCollection(Vec); + +impl From> for MetricCollection { + fn from(metrics: Vec) -> Self { + MetricCollection(metrics) + } +} + +impl MetricCollection { + pub fn get(&self, name: &str) -> Vec<&TestMetric> { + self.all().into_iter().filter(|metric| &metric.name == name).collect() + } + + pub fn all(&self) -> &Vec { + &self.0 + } + + /// Sums up all metrics with the given name in the collection + pub fn sum_by(&self, name: &str) -> f64 { + self.all() + .into_iter() + .filter(|metric| &metric.name == name) + .map(|metric| metric.value) + .sum() + } + + pub fn subset_with_label_value(&self, label_name: &str, label_value: &str) -> MetricCollection { + self.0 + .iter() + .filter_map(|metric| { + if let Some(index) = metric.label_names.iter().position(|label| label == label_name) + { + if Some(&String::from(label_value)) == metric.label_values.get(index) { + Some(metric.clone()) + } else { + None + } + } else { + None + } + }) + .collect::>() + .into() + } +} + +impl Display for MetricCollection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "")?; + let metrics = self.all(); + for metric in metrics { + writeln!(f, "{}", metric)?; + } + Ok(()) + } +} +#[derive(Debug, Clone)] +pub struct TestMetric { + name: String, + label_names: Vec, + label_values: Vec, + value: f64, +} + +impl Display for TestMetric { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "({} = {}) [{:?}, {:?}]", + self.name.cyan(), + format!("{}", self.value).white(), + self.label_names, + self.label_values + ) + } +} + +// fn encode_impl( +// &self, +// metric_families: &[MetricFamily], +// writer: &mut dyn WriteUtf8, +// ) -> Result<()> { for mf in metric_families { // Fail-fast checks. check_metric_family(mf)?; + +// // Write `# HELP` header. 
+// let name = mf.get_name(); +// let help = mf.get_help(); +// if !help.is_empty() { +// writer.write_all("# HELP ")?; +// writer.write_all(name)?; +// writer.write_all(" ")?; +// writer.write_all(&escape_string(help, false))?; +// writer.write_all("\n")?; +// } + +// // Write `# TYPE` header. +// let metric_type = mf.get_field_type(); +// let lowercase_type = format!("{:?}", metric_type).to_lowercase(); +// writer.write_all("# TYPE ")?; +// writer.write_all(name)?; +// writer.write_all(" ")?; +// writer.write_all(&lowercase_type)?; +// writer.write_all("\n")?; + +// for m in mf.get_metric() { +// match metric_type { +// MetricType::COUNTER => { +// write_sample(writer, name, None, m, None, m.get_counter().get_value())?; +// } +// MetricType::GAUGE => { +// write_sample(writer, name, None, m, None, m.get_gauge().get_value())?; +// } +// MetricType::HISTOGRAM => { +// let h = m.get_histogram(); + +// let mut inf_seen = false; +// for b in h.get_bucket() { +// let upper_bound = b.get_upper_bound(); +// write_sample( +// writer, +// name, +// Some("_bucket"), +// m, +// Some((BUCKET_LABEL, &upper_bound.to_string())), +// b.get_cumulative_count() as f64, +// )?; +// if upper_bound.is_sign_positive() && upper_bound.is_infinite() { +// inf_seen = true; +// } +// } +// if !inf_seen { +// write_sample( +// writer, +// name, +// Some("_bucket"), +// m, +// Some((BUCKET_LABEL, POSITIVE_INF)), +// h.get_sample_count() as f64, +// )?; +// } + +// write_sample(writer, name, Some("_sum"), m, None, h.get_sample_sum())?; + +// write_sample( +// writer, +// name, +// Some("_count"), +// m, +// None, +// h.get_sample_count() as f64, +// )?; +// } +// MetricType::SUMMARY => { +// let s = m.get_summary(); + +// for q in s.get_quantile() { +// write_sample( +// writer, +// name, +// None, +// m, +// Some((QUANTILE, &q.get_quantile().to_string())), +// q.get_value(), +// )?; +// } + +// write_sample(writer, name, Some("_sum"), m, None, s.get_sample_sum())?; + +// write_sample( +// 
writer, +// name, +// Some("_count"), +// m, +// None, +// s.get_sample_count() as f64, +// )?; +// } +// MetricType::UNTYPED => { +// unimplemented!(); +// } +// } +// } +// } + +// Ok(()) +// } + +// Returns `false` if metric should be skipped. +fn check_metric_family(mf: &MetricFamily) -> bool { + if mf.get_metric().is_empty() { + gum::error!(target: LOG_TARGET, "MetricFamily has no metrics: {:?}", mf); + return false + } + if mf.get_name().is_empty() { + gum::error!(target: LOG_TARGET, "MetricFamily has no name: {:?}", mf); + return false + } + + true +} + +pub fn parse_metrics(registry: &Registry) -> MetricCollection { + let metric_families = registry.gather(); + let mut test_metrics = Vec::new(); + for mf in metric_families { + if !check_metric_family(&mf) { + continue + } + + let name: String = mf.get_name().into(); + let metric_type = mf.get_field_type(); + for m in mf.get_metric() { + let (label_names, label_values): (Vec, Vec) = m + .get_label() + .iter() + .map(|pair| (String::from(pair.get_name()), String::from(pair.get_value()))) + .unzip(); + + match metric_type { + MetricType::COUNTER => { + test_metrics.push(TestMetric { + name: name.clone(), + label_names, + label_values, + value: m.get_counter().get_value(), + }); + }, + MetricType::GAUGE => { + test_metrics.push(TestMetric { + name: name.clone(), + label_names, + label_values, + value: m.get_gauge().get_value(), + }); + }, + MetricType::HISTOGRAM => { + let h = m.get_histogram(); + let h_name = name.clone() + "_sum".into(); + test_metrics.push(TestMetric { + name: h_name, + label_names: label_names.clone(), + label_values: label_values.clone(), + value: h.get_sample_sum(), + }); + + let h_name = name.clone() + "_count".into(); + test_metrics.push(TestMetric { + name: h_name, + label_names, + label_values, + value: h.get_sample_sum(), + }); + }, + MetricType::SUMMARY => { + unimplemented!(); + }, + MetricType::UNTYPED => { + unimplemented!(); + }, + } + } + } + test_metrics.into() +} diff --git 
a/polkadot/node/subsystem-bench/src/core/keyring.rs b/polkadot/node/subsystem-bench/src/core/keyring.rs index 40e8d60d0cd1..2d9aa348a922 100644 --- a/polkadot/node/subsystem-bench/src/core/keyring.rs +++ b/polkadot/node/subsystem-bench/src/core/keyring.rs @@ -16,11 +16,9 @@ pub use sp_core::sr25519; use sp_core::{ - sr25519::{Pair, Public, Signature}, - ByteArray, Pair as PairT, H256, + sr25519::{Pair, Public}, + Pair as PairT, }; -use std::{collections::HashMap, ops::Deref}; - /// Set of test accounts. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Keyring { @@ -39,8 +37,4 @@ impl Keyring { pub fn public(self) -> Public { self.pair().public() } - - pub fn to_seed(self) -> String { - format!("//{}", self.name) - } } diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs index 4b9db3144f54..0d7b5c3c4015 100644 --- a/polkadot/node/subsystem-bench/src/core/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -14,67 +14,17 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use itertools::Itertools; use std::{ collections::HashMap, - iter::Cycle, - ops::{Div, Sub}, sync::Arc, time::{Duration, Instant}, }; - -use sc_keystore::LocalKeystore; -use sp_application_crypto::AppCrypto; -use sp_keystore::{Keystore, KeystorePtr}; - -use futures::{ - channel::{mpsc, oneshot}, - stream::FuturesUnordered, - FutureExt, SinkExt, StreamExt, -}; -use futures_timer::Delay; - -use polkadot_node_metrics::metrics::Metrics; - -use polkadot_availability_recovery::AvailabilityRecoverySubsystem; - -use parity_scale_codec::Encode; -use polkadot_node_network_protocol::request_response::{ - self as req_res, v1::ChunkResponse, IncomingRequest, ReqProtocolNames, Requests, -}; -use rand::{distributions::Uniform, prelude::Distribution, seq::IteratorRandom, thread_rng}; - -use prometheus::Registry; -use sc_network::{config::RequestResponseConfig, OutboundFailure, RequestFailure}; - -use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; -use polkadot_node_primitives::{BlockData, PoV, Proof}; -use polkadot_node_subsystem::{ - messages::{ - AllMessages, AvailabilityRecoveryMessage, AvailabilityStoreMessage, NetworkBridgeTxMessage, - RuntimeApiMessage, RuntimeApiRequest, - }, - ActiveLeavesUpdate, FromOrchestra, OverseerSignal, Subsystem, -}; -use std::net::{Ipv4Addr, SocketAddr}; - -use super::core::{keyring::Keyring, network::*, test_env::TestEnvironmentMetrics}; - const LOG_TARGET: &str = "subsystem-bench::core"; -use polkadot_node_primitives::{AvailableData, ErasureChunk}; - -use polkadot_node_subsystem_test_helpers::{ - make_buffered_subsystem_context, mock::new_leaf, TestSubsystemContextHandle, -}; -use polkadot_node_subsystem_util::TimeoutExt; -use polkadot_primitives::{ - AuthorityDiscoveryId, CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, IndexedVec, - PersistedValidationData, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, -}; -use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; -use 
sc_service::{SpawnTaskHandle, TaskManager}; +use polkadot_primitives::AuthorityDiscoveryId; +use sc_service::SpawnTaskHandle; pub mod keyring; pub mod network; pub mod test_env; +pub mod display; \ No newline at end of file diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index 170ab45e35a3..9250762f9987 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -15,7 +15,6 @@ // along with Polkadot. If not, see . use super::*; use prometheus_endpoint::U64; -use sc_network::network_state::Peer; use std::sync::atomic::{AtomicU64, Ordering}; use tokio::sync::mpsc::UnboundedSender; // An emulated node egress traffic rate_limiter. @@ -339,8 +338,7 @@ impl NetworkEmulator { } use polkadot_node_subsystem_util::metrics::{ - self, - prometheus::{self, Counter, CounterVec, Histogram, Opts, PrometheusError, Registry}, + prometheus::{CounterVec, Opts, PrometheusError, Registry}, }; /// Emulated network metrics. diff --git a/polkadot/node/subsystem-bench/src/core/test_env.rs b/polkadot/node/subsystem-bench/src/core/test_env.rs index c20b96d642af..153d5bdf95c7 100644 --- a/polkadot/node/subsystem-bench/src/core/test_env.rs +++ b/polkadot/node/subsystem-bench/src/core/test_env.rs @@ -14,15 +14,11 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use super::*; -use polkadot_node_subsystem_util::metrics::{ - self, - prometheus::{ - self, Counter, Gauge, Histogram, HistogramVec, Opts, PrometheusError, Registry, U64, - }, +use polkadot_node_subsystem_util::metrics::prometheus::{ + self, Gauge, Histogram, PrometheusError, Registry, U64, }; -const MIB: f64 = 1024.0*1024.0; +const MIB: f64 = 1024.0 * 1024.0; /// Test environment/configuration metrics #[derive(Clone)] @@ -33,22 +29,15 @@ pub struct TestEnvironmentMetrics { n_cores: Gauge, /// PoV size pov_size: Histogram, - /// Current loop - current_loop: Gauge, + /// Current block + current_block: Gauge, } impl TestEnvironmentMetrics { pub fn new(registry: &Registry) -> Result { let mut buckets = prometheus::exponential_buckets(16384.0, 2.0, 9) - .expect("arguments are always valid; qed"); - buckets.extend(vec![ - 5.0 * MIB, - 6.0 * MIB, - 7.0 * MIB, - 8.0 * MIB, - 9.0 * MIB, - 10.0 * MIB, - ]); + .expect("arguments are always valid; qed"); + buckets.extend(vec![5.0 * MIB, 6.0 * MIB, 7.0 * MIB, 8.0 * MIB, 9.0 * MIB, 10.0 * MIB]); Ok(Self { n_validators: prometheus::register( @@ -61,12 +50,12 @@ impl TestEnvironmentMetrics { n_cores: prometheus::register( Gauge::new( "subsystem_benchmark_n_cores", - "Number of cores we fetch availability for each loop", + "Number of cores we fetch availability for each block", )?, registry, )?, - current_loop: prometheus::register( - Gauge::new("subsystem_benchmark_current_loop", "The current test loop")?, + current_block: prometheus::register( + Gauge::new("subsystem_benchmark_current_block", "The current test block")?, registry, )?, pov_size: prometheus::register( @@ -75,9 +64,7 @@ impl TestEnvironmentMetrics { "subsystem_benchmark_pov_size", "The compressed size of the proof of validity of a candidate", ) - .buckets( - buckets - ), + .buckets(buckets), )?, registry, )?, @@ -92,8 +79,8 @@ impl TestEnvironmentMetrics { self.n_cores.set(n_cores as u64); } - pub fn set_current_loop(&self, current_loop: usize) { - 
self.current_loop.set(current_loop as u64); + pub fn set_current_block(&self, current_block: usize) { + self.current_block.set(current_block as u64); } pub fn on_pov_size(&self, pov_size: usize) { diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 9e581555d761..3cffd2ec427e 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -18,8 +18,9 @@ //! CI regression testing. use clap::Parser; use color_eyre::eyre; -use prometheus::proto::LabelPair; -use std::time::Duration; + +use colored::Colorize; +use std::{time::Duration, path::Path}; pub(crate) mod availability; pub(crate) mod core; @@ -77,15 +78,29 @@ pub struct DataAvailabilityReadOptions { pub fetch_from_backers: bool, #[clap(short, long, ignore_case = true, default_value_t = 1)] - /// Number of times to loop fetching for each core. - pub num_loops: usize, + /// Number of times to block fetching for each core. + pub num_blocks: usize, } + + +#[derive(Debug, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct TestSequenceOptions { + #[clap(short, long, ignore_case = true)] + pub path: String, +} + + + /// Define the supported benchmarks targets #[derive(Debug, Parser)] #[command(about = "Target subsystems", version, rename_all = "kebab-case")] enum BenchmarkTarget { /// Benchmark availability recovery strategies. DataAvailabilityRead(DataAvailabilityReadOptions), + /// Run a test sequence specified in a file + TestSequence(TestSequenceOptions), } #[derive(Debug, Parser)] @@ -131,17 +146,31 @@ fn new_runtime() -> tokio::runtime::Runtime { impl BenchCli { /// Launch a malus node. 
fn launch(self) -> eyre::Result<()> { - use prometheus::{proto::MetricType, Registry, TextEncoder}; - - println!("Preparing {:?} benchmarks", self.target); + use prometheus::Registry; let runtime = new_runtime(); - let registry = Registry::new(); let mut test_config = match self.target { + BenchmarkTarget::TestSequence(options) => { + let test_sequence = availability::TestSequence::new_from_file(Path::new(&options.path)).expect("File exists").to_vec(); + let num_steps = test_sequence.len(); + gum::info!("{}", format!("Sequence contains {} step(s)",num_steps).bright_purple()); + for (index, test_config) in test_sequence.into_iter().enumerate(){ + gum::info!("{}", format!("Current step {}/{}", index + 1, num_steps).bright_purple()); + + let candidate_count = test_config.n_cores * test_config.num_blocks; + + let mut state = TestState::new(test_config); + state.generate_candidates(candidate_count); + let mut env = TestEnvironment::new(runtime.handle().clone(), state, Registry::new()); + + runtime.block_on(availability::bench_chunk_recovery(&mut env)); + } + return Ok(()) + } BenchmarkTarget::DataAvailabilityRead(options) => match self.network { NetworkEmulation::Healthy => TestConfiguration::healthy_network( - options.num_loops, + options.num_blocks, options.fetch_from_backers, options.n_validators, options.n_cores, @@ -155,7 +184,7 @@ impl BenchCli { .collect(), ), NetworkEmulation::Degraded => TestConfiguration::degraded_network( - options.num_loops, + options.num_blocks, options.fetch_from_backers, options.n_validators, options.n_cores, @@ -169,7 +198,7 @@ impl BenchCli { .collect(), ), NetworkEmulation::Ideal => TestConfiguration::ideal_network( - options.num_loops, + options.num_blocks, options.fetch_from_backers, options.n_validators, options.n_cores, @@ -209,56 +238,15 @@ impl BenchCli { test_config.bandwidth = bandwidth * 1024; } - let candidate_count = test_config.n_cores * test_config.num_loops; + let candidate_count = test_config.n_cores * 
test_config.num_blocks; + test_config.write_to_disk(); let mut state = TestState::new(test_config); state.generate_candidates(candidate_count); - let mut env = TestEnvironment::new(runtime.handle().clone(), state, registry.clone()); - - println!("{:?}", env.config()); + let mut env = TestEnvironment::new(runtime.handle().clone(), state, Registry::new()); runtime.block_on(availability::bench_chunk_recovery(&mut env)); - let metric_families = registry.gather(); - - for familiy in metric_families { - let metric_type = familiy.get_field_type(); - - for metric in familiy.get_metric() { - match metric_type { - MetricType::HISTOGRAM => { - let h = metric.get_histogram(); - - let labels = metric.get_label(); - // Skip test env usage. - let mut env_label = LabelPair::default(); - env_label.set_name("task_group".into()); - env_label.set_value("test-environment".into()); - - let mut is_env_metric = false; - for label_pair in labels { - if &env_label == label_pair { - is_env_metric = true; - break - } - } - - if !is_env_metric { - println!( - "{:?} CPU seconds used: {:?}", - familiy.get_name(), - h.get_sample_sum() - ); - } - }, - _ => {}, - } - } - } - // encoder.encode(&metric_families, &mut buffer).unwrap(); - - // Output to the standard output. - // println!("Metrics: {}", String::from_utf8(buffer).unwrap()); Ok(()) } } @@ -267,6 +255,7 @@ fn main() -> eyre::Result<()> { color_eyre::install()?; let _ = env_logger::builder() .filter(Some("hyper"), log::LevelFilter::Info) + .filter(None, log::LevelFilter::Info) .try_init() .unwrap(); diff --git a/polkadot/node/subsystem-test-helpers/src/lib.rs b/polkadot/node/subsystem-test-helpers/src/lib.rs index 5393ccafa6f3..1c3c47150ac6 100644 --- a/polkadot/node/subsystem-test-helpers/src/lib.rs +++ b/polkadot/node/subsystem-test-helpers/src/lib.rs @@ -279,6 +279,13 @@ impl TestSubsystemContextHandle { .expect("Test subsystem no longer live") } + /// Receive the next message from the subsystem. 
+ pub async fn maybe_recv(&mut self) -> Option { + self.try_recv() + .timeout(Self::TIMEOUT) + .await + .expect("`fn recv` does not timeout") + } /// Receive the next message from the subsystem, or `None` if the channel has been closed. pub async fn try_recv(&mut self) -> Option { self.rx From d6c259df9ff7eaaa5f7207364b87a7f3a76b165e Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 15 Nov 2023 18:39:29 +0200 Subject: [PATCH 15/52] update Signed-off-by: Andrei Sandu --- .../network/availability-recovery/src/lib.rs | 3 +- .../subsystem-bench/src/availability/cli.rs | 17 ++++ .../src/availability/configuration.rs | 19 ++--- .../subsystem-bench/src/availability/mod.rs | 27 +++++-- polkadot/node/subsystem-bench/src/core/mod.rs | 2 +- .../node/subsystem-bench/src/core/network.rs | 8 +- .../node/subsystem-bench/src/core/test_env.rs | 10 +++ .../subsystem-bench/src/subsystem-bench.rs | 41 ++++++---- .../node/subsystem-bench/test_sequence.toml | 77 +++++++++++++++++++ 9 files changed, 165 insertions(+), 39 deletions(-) create mode 100644 polkadot/node/subsystem-bench/src/availability/cli.rs create mode 100644 polkadot/node/subsystem-bench/test_sequence.toml diff --git a/polkadot/node/network/availability-recovery/src/lib.rs b/polkadot/node/network/availability-recovery/src/lib.rs index ffb634ad76e2..6dafcf4ccfc8 100644 --- a/polkadot/node/network/availability-recovery/src/lib.rs +++ b/polkadot/node/network/availability-recovery/src/lib.rs @@ -582,6 +582,7 @@ impl AvailabilityRecoverySubsystem { } } + /// Starts the inner subsystem loop. pub async fn run(self, mut ctx: Context) -> SubsystemResult<()> { let mut state = State::default(); let Self { mut req_receiver, metrics, recovery_strategy_kind, bypass_availability_store } = @@ -726,8 +727,6 @@ impl AvailabilityRecoverySubsystem { } } output = state.ongoing_recoveries.select_next_some() => { - // No caching for benchmark. 
- #[cfg(not(feature = "subsystem-benchmarks"))] if let Some((candidate_hash, result)) = output { if let Ok(recovery) = CachedRecovery::try_from(result) { state.availability_lru.insert(candidate_hash, recovery); diff --git a/polkadot/node/subsystem-bench/src/availability/cli.rs b/polkadot/node/subsystem-bench/src/availability/cli.rs new file mode 100644 index 000000000000..43a938f2abea --- /dev/null +++ b/polkadot/node/subsystem-bench/src/availability/cli.rs @@ -0,0 +1,17 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use super::*; diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs index 2d29d23811da..cbad4a2dc1b8 100644 --- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -17,7 +17,7 @@ use std::path::Path; use super::*; -use serde::{Deserialize,Serialize}; +use serde::{Deserialize, Serialize}; /// Peer response latency configuration. 
#[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct PeerLatency { @@ -56,7 +56,6 @@ pub struct TestConfiguration { pub num_blocks: usize, } - impl Default for TestConfiguration { fn default() -> Self { Self { @@ -69,8 +68,8 @@ impl Default for TestConfiguration { latency: None, error: 0, num_blocks: 1, - min_pov_size: 5*1024*1024, - max_pov_size: 5*1024*1024, + min_pov_size: 5 * 1024 * 1024, + max_pov_size: 5 * 1024 * 1024, } } } @@ -79,10 +78,10 @@ fn generate_pov_sizes(count: usize, min: usize, max: usize) -> Vec { (0..count).map(|_| random_pov_size(min, max)).collect() } -#[derive(Serialize,Deserialize)] +#[derive(Serialize, Deserialize)] pub struct TestSequence { #[serde(rename(serialize = "TestConfiguration", deserialize = "TestConfiguration"))] - test_configurations: Vec + test_configurations: Vec, } impl TestSequence { @@ -90,14 +89,15 @@ impl TestSequence { // Generate Pov sizes for config in self.test_configurations.iter_mut() { - config.pov_sizes = generate_pov_sizes(config.n_cores, config.min_pov_size, config.max_pov_size); + config.pov_sizes = + generate_pov_sizes(config.n_cores, config.min_pov_size, config.max_pov_size); } self.test_configurations } } -impl TestSequence { +impl TestSequence { pub fn new_from_file(path: &Path) -> std::io::Result { let string = String::from_utf8(std::fs::read(&path)?).expect("File is valid UTF8"); Ok(toml::from_str(&string).expect("File is valid test sequence TOML")) @@ -107,7 +107,8 @@ impl TestSequence { impl TestConfiguration { pub fn write_to_disk(&self) { // Serialize a slice of configurations - let toml = toml::to_string(&TestSequence{ test_configurations: vec![self.clone()] }).unwrap(); + let toml = + toml::to_string(&TestSequence { test_configurations: vec![self.clone()] }).unwrap(); std::fs::write("last_test.toml", toml).unwrap(); } diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 2c9f3e735afb..0a0830ff9975 100644 --- 
a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -748,13 +748,15 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { env.metrics().set_n_cores(config.n_cores); for block_num in 0..env.config().num_blocks { - gum::info!(target: LOG_TARGET, "Current block {}/{}", block_num, env.config().num_blocks); + gum::info!(target: LOG_TARGET, "Current block {}/{}", block_num + 1, env.config().num_blocks); env.metrics().set_current_block(block_num); let block_start_ts = Instant::now(); for candidate_num in 0..config.n_cores as u64 { - let candidate = - env.state.next_candidate().expect("We always send up to n_cores*num_blocks; qed"); + let candidate = env + .state + .next_candidate() + .expect("We always send up to n_cores*num_blocks; qed"); let (tx, rx) = oneshot::channel(); batch.push(rx); @@ -769,7 +771,7 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { .await; } - gum::info!("{}", format!("{} requests pending", batch.len()).bright_black()); + gum::info!("{}", format!("{} recoveries pending", batch.len()).bright_black()); while let Some(completed) = batch.next().await { let available_data = completed.unwrap().unwrap(); env.metrics().on_pov_size(available_data.encoded_size()); @@ -778,6 +780,10 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let block_time_delta = Duration::from_secs(6).saturating_sub(Instant::now().sub(block_start_ts)); + + let block_time = Instant::now().sub(block_start_ts).as_millis() as u64; + env.metrics().set_block_time(block_time); + gum::info!("Block time {}", format!("{:?}ms", block_time).cyan()); gum::info!(target: LOG_TARGET,"{}", format!("Sleeping till end of block ({}ms)", block_time_delta.as_millis()).bright_black()); tokio::time::sleep(block_time_delta).await; } @@ -785,14 +791,15 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { env.send_signal(OverseerSignal::Conclude).await; let duration = 
start_marker.elapsed().as_millis(); let availability_bytes = availability_bytes / 1024; - gum::info!("Benchmark completed in {}", format!("{:?}ms", duration).cyan()); + gum::info!("All blocks processed in {}", format!("{:?}ms", duration).cyan()); gum::info!( "Throughput: {}", format!("{} KiB/block", availability_bytes / env.config().num_blocks as u128).bright_red() ); gum::info!( "Block time: {}", - format!("{} ms", start_marker.elapsed().as_millis() / env.config().num_blocks as u128).red() + format!("{} ms", start_marker.elapsed().as_millis() / env.config().num_blocks as u128) + .red() ); let stats = env.network().stats(); @@ -812,9 +819,13 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let test_metrics = super::core::display::parse_metrics(&env.registry()); let subsystem_cpu_metrics = test_metrics.subset_with_label_value("task_group", "availability-recovery-subsystem"); - gum::info!(target: LOG_TARGET, "Total subsystem CPU usage {}", format!("{:.2}s", subsystem_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum")).bright_purple()); + let total_cpu = subsystem_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); + gum::info!(target: LOG_TARGET, "Total subsystem CPU usage {}", format!("{:.2}s", total_cpu).bright_purple()); + gum::info!(target: LOG_TARGET, "CPU usage per block {}", format!("{:.2}s", total_cpu/env.config().num_blocks as f64).bright_purple()); let test_env_cpu_metrics = test_metrics.subset_with_label_value("task_group", "test-environment"); - gum::info!(target: LOG_TARGET, "Total test environment CPU usage {}", format!("{:.2}s", test_env_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum")).bright_purple()); + let total_cpu = test_env_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); + gum::info!(target: LOG_TARGET, "Total test environment CPU usage {}", format!("{:.2}s", total_cpu).bright_purple()); + gum::info!(target: LOG_TARGET, "CPU usage per block {}", format!("{:.2}s", 
total_cpu/env.config().num_blocks as f64).bright_purple()); } diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs index 0d7b5c3c4015..2e9e0364273e 100644 --- a/polkadot/node/subsystem-bench/src/core/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -24,7 +24,7 @@ const LOG_TARGET: &str = "subsystem-bench::core"; use polkadot_primitives::AuthorityDiscoveryId; use sc_service::SpawnTaskHandle; +pub mod display; pub mod keyring; pub mod network; pub mod test_env; -pub mod display; \ No newline at end of file diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index 9250762f9987..629d09df694c 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -14,9 +14,11 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . use super::*; +use colored::Colorize; use prometheus_endpoint::U64; use std::sync::atomic::{AtomicU64, Ordering}; use tokio::sync::mpsc::UnboundedSender; + // An emulated node egress traffic rate_limiter. #[derive(Debug)] pub struct RateLimit { @@ -282,6 +284,8 @@ impl NetworkEmulator { spawn_task_handle: SpawnTaskHandle, registry: &Registry, ) -> Self { + gum::info!(target: LOG_TARGET, "{}",format!("Initializing network emulation for {} peers.", n_peers).bright_blue()); + let metrics = Metrics::new(&registry).expect("Metrics always register succesfully"); let mut validator_authority_id_mapping = HashMap::new(); @@ -337,8 +341,8 @@ impl NetworkEmulator { } } -use polkadot_node_subsystem_util::metrics::{ - prometheus::{CounterVec, Opts, PrometheusError, Registry}, +use polkadot_node_subsystem_util::metrics::prometheus::{ + self, CounterVec, Opts, PrometheusError, Registry, }; /// Emulated network metrics. 
diff --git a/polkadot/node/subsystem-bench/src/core/test_env.rs b/polkadot/node/subsystem-bench/src/core/test_env.rs index 153d5bdf95c7..e6b09a1c13e6 100644 --- a/polkadot/node/subsystem-bench/src/core/test_env.rs +++ b/polkadot/node/subsystem-bench/src/core/test_env.rs @@ -31,6 +31,8 @@ pub struct TestEnvironmentMetrics { pov_size: Histogram, /// Current block current_block: Gauge, + /// Current block + block_time: Gauge, } impl TestEnvironmentMetrics { @@ -58,6 +60,10 @@ impl TestEnvironmentMetrics { Gauge::new("subsystem_benchmark_current_block", "The current test block")?, registry, )?, + block_time: prometheus::register( + Gauge::new("subsystem_benchmark_block_time", "The time it takes for the target subsystems(s) to complete all the requests in a block")?, + registry, + )?, pov_size: prometheus::register( Histogram::with_opts( prometheus::HistogramOpts::new( @@ -83,6 +89,10 @@ impl TestEnvironmentMetrics { self.current_block.set(current_block as u64); } + pub fn set_block_time(&self, block_time_ms: u64) { + self.block_time.set(block_time_ms); + } + pub fn on_pov_size(&self, pov_size: usize) { self.pov_size.observe(pov_size as f64); } diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 3cffd2ec427e..280172662453 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -20,7 +20,7 @@ use clap::Parser; use color_eyre::eyre; use colored::Colorize; -use std::{time::Duration, path::Path}; +use std::{path::Path, time::Duration}; pub(crate) mod availability; pub(crate) mod core; @@ -82,7 +82,6 @@ pub struct DataAvailabilityReadOptions { pub num_blocks: usize, } - #[derive(Debug, clap::Parser)] #[clap(rename_all = "kebab-case")] #[allow(missing_docs)] @@ -91,12 +90,10 @@ pub struct TestSequenceOptions { pub path: String, } - - /// Define the supported benchmarks targets #[derive(Debug, Parser)] -#[command(about = "Target 
subsystems", version, rename_all = "kebab-case")] -enum BenchmarkTarget { +#[command(about = "Test objectives", version, rename_all = "kebab-case")] +enum TestObjective { /// Benchmark availability recovery strategies. DataAvailabilityRead(DataAvailabilityReadOptions), /// Run a test sequence specified in a file @@ -131,7 +128,7 @@ struct BenchCli { pub peer_max_latency: Option, #[command(subcommand)] - pub target: BenchmarkTarget, + pub objective: TestObjective, } fn new_runtime() -> tokio::runtime::Runtime { @@ -150,25 +147,35 @@ impl BenchCli { let runtime = new_runtime(); - let mut test_config = match self.target { - BenchmarkTarget::TestSequence(options) => { - let test_sequence = availability::TestSequence::new_from_file(Path::new(&options.path)).expect("File exists").to_vec(); + let mut test_config = match self.objective { + TestObjective::TestSequence(options) => { + let test_sequence = + availability::TestSequence::new_from_file(Path::new(&options.path)) + .expect("File exists") + .to_vec(); let num_steps = test_sequence.len(); - gum::info!("{}", format!("Sequence contains {} step(s)",num_steps).bright_purple()); - for (index, test_config) in test_sequence.into_iter().enumerate(){ - gum::info!("{}", format!("Current step {}/{}", index + 1, num_steps).bright_purple()); + gum::info!( + "{}", + format!("Sequence contains {} step(s)", num_steps).bright_purple() + ); + for (index, test_config) in test_sequence.into_iter().enumerate() { + gum::info!( + "{}", + format!("Current step {}/{}", index + 1, num_steps).bright_purple() + ); let candidate_count = test_config.n_cores * test_config.num_blocks; let mut state = TestState::new(test_config); state.generate_candidates(candidate_count); - let mut env = TestEnvironment::new(runtime.handle().clone(), state, Registry::new()); - + let mut env = + TestEnvironment::new(runtime.handle().clone(), state, Registry::new()); + runtime.block_on(availability::bench_chunk_recovery(&mut env)); } return Ok(()) - } - 
BenchmarkTarget::DataAvailabilityRead(options) => match self.network { + }, + TestObjective::DataAvailabilityRead(options) => match self.network { NetworkEmulation::Healthy => TestConfiguration::healthy_network( options.num_blocks, options.fetch_from_backers, diff --git a/polkadot/node/subsystem-bench/test_sequence.toml b/polkadot/node/subsystem-bench/test_sequence.toml new file mode 100644 index 000000000000..d32477b9efe9 --- /dev/null +++ b/polkadot/node/subsystem-bench/test_sequence.toml @@ -0,0 +1,77 @@ +[[TestConfiguration]] +use_fast_path = false +n_validators = 300 +n_cores = 20 +min_pov_size = 5242880 +max_pov_size = 5242880 +peer_bandwidth = 128000 +bandwidth = 52428800 +error = 33 +num_blocks = 5 + +[TestConfiguration.latency.min_latency] +secs = 0 +nanos = 1000000 + +[TestConfiguration.latency.max_latency] +secs = 0 +nanos = 100000000 + +[[TestConfiguration]] +use_fast_path = false +n_validators = 500 +n_cores = 20 +min_pov_size = 5242880 +max_pov_size = 5242880 +peer_bandwidth = 128000 +bandwidth = 52428800 +error = 33 +num_blocks = 5 + +[TestConfiguration.latency.min_latency] +secs = 0 +nanos = 1000000 + +[TestConfiguration.latency.max_latency] +secs = 0 +nanos = 1000000000 + + +[[TestConfiguration]] +use_fast_path = false +n_validators = 1000 +n_cores = 20 +min_pov_size = 5242880 +max_pov_size = 5242880 +peer_bandwidth = 128000 +bandwidth = 52428800 +error = 33 +num_blocks = 5 + +[TestConfiguration.latency.min_latency] +secs = 0 +nanos = 1000000 + +[TestConfiguration.latency.max_latency] +secs = 0 +nanos = 1000000000 + + +[[TestConfiguration]] +use_fast_path = false +n_validators = 2000 +n_cores = 20 +min_pov_size = 5242880 +max_pov_size = 5242880 +peer_bandwidth = 128000 +bandwidth = 52428800 +error = 33 +num_blocks = 5 + +[TestConfiguration.latency.min_latency] +secs = 0 +nanos = 1000000 + +[TestConfiguration.latency.max_latency] +secs = 0 +nanos = 1000000000 From 050529b68ca2402e79e593968571606580d5ca7a Mon Sep 17 00:00:00 2001 From: Andrei Sandu 
Date: Wed, 15 Nov 2023 18:47:05 +0200 Subject: [PATCH 16/52] remove comment Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/src/subsystem-bench.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 280172662453..7dcc8a15074a 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -141,7 +141,6 @@ fn new_runtime() -> tokio::runtime::Runtime { } impl BenchCli { - /// Launch a malus node. fn launch(self) -> eyre::Result<()> { use prometheus::Registry; From cb38be5c505863df72567efa3a0e3489b9bc42eb Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 17 Nov 2023 11:38:23 +0200 Subject: [PATCH 17/52] separate cli options for availability Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/cli.rs | 44 +++++++++++++++- .../subsystem-bench/src/availability/mod.rs | 7 +-- .../src/core/{test_env.rs => environment.rs} | 2 + polkadot/node/subsystem-bench/src/core/mod.rs | 2 +- .../subsystem-bench/src/subsystem-bench.rs | 52 ++----------------- 5 files changed, 55 insertions(+), 52 deletions(-) rename polkadot/node/subsystem-bench/src/core/{test_env.rs => environment.rs} (98%) diff --git a/polkadot/node/subsystem-bench/src/availability/cli.rs b/polkadot/node/subsystem-bench/src/availability/cli.rs index 43a938f2abea..ef4d7e6f631a 100644 --- a/polkadot/node/subsystem-bench/src/availability/cli.rs +++ b/polkadot/node/subsystem-bench/src/availability/cli.rs @@ -14,4 +14,46 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use super::*; +#[derive(Debug, clap::Parser, Clone)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct NetworkOptions {} + +#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] +#[value(rename_all = "kebab-case")] +#[non_exhaustive] +pub enum NetworkEmulation { + Ideal, + Healthy, + Degraded, +} + +#[derive(Debug, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct DataAvailabilityReadOptions { + #[clap(long, ignore_case = true, default_value_t = 100)] + /// Number of cores to fetch availability for. + pub n_cores: usize, + + #[clap(long, ignore_case = true, default_value_t = 500)] + /// Number of validators to fetch chunks from. + pub n_validators: usize, + + #[clap(long, ignore_case = true, default_value_t = 5120)] + /// The minimum pov size in KiB + pub min_pov_size: usize, + + #[clap(long, ignore_case = true, default_value_t = 5120)] + /// The maximum pov size bytes + pub max_pov_size: usize, + + #[clap(short, long, default_value_t = false)] + /// Turbo boost AD Read by fetching from backers first. Tipically this is only faster if nodes + /// have enough bandwidth. + pub fetch_from_backers: bool, + + #[clap(short, long, ignore_case = true, default_value_t = 1)] + /// Number of times to block fetching for each core. 
+ pub num_blocks: usize, +} diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 0a0830ff9975..8866348ea22b 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -53,7 +53,7 @@ use polkadot_node_subsystem::{ }; use std::net::{Ipv4Addr, SocketAddr}; -use super::core::{keyring::Keyring, network::*, test_env::TestEnvironmentMetrics}; +use super::core::{environment::TestEnvironmentMetrics, keyring::Keyring, network::*}; const LOG_TARGET: &str = "subsystem-bench::availability"; @@ -70,8 +70,9 @@ use polkadot_primitives::{ use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::{SpawnTaskHandle, TaskManager}; +mod cli; pub mod configuration; - +pub use cli::{DataAvailabilityReadOptions, NetworkEmulation, NetworkOptions}; pub use configuration::{PeerLatency, TestConfiguration, TestSequence}; // Deterministic genesis hash for protocol names @@ -162,7 +163,7 @@ impl TestEnvironment { let (ingress_tx, mut ingress_rx) = tokio::sync::mpsc::unbounded_channel::(); let our_network_stats = network.peer_stats(0); - spawn_handle.spawn_blocking("our-node-rx", "test-environment", async move { + spawn_handle.spawn_blocking("node0-rx", "test-environment", async move { while let Some(action) = ingress_rx.recv().await { let size = action.size(); diff --git a/polkadot/node/subsystem-bench/src/core/test_env.rs b/polkadot/node/subsystem-bench/src/core/environment.rs similarity index 98% rename from polkadot/node/subsystem-bench/src/core/test_env.rs rename to polkadot/node/subsystem-bench/src/core/environment.rs index e6b09a1c13e6..6a680799972d 100644 --- a/polkadot/node/subsystem-bench/src/core/test_env.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -14,6 +14,8 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
+use super::*; +use network::NetworkEmulator; use polkadot_node_subsystem_util::metrics::prometheus::{ self, Gauge, Histogram, PrometheusError, Registry, U64, }; diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs index 2e9e0364273e..564fb7148fa0 100644 --- a/polkadot/node/subsystem-bench/src/core/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -25,6 +25,6 @@ use polkadot_primitives::AuthorityDiscoveryId; use sc_service::SpawnTaskHandle; pub mod display; +pub mod environment; pub mod keyring; pub mod network; -pub mod test_env; diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 7dcc8a15074a..42efb7fd63c8 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -25,10 +25,13 @@ use std::{path::Path, time::Duration}; pub(crate) mod availability; pub(crate) mod core; -use availability::{random_pov_size, TestConfiguration, TestEnvironment, TestState}; -const LOG_TARGET: &str = "subsystem-bench"; +use availability::{ + random_pov_size, DataAvailabilityReadOptions, NetworkEmulation, TestConfiguration, + TestEnvironment, TestState, +}; use clap_num::number_range; +const LOG_TARGET: &str = "subsystem-bench"; fn le_100(s: &str) -> Result { number_range(s, 0, 100) @@ -37,51 +40,6 @@ fn le_100(s: &str) -> Result { fn le_5000(s: &str) -> Result { number_range(s, 0, 5000) } - -#[derive(Debug, clap::Parser, Clone)] -#[clap(rename_all = "kebab-case")] -#[allow(missing_docs)] -pub struct NetworkOptions {} - -#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] -#[value(rename_all = "kebab-case")] -#[non_exhaustive] -pub enum NetworkEmulation { - Ideal, - Healthy, - Degraded, -} - -#[derive(Debug, clap::Parser)] -#[clap(rename_all = "kebab-case")] -#[allow(missing_docs)] -pub struct DataAvailabilityReadOptions { - #[clap(long, ignore_case = true, default_value_t = 
100)] - /// Number of cores to fetch availability for. - pub n_cores: usize, - - #[clap(long, ignore_case = true, default_value_t = 500)] - /// Number of validators to fetch chunks from. - pub n_validators: usize, - - #[clap(long, ignore_case = true, default_value_t = 5120)] - /// The minimum pov size in KiB - pub min_pov_size: usize, - - #[clap(long, ignore_case = true, default_value_t = 5120)] - /// The maximum pov size bytes - pub max_pov_size: usize, - - #[clap(short, long, default_value_t = false)] - /// Turbo boost AD Read by fetching from backers first. Tipically this is only faster if nodes - /// have enough bandwidth. - pub fetch_from_backers: bool, - - #[clap(short, long, ignore_case = true, default_value_t = 1)] - /// Number of times to block fetching for each core. - pub num_blocks: usize, -} - #[derive(Debug, clap::Parser)] #[clap(rename_all = "kebab-case")] #[allow(missing_docs)] From 24a736afb7727f2cc4780748edcc873692928503 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 17 Nov 2023 13:07:40 +0200 Subject: [PATCH 18/52] implement unified and extensible configuration Signed-off-by: Andrei Sandu --- Cargo.lock | 52 +++-- polkadot/node/subsystem-bench/Cargo.toml | 2 +- .../subsystem-bench/src/availability/cli.rs | 23 +-- .../src/availability/configuration.rs | 169 +--------------- .../subsystem-bench/src/availability/mod.rs | 32 +-- polkadot/node/subsystem-bench/src/cli.rs | 65 ++++++ .../subsystem-bench/src/core/configuration.rs | 190 ++++++++++++++++++ polkadot/node/subsystem-bench/src/core/mod.rs | 1 + .../node/subsystem-bench/src/core/network.rs | 1 - .../subsystem-bench/src/subsystem-bench.rs | 100 ++++----- .../node/subsystem-bench/test_sequence.toml | 77 ------- .../node/subsystem-bench/test_sequence.yaml | 56 ++++++ 12 files changed, 398 insertions(+), 370 deletions(-) create mode 100644 polkadot/node/subsystem-bench/src/cli.rs create mode 100644 polkadot/node/subsystem-bench/src/core/configuration.rs delete mode 100644 
polkadot/node/subsystem-bench/test_sequence.toml create mode 100644 polkadot/node/subsystem-bench/test_sequence.yaml diff --git a/Cargo.lock b/Cargo.lock index 73fc3cbdeccc..b40a40db47b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8579,7 +8579,7 @@ dependencies = [ "itertools 0.10.5", "tar", "tempfile", - "toml_edit 0.19.14", + "toml_edit", ] [[package]] @@ -13044,13 +13044,13 @@ dependencies = [ "sc-network", "sc-service", "serde", + "serde_yaml", "sp-application-crypto", "sp-core", "sp-keyring", "sp-keystore", "substrate-prometheus-endpoint", "tokio", - "toml 0.8.8", "tracing-gum", ] @@ -13455,7 +13455,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" dependencies = [ "once_cell", - "toml_edit 0.19.14", + "toml_edit", ] [[package]] @@ -16349,6 +16349,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" +dependencies = [ + "indexmap 2.0.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "serial_test" version = "2.0.0" @@ -18833,19 +18846,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.19.14", -] - -[[package]] -name = "toml" -version = "0.8.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a195ec8c9da26928f773888e0742ca3ca1040c6cd859c919c9f59c1954ab35" -dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit 0.21.0", + "toml_edit", ] [[package]] @@ -18870,19 +18871,6 @@ dependencies = [ "winnow", ] -[[package]] -name = "toml_edit" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" -dependencies = [ - "indexmap 2.0.0", - "serde", - "serde_spanned", - 
"toml_datetime", - "winnow", -] - [[package]] name = "tower" version = "0.4.13" @@ -19325,6 +19313,12 @@ dependencies = [ "subtle 2.4.1", ] +[[package]] +name = "unsafe-libyaml" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" + [[package]] name = "unsigned-varint" version = "0.7.1" diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index 3308b6fe1052..9dab8dce8455 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -51,8 +51,8 @@ itertools = "0.11.0" polkadot-primitives-test-helpers = { path = "../../primitives/test-helpers" } prometheus_endpoint = { package = "substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" } prometheus = { version = "0.13.0", default-features = false } -toml = "0.8.8" serde = "1.0.192" +serde_yaml = "0.9" [features] default = [] diff --git a/polkadot/node/subsystem-bench/src/availability/cli.rs b/polkadot/node/subsystem-bench/src/availability/cli.rs index ef4d7e6f631a..06fb2966d878 100644 --- a/polkadot/node/subsystem-bench/src/availability/cli.rs +++ b/polkadot/node/subsystem-bench/src/availability/cli.rs @@ -14,6 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +use serde::{Deserialize, Serialize}; #[derive(Debug, clap::Parser, Clone)] #[clap(rename_all = "kebab-case")] #[allow(missing_docs)] @@ -28,32 +29,12 @@ pub enum NetworkEmulation { Degraded, } -#[derive(Debug, clap::Parser)] +#[derive(Debug, Clone, Serialize, Deserialize, clap::Parser)] #[clap(rename_all = "kebab-case")] #[allow(missing_docs)] pub struct DataAvailabilityReadOptions { - #[clap(long, ignore_case = true, default_value_t = 100)] - /// Number of cores to fetch availability for. 
- pub n_cores: usize, - - #[clap(long, ignore_case = true, default_value_t = 500)] - /// Number of validators to fetch chunks from. - pub n_validators: usize, - - #[clap(long, ignore_case = true, default_value_t = 5120)] - /// The minimum pov size in KiB - pub min_pov_size: usize, - - #[clap(long, ignore_case = true, default_value_t = 5120)] - /// The maximum pov size bytes - pub max_pov_size: usize, - #[clap(short, long, default_value_t = false)] /// Turbo boost AD Read by fetching from backers first. Tipically this is only faster if nodes /// have enough bandwidth. pub fetch_from_backers: bool, - - #[clap(short, long, ignore_case = true, default_value_t = 1)] - /// Number of times to block fetching for each core. - pub num_blocks: usize, } diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs index cbad4a2dc1b8..f96b8e2cb7ce 100644 --- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -14,175 +14,12 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::path::Path; - use super::*; use serde::{Deserialize, Serialize}; -/// Peer response latency configuration. -#[derive(Clone, Debug, Default, Serialize, Deserialize)] -pub struct PeerLatency { - /// Min latency for `NetworkAction` completion. - pub min_latency: Duration, - /// Max latency or `NetworkAction` completion. - pub max_latency: Duration, -} /// The test input parameters -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct TestConfiguration { - /// Configuration for the `availability-recovery` subsystem. 
+#[derive(Clone, Default, Debug, Serialize, Deserialize)] +pub struct AvailabilityRecoveryConfiguration { + /// Prefer the fast path (try fetch from backers first) pub use_fast_path: bool, - /// Number of validators - pub n_validators: usize, - /// Number of cores - pub n_cores: usize, - /// The min PoV size - pub min_pov_size: usize, - /// The max PoV size, - pub max_pov_size: usize, - /// Randomly sampled pov_sizes - #[serde(skip)] - pov_sizes: Vec, - /// The amount of bandiwdth remote validators have. - pub peer_bandwidth: usize, - /// The amount of bandiwdth our node has. - pub bandwidth: usize, - /// Optional peer emulation latency - pub latency: Option, - /// Error probability - pub error: usize, - /// Number of blocks - /// In one block `n_cores` candidates are recovered - pub num_blocks: usize, -} - -impl Default for TestConfiguration { - fn default() -> Self { - Self { - use_fast_path: false, - n_validators: 100, - n_cores: 10, - pov_sizes: vec![5 * 1024 * 1024], - bandwidth: 60 * 1024 * 1024, - peer_bandwidth: 60 * 1024 * 1024, - latency: None, - error: 0, - num_blocks: 1, - min_pov_size: 5 * 1024 * 1024, - max_pov_size: 5 * 1024 * 1024, - } - } -} - -fn generate_pov_sizes(count: usize, min: usize, max: usize) -> Vec { - (0..count).map(|_| random_pov_size(min, max)).collect() -} - -#[derive(Serialize, Deserialize)] -pub struct TestSequence { - #[serde(rename(serialize = "TestConfiguration", deserialize = "TestConfiguration"))] - test_configurations: Vec, -} - -impl TestSequence { - pub fn to_vec(mut self) -> Vec { - // Generate Pov sizes - - for config in self.test_configurations.iter_mut() { - config.pov_sizes = - generate_pov_sizes(config.n_cores, config.min_pov_size, config.max_pov_size); - } - - self.test_configurations - } -} - -impl TestSequence { - pub fn new_from_file(path: &Path) -> std::io::Result { - let string = String::from_utf8(std::fs::read(&path)?).expect("File is valid UTF8"); - Ok(toml::from_str(&string).expect("File is valid test 
sequence TOML")) - } -} - -impl TestConfiguration { - pub fn write_to_disk(&self) { - // Serialize a slice of configurations - let toml = - toml::to_string(&TestSequence { test_configurations: vec![self.clone()] }).unwrap(); - std::fs::write("last_test.toml", toml).unwrap(); - } - - pub fn pov_sizes(&self) -> &[usize] { - &self.pov_sizes - } - /// An unconstrained standard configuration matching Polkadot/Kusama - pub fn ideal_network( - num_blocks: usize, - use_fast_path: bool, - n_validators: usize, - n_cores: usize, - pov_sizes: Vec, - ) -> TestConfiguration { - Self { - use_fast_path, - n_cores, - n_validators, - pov_sizes, - bandwidth: 50 * 1024 * 1024, - peer_bandwidth: 50 * 1024 * 1024, - // No latency - latency: None, - error: 0, - num_blocks, - ..Default::default() - } - } - - pub fn healthy_network( - num_blocks: usize, - use_fast_path: bool, - n_validators: usize, - n_cores: usize, - pov_sizes: Vec, - ) -> TestConfiguration { - Self { - use_fast_path, - n_cores, - n_validators, - pov_sizes, - bandwidth: 50 * 1024 * 1024, - peer_bandwidth: 50 * 1024 * 1024, - latency: Some(PeerLatency { - min_latency: Duration::from_millis(1), - max_latency: Duration::from_millis(100), - }), - error: 3, - num_blocks, - ..Default::default() - } - } - - pub fn degraded_network( - num_blocks: usize, - use_fast_path: bool, - n_validators: usize, - n_cores: usize, - pov_sizes: Vec, - ) -> TestConfiguration { - Self { - use_fast_path, - n_cores, - n_validators, - pov_sizes, - bandwidth: 50 * 1024 * 1024, - peer_bandwidth: 50 * 1024 * 1024, - latency: Some(PeerLatency { - min_latency: Duration::from_millis(10), - max_latency: Duration::from_millis(500), - }), - error: 33, - num_blocks, - ..Default::default() - } - } } diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 8866348ea22b..9be15c576e3a 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ 
b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -53,12 +53,18 @@ use polkadot_node_subsystem::{ }; use std::net::{Ipv4Addr, SocketAddr}; -use super::core::{environment::TestEnvironmentMetrics, keyring::Keyring, network::*}; +use super::core::{ + configuration::{PeerLatency, TestConfiguration}, + environment::TestEnvironmentMetrics, + keyring::Keyring, + network::*, +}; const LOG_TARGET: &str = "subsystem-bench::availability"; use polkadot_node_primitives::{AvailableData, ErasureChunk}; +use super::cli::TestObjective; use polkadot_node_subsystem_test_helpers::{ make_buffered_subsystem_context, mock::new_leaf, TestSubsystemContextHandle, }; @@ -73,7 +79,7 @@ use sc_service::{SpawnTaskHandle, TaskManager}; mod cli; pub mod configuration; pub use cli::{DataAvailabilityReadOptions, NetworkEmulation, NetworkOptions}; -pub use configuration::{PeerLatency, TestConfiguration, TestSequence}; +pub use configuration::AvailabilityRecoveryConfiguration; // Deterministic genesis hash for protocol names const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); @@ -138,7 +144,10 @@ impl TestEnvironment { let (instance, virtual_overseer) = AvailabilityRecoverySubsystemInstance::new( &registry, task_manager.spawn_handle(), - state.config().use_fast_path, + match &state.config().objective { + TestObjective::DataAvailabilityRead(options) => options.fetch_from_backers, + _ => panic!("Unexpected objective"), + }, ); let metrics = @@ -491,16 +500,6 @@ impl AvailabilityRecoverySubsystemInstance { } } -pub fn random_pov_size(min_pov_size: usize, max_pov_size: usize) -> usize { - random_uniform_sample(min_pov_size, max_pov_size) -} - -fn random_uniform_sample + From>(min_value: T, max_value: T) -> T { - Uniform::from(min_value.into()..=max_value.into()) - .sample(&mut thread_rng()) - .into() -} - // We use this to bail out sending messages to the subsystem if it is overloaded such that // the time of flight is breaches 5s. // This should eventually be a test parameter. 
@@ -508,6 +507,9 @@ const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); #[derive(Clone)] pub struct TestState { + // Full test configuration + config: TestConfiguration, + // State starts here. validator_public: Vec, validator_authority_id: Vec, // The test node validator index. @@ -527,8 +529,6 @@ pub struct TestState { candidate_receipts: Vec, available_data: Vec, chunks: Vec>, - /// Next candidate index in - config: TestConfiguration, } impl TestState { @@ -687,6 +687,7 @@ impl TestState { gum::info!(target: LOG_TARGET, "{}","Created test environment.".bright_blue()); Self { + config, validator_public, validator_authority_id, validator_index, @@ -695,7 +696,6 @@ impl TestState { available_data, candidate_receipts, chunks, - config, pov_size_to_candidate, pov_sizes, candidates_generated: 0, diff --git a/polkadot/node/subsystem-bench/src/cli.rs b/polkadot/node/subsystem-bench/src/cli.rs new file mode 100644 index 000000000000..2f00ad2f3585 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/cli.rs @@ -0,0 +1,65 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . 
+use super::availability::{ + AvailabilityRecoveryConfiguration, DataAvailabilityReadOptions, NetworkEmulation, + TestEnvironment, TestState, +}; +use serde::{Deserialize, Serialize}; + +use super::core::configuration::{PeerLatency, TestConfiguration, TestSequence}; + +#[derive(Debug, Clone, Serialize, Deserialize, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct TestSequenceOptions { + #[clap(short, long, ignore_case = true)] + pub path: String, +} + +/// Define the supported benchmark targets +#[derive(Debug, Clone, clap::Parser, Serialize, Deserialize)] +#[command(about = "Test objectives", version, rename_all = "kebab-case")] +pub enum TestObjective { + /// Benchmark availability recovery strategies. + DataAvailabilityRead(DataAvailabilityReadOptions), + /// Run a test sequence specified in a file + TestSequence(TestSequenceOptions), +} + +#[derive(Debug, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct StandardTestOptions { + #[clap(long, ignore_case = true, default_value_t = 100)] + /// Number of cores to fetch availability for. + pub n_cores: usize, + + #[clap(long, ignore_case = true, default_value_t = 500)] + /// Number of validators to fetch chunks from. + pub n_validators: usize, + + #[clap(long, ignore_case = true, default_value_t = 5120)] + /// The minimum pov size in KiB + pub min_pov_size: usize, + + #[clap(long, ignore_case = true, default_value_t = 5120)] + /// The maximum pov size in KiB + pub max_pov_size: usize, + + #[clap(short, long, ignore_case = true, default_value_t = 1)] + /// The number of blocks the test is going to run. 
+ pub num_blocks: usize, +} diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs new file mode 100644 index 000000000000..017d4023ef65 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -0,0 +1,190 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +use std::path::Path; + +use crate::availability::AvailabilityRecoveryConfiguration; + +use super::*; +pub use crate::cli::TestObjective; +use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; +use serde::{Deserialize, Serialize}; + +pub fn random_pov_size(min_pov_size: usize, max_pov_size: usize) -> usize { + random_uniform_sample(min_pov_size, max_pov_size) +} + +fn random_uniform_sample + From>(min_value: T, max_value: T) -> T { + Uniform::from(min_value.into()..=max_value.into()) + .sample(&mut thread_rng()) + .into() +} + +/// Peer response latency configuration. +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct PeerLatency { + /// Min latency for `NetworkAction` completion. + pub min_latency: Duration, + /// Max latency for `NetworkAction` completion. 
+ pub max_latency: Duration, +} + +/// The test input parameters +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct TestConfiguration { + /// The test objective + pub objective: TestObjective, + /// Number of validators + pub n_validators: usize, + /// Number of cores + pub n_cores: usize, + /// The min PoV size + pub min_pov_size: usize, + /// The max PoV size, + pub max_pov_size: usize, + /// Randomly sampled pov_sizes + #[serde(skip)] + pov_sizes: Vec, + /// The amount of bandwidth remote validators have. + pub peer_bandwidth: usize, + /// The amount of bandwidth our node has. + pub bandwidth: usize, + /// Optional peer emulation latency + pub latency: Option, + /// Error probability + pub error: usize, + /// Number of blocks + /// In one block `n_cores` candidates are recovered + pub num_blocks: usize, +} + +fn generate_pov_sizes(count: usize, min_kib: usize, max_kib: usize) -> Vec { + (0..count).map(|_| random_pov_size(min_kib * 1024, max_kib * 1024)).collect() +} + +#[derive(Serialize, Deserialize)] +pub struct TestSequence { + #[serde(rename(serialize = "TestConfiguration", deserialize = "TestConfiguration"))] + test_configurations: Vec, +} + +impl TestSequence { + pub fn to_vec(mut self) -> Vec { + self.test_configurations + .into_iter() + .map(|mut config| { + config.pov_sizes = + generate_pov_sizes(config.n_cores, config.min_pov_size, config.max_pov_size); + config + }) + .collect() + } +} + +impl TestSequence { + pub fn new_from_file(path: &Path) -> std::io::Result { + let string = String::from_utf8(std::fs::read(&path)?).expect("File is valid UTF8"); + Ok(serde_yaml::from_str(&string).expect("File is valid test sequence YA")) + } +} + +impl TestConfiguration { + pub fn write_to_disk(&self) { + // Serialize a slice of configurations + let yaml = serde_yaml::to_string(&TestSequence { test_configurations: vec![self.clone()] }) + .unwrap(); + std::fs::write("last_test.yaml", yaml).unwrap(); + } + + pub fn pov_sizes(&self) -> &[usize] { + 
&self.pov_sizes + } + /// An unconstrained standard configuration matching Polkadot/Kusama + pub fn ideal_network( + objective: TestObjective, + num_blocks: usize, + n_validators: usize, + n_cores: usize, + min_pov_size: usize, + max_pov_size: usize, + ) -> TestConfiguration { + Self { + objective, + n_cores, + n_validators, + pov_sizes: generate_pov_sizes(n_cores, min_pov_size, max_pov_size), + bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, + // No latency + latency: None, + error: 0, + num_blocks, + min_pov_size, + max_pov_size, + } + } + + pub fn healthy_network( + objective: TestObjective, + num_blocks: usize, + n_validators: usize, + n_cores: usize, + min_pov_size: usize, + max_pov_size: usize, + ) -> TestConfiguration { + Self { + objective, + n_cores, + n_validators, + pov_sizes: generate_pov_sizes(n_cores, min_pov_size, max_pov_size), + bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, + latency: Some(PeerLatency { + min_latency: Duration::from_millis(1), + max_latency: Duration::from_millis(100), + }), + error: 3, + num_blocks, + min_pov_size, + max_pov_size, + } + } + + pub fn degraded_network( + objective: TestObjective, + num_blocks: usize, + n_validators: usize, + n_cores: usize, + min_pov_size: usize, + max_pov_size: usize, + ) -> TestConfiguration { + Self { + objective, + n_cores, + n_validators, + pov_sizes: generate_pov_sizes(n_cores, min_pov_size, max_pov_size), + bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, + latency: Some(PeerLatency { + min_latency: Duration::from_millis(10), + max_latency: Duration::from_millis(500), + }), + error: 33, + num_blocks, + min_pov_size, + max_pov_size, + } + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs index 564fb7148fa0..06aa58f7256b 100644 --- a/polkadot/node/subsystem-bench/src/core/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -24,6 +24,7 @@ const LOG_TARGET: &str = 
"subsystem-bench::core"; use polkadot_primitives::AuthorityDiscoveryId; use sc_service::SpawnTaskHandle; +pub mod configuration; pub mod display; pub mod environment; pub mod keyring; diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index 629d09df694c..f20bb919dedb 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -113,7 +113,6 @@ mod tests { let end = Instant::now(); - // assert_eq!(end - start, Duration::from_secs(1)); println!("duration: {}", (end - start).as_millis()); // Allow up to `budget/max_refill` error tolerance diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 42efb7fd63c8..b94e594e5945 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -23,12 +23,16 @@ use colored::Colorize; use std::{path::Path, time::Duration}; pub(crate) mod availability; +pub(crate) mod cli; pub(crate) mod core; use availability::{ - random_pov_size, DataAvailabilityReadOptions, NetworkEmulation, TestConfiguration, + AvailabilityRecoveryConfiguration, DataAvailabilityReadOptions, NetworkEmulation, TestEnvironment, TestState, }; +use cli::TestObjective; + +use core::configuration::{PeerLatency, TestConfiguration, TestSequence}; use clap_num::number_range; const LOG_TARGET: &str = "subsystem-bench"; @@ -40,23 +44,6 @@ fn le_100(s: &str) -> Result { fn le_5000(s: &str) -> Result { number_range(s, 0, 5000) } -#[derive(Debug, clap::Parser)] -#[clap(rename_all = "kebab-case")] -#[allow(missing_docs)] -pub struct TestSequenceOptions { - #[clap(short, long, ignore_case = true)] - pub path: String, -} - -/// Define the supported benchmarks targets -#[derive(Debug, Parser)] -#[command(about = "Test objectives", version, rename_all = "kebab-case")] -enum TestObjective { - /// Benchmark availability 
recovery strategies. - DataAvailabilityRead(DataAvailabilityReadOptions), - /// Run a test sequence specified in a file - TestSequence(TestSequenceOptions), -} #[derive(Debug, Parser)] #[allow(missing_docs)] @@ -65,6 +52,9 @@ struct BenchCli { /// The type of network to be emulated pub network: NetworkEmulation, + #[clap(flatten)] + pub standard_configuration: cli::StandardTestOptions, + #[clap(short, long)] /// The bandwidth of simulated remote peers in KiB pub peer_bandwidth: Option, @@ -86,7 +76,7 @@ struct BenchCli { pub peer_max_latency: Option, #[command(subcommand)] - pub objective: TestObjective, + pub objective: cli::TestObjective, } fn new_runtime() -> tokio::runtime::Runtime { @@ -104,10 +94,11 @@ impl BenchCli { let runtime = new_runtime(); + let configuration = self.standard_configuration; let mut test_config = match self.objective { TestObjective::TestSequence(options) => { let test_sequence = - availability::TestSequence::new_from_file(Path::new(&options.path)) + core::configuration::TestSequence::new_from_file(Path::new(&options.path)) .expect("File exists") .to_vec(); let num_steps = test_sequence.len(); @@ -117,8 +108,17 @@ impl BenchCli { ); for (index, test_config) in test_sequence.into_iter().enumerate() { gum::info!( - "{}", - format!("Current step {}/{}", index + 1, num_steps).bright_purple() + "{}, {}, {}, {}, {}, {}", + format!("Step {}/{}", index + 1, num_steps).bright_purple(), + format!("n_validators = {}", test_config.n_validators).blue(), + format!("n_cores = {}", test_config.n_cores).blue(), + format!( + "pov_size = {} - {}", + test_config.min_pov_size, test_config.max_pov_size + ) + .bright_black(), + format!("error = {}", test_config.error).bright_black(), + format!("latency = {:?}", test_config.latency).bright_black(), ); let candidate_count = test_config.n_cores * test_config.num_blocks; @@ -132,48 +132,30 @@ impl BenchCli { } return Ok(()) }, - TestObjective::DataAvailabilityRead(options) => match self.network { + 
TestObjective::DataAvailabilityRead(ref options) => match self.network { NetworkEmulation::Healthy => TestConfiguration::healthy_network( - options.num_blocks, - options.fetch_from_backers, - options.n_validators, - options.n_cores, - (0..options.n_cores) - .map(|_| { - random_pov_size( - options.min_pov_size * 1024, - options.max_pov_size * 1024, - ) - }) - .collect(), + self.objective, + configuration.num_blocks, + configuration.n_validators, + configuration.n_cores, + configuration.min_pov_size, + configuration.max_pov_size, ), NetworkEmulation::Degraded => TestConfiguration::degraded_network( - options.num_blocks, - options.fetch_from_backers, - options.n_validators, - options.n_cores, - (0..options.n_cores) - .map(|_| { - random_pov_size( - options.min_pov_size * 1024, - options.max_pov_size * 1024, - ) - }) - .collect(), + self.objective, + configuration.num_blocks, + configuration.n_validators, + configuration.n_cores, + configuration.min_pov_size, + configuration.max_pov_size, ), NetworkEmulation::Ideal => TestConfiguration::ideal_network( - options.num_blocks, - options.fetch_from_backers, - options.n_validators, - options.n_cores, - (0..options.n_cores) - .map(|_| { - random_pov_size( - options.min_pov_size * 1024, - options.max_pov_size * 1024, - ) - }) - .collect(), + self.objective, + configuration.num_blocks, + configuration.n_validators, + configuration.n_cores, + configuration.min_pov_size, + configuration.max_pov_size, ), }, }; diff --git a/polkadot/node/subsystem-bench/test_sequence.toml b/polkadot/node/subsystem-bench/test_sequence.toml deleted file mode 100644 index d32477b9efe9..000000000000 --- a/polkadot/node/subsystem-bench/test_sequence.toml +++ /dev/null @@ -1,77 +0,0 @@ -[[TestConfiguration]] -use_fast_path = false -n_validators = 300 -n_cores = 20 -min_pov_size = 5242880 -max_pov_size = 5242880 -peer_bandwidth = 128000 -bandwidth = 52428800 -error = 33 -num_blocks = 5 - -[TestConfiguration.latency.min_latency] -secs = 0 -nanos = 1000000 
- -[TestConfiguration.latency.max_latency] -secs = 0 -nanos = 100000000 - -[[TestConfiguration]] -use_fast_path = false -n_validators = 500 -n_cores = 20 -min_pov_size = 5242880 -max_pov_size = 5242880 -peer_bandwidth = 128000 -bandwidth = 52428800 -error = 33 -num_blocks = 5 - -[TestConfiguration.latency.min_latency] -secs = 0 -nanos = 1000000 - -[TestConfiguration.latency.max_latency] -secs = 0 -nanos = 1000000000 - - -[[TestConfiguration]] -use_fast_path = false -n_validators = 1000 -n_cores = 20 -min_pov_size = 5242880 -max_pov_size = 5242880 -peer_bandwidth = 128000 -bandwidth = 52428800 -error = 33 -num_blocks = 5 - -[TestConfiguration.latency.min_latency] -secs = 0 -nanos = 1000000 - -[TestConfiguration.latency.max_latency] -secs = 0 -nanos = 1000000000 - - -[[TestConfiguration]] -use_fast_path = false -n_validators = 2000 -n_cores = 20 -min_pov_size = 5242880 -max_pov_size = 5242880 -peer_bandwidth = 128000 -bandwidth = 52428800 -error = 33 -num_blocks = 5 - -[TestConfiguration.latency.min_latency] -secs = 0 -nanos = 1000000 - -[TestConfiguration.latency.max_latency] -secs = 0 -nanos = 1000000000 diff --git a/polkadot/node/subsystem-bench/test_sequence.yaml b/polkadot/node/subsystem-bench/test_sequence.yaml new file mode 100644 index 000000000000..088a7e15729b --- /dev/null +++ b/polkadot/node/subsystem-bench/test_sequence.yaml @@ -0,0 +1,56 @@ +TestConfiguration: +# Test 1 +- objective: !DataAvailabilityRead + fetch_from_backers: false + n_validators: 300 + n_cores: 10 + min_pov_size: 1120 + max_pov_size: 5120 + peer_bandwidth: 52428800 + bandwidth: 52428800 + latency: + min_latency: + secs: 0 + nanos: 1000000 + max_latency: + secs: 0 + nanos: 100000000 + error: 3 + num_blocks: 10 +# Test 2 +- objective: !DataAvailabilityRead + fetch_from_backers: false + n_validators: 500 + n_cores: 10 + min_pov_size: 1120 + max_pov_size: 5120 + peer_bandwidth: 52428800 + bandwidth: 52428800 + latency: + min_latency: + secs: 0 + nanos: 1000000 + max_latency: + secs: 0 + 
nanos: 100000000 + error: 3 + num_blocks: 10 + +# Test 3 +- objective: !DataAvailabilityRead + fetch_from_backers: false + n_validators: 1000 + n_cores: 10 + min_pov_size: 1120 + max_pov_size: 5120 + peer_bandwidth: 52428800 + bandwidth: 52428800 + latency: + min_latency: + secs: 0 + nanos: 1000000 + max_latency: + secs: 0 + nanos: 100000000 + error: 3 + num_blocks: 10 From 28438650ef36a17528cf45883712786ca9dc034d Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 24 Nov 2023 16:05:51 +0200 Subject: [PATCH 19/52] Prepare to swtich to overseer Signed-off-by: Andrei Sandu --- Cargo.lock | 1 + polkadot/node/subsystem-bench/Cargo.toml | 1 + .../subsystem-bench/src/availability/mod.rs | 20 ++--- .../subsystem-bench/src/core/mock/dummy.rs | 89 +++++++++++++++++++ .../node/subsystem-bench/src/core/mock/mod.rs | 76 ++++++++++++++++ polkadot/node/subsystem-bench/src/core/mod.rs | 1 + .../subsystem-bench/src/core/subsystem.rs | 16 ++++ .../subsystem-bench/src/subsystem-bench.rs | 8 +- .../procedural/src/pallet/expand/warnings.rs | 8 +- 9 files changed, 199 insertions(+), 21 deletions(-) create mode 100644 polkadot/node/subsystem-bench/src/core/mock/dummy.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mock/mod.rs create mode 100644 polkadot/node/subsystem-bench/src/core/subsystem.rs diff --git a/Cargo.lock b/Cargo.lock index b40a40db47b5..44a9093710ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13027,6 +13027,7 @@ dependencies = [ "itertools 0.11.0", "log", "parity-scale-codec", + "paste", "polkadot-availability-recovery", "polkadot-erasure-coding", "polkadot-node-metrics", diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index 9dab8dce8455..d1c68c6e5f54 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -53,6 +53,7 @@ prometheus_endpoint = { package = "substrate-prometheus-endpoint", path = "../.. 
prometheus = { version = "0.13.0", default-features = false } serde = "1.0.192" serde_yaml = "0.9" +paste = "1.0.14" [features] default = [] diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 9be15c576e3a..8bd28b02bd73 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -129,7 +129,7 @@ pub struct TestEnvironment { // for the whole duration of the test. instance: AvailabilityRecoverySubsystemInstance, // The test intial state. The current state is owned by `env_task`. - state: TestState, + config: TestConfiguration, // A handle to the network emulator. network: NetworkEmulator, // Configuration/env metrics @@ -140,6 +140,7 @@ impl TestEnvironment { // Create a new test environment with specified initial state and prometheus registry. // We use prometheus metrics to collect per job task poll time and subsystem metrics. pub fn new(runtime: tokio::runtime::Handle, state: TestState, registry: Registry) -> Self { + let config = state.config().clone(); let task_manager: TaskManager = TaskManager::new(runtime.clone(), Some(&registry)).unwrap(); let (instance, virtual_overseer) = AvailabilityRecoverySubsystemInstance::new( &registry, @@ -153,9 +154,9 @@ impl TestEnvironment { let metrics = TestEnvironmentMetrics::new(&registry).expect("Metrics need to be registered"); let mut network = NetworkEmulator::new( - state.config().n_validators, + config.n_validators, state.validator_authority_id.clone(), - state.config().peer_bandwidth, + config.peer_bandwidth, task_manager.spawn_handle(), &registry, ); @@ -168,7 +169,7 @@ impl TestEnvironment { let spawn_handle = task_manager.spawn_handle(); // Our node rate limiting - let mut rx_limiter = RateLimit::new(10, state.config.bandwidth); + let mut rx_limiter = RateLimit::new(10, config.bandwidth); let (ingress_tx, mut ingress_rx) = tokio::sync::mpsc::unbounded_channel::(); let
our_network_stats = network.peer_stats(0); @@ -204,11 +205,11 @@ impl TestEnvironment { .unwrap(); }); - TestEnvironment { task_manager, registry, to_subsystem, instance, state, network, metrics } + TestEnvironment { task_manager, registry, to_subsystem, instance, config, network, metrics } } pub fn config(&self) -> &TestConfiguration { - self.state.config() + &self.config } pub fn network(&mut self) -> &mut NetworkEmulator { @@ -457,8 +458,6 @@ impl TestEnvironment { } } -/// Implementation for chunks only -/// TODO: all recovery methods. impl AvailabilityRecoverySubsystemInstance { pub fn new( registry: &Registry, @@ -732,7 +731,7 @@ fn derive_erasure_chunks_with_proofs_and_root( (erasure_chunks, root) } -pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { +pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestState) { let config = env.config().clone(); env.send_signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(new_leaf( @@ -754,8 +753,7 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment) { let block_start_ts = Instant::now(); for candidate_num in 0..config.n_cores as u64 { - let candidate = env - .state + let candidate = state .next_candidate() .expect("We always send up to n_cores*num_blocks; qed"); let (tx, rx) = oneshot::channel(); diff --git a/polkadot/node/subsystem-bench/src/core/mock/dummy.rs b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs new file mode 100644 index 000000000000..122fc23ac52f --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs @@ -0,0 +1,89 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +//! Dummy subsystem mocks. +//! +use paste::paste; + +use futures::{channel::oneshot, select, Future, FutureExt}; +use polkadot_node_subsystem::{ + overseer, AllMessages, FromOrchestra, HeadSupportsParachains, Overseer, OverseerConnector, + OverseerHandle, SpawnGlue, SpawnedSubsystem, Subsystem, SubsystemError, +}; +use std::time::Duration; +use tokio::time::sleep; + +macro_rules! mock { + // Just query by relay parent + ($subsystem_name:ident) => { + paste! { + pub struct [<Mock $subsystem_name>] {} + #[overseer::subsystem($subsystem_name, error=SubsystemError, prefix=self::overseer)] + impl<Context> [<Mock $subsystem_name>] { + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self.run(ctx).map(|_| Ok(())).boxed(); + + SpawnedSubsystem { name: stringify!($subsystem_name), future } + } + } + + #[overseer::contextbounds($subsystem_name, prefix = self::overseer)] + impl<Context> [<Mock $subsystem_name>] { + async fn run(self, mut ctx: Context) { + let mut count_total_msg = 0; + loop { + futures::select!{ + _msg = ctx.recv().fuse() => { + count_total_msg +=1; + } + _ = sleep(Duration::from_secs(6)).fuse() => { + if count_total_msg > 0 { + gum::info!(target: "mock-subsystems", "Subsystem {} processed {} messages since last time", stringify!($subsystem_name), count_total_msg); + } + count_total_msg = 0; + } + } + } + } + } + } + }; +} + +mock!(AvailabilityStore); +mock!(StatementDistribution); +mock!(BitfieldSigning); +mock!(BitfieldDistribution); +mock!(Provisioner); +mock!(NetworkBridgeRx); +mock!(CollationGeneration); +mock!(CollatorProtocol); +mock!(GossipSupport); +mock!(DisputeDistribution); +mock!(DisputeCoordinator); +mock!(ProspectiveParachains); +mock!(PvfChecker);
+mock!(CandidateBacking); +mock!(AvailabilityDistribution); +mock!(CandidateValidation); +mock!(AvailabilityRecovery); +mock!(NetworkBridgeTx); +mock!(ChainApi); +mock!(ChainSelection); +mock!(ApprovalVoting); +mock!(ApprovalDistribution); +mock!(RuntimeApi); + + diff --git a/polkadot/node/subsystem-bench/src/core/mock/mod.rs b/polkadot/node/subsystem-bench/src/core/mock/mod.rs new file mode 100644 index 000000000000..f13e87c8683b --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/mod.rs @@ -0,0 +1,76 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use polkadot_node_subsystem::{ + overseer, AllMessages, FromOrchestra, HeadSupportsParachains, Overseer, OverseerConnector, + OverseerHandle, SpawnGlue, SpawnedSubsystem, Subsystem, +}; +use polkadot_node_subsystem_types::Hash; + +pub mod dummy; +mod temp; + +use dummy::*; +use sc_service::SpawnTaskHandle; + +struct AlwaysSupportsParachains {} +#[async_trait::async_trait] +impl HeadSupportsParachains for AlwaysSupportsParachains { + async fn head_supports_parachains(&self, _head: &Hash) -> bool { + true + } +} + +pub fn new_overseer_with_dummy_subsystems(spawn_task_handle: SpawnTaskHandle) { + // Initialize a mock overseer. + // All subsystem except approval_voting and approval_distribution are mock subsystems. 
+ let spawner_glue = SpawnGlue(spawn_task_handle); + let overseer_connector = OverseerConnector::with_event_capacity(64000); + let builder = Overseer::builder() + .approval_voting(MockApprovalVoting {}) + .approval_distribution(MockApprovalDistribution {}) + .availability_recovery(MockAvailabilityRecovery {}) + .candidate_validation(MockCandidateValidation {}) + .chain_api(MockChainApi { }) + .chain_selection(MockChainSelection {}) + .dispute_coordinator(MockDisputeCoordinator {}) + .runtime_api(MockRuntimeApi { }) + .network_bridge_tx(MockNetworkBridgeTx {}) + .availability_distribution(MockAvailabilityDistribution {}) + .availability_store(MockAvailabilityStore {}) + .pvf_checker(MockPvfChecker {}) + .candidate_backing(MockCandidateBacking {}) + .statement_distribution(MockStatementDistribution {}) + .bitfield_signing(MockBitfieldSigning {}) + .bitfield_distribution(MockBitfieldDistribution {}) + .provisioner(MockProvisioner {}) + .network_bridge_rx(MockNetworkBridgeRx {}) + .collation_generation(MockCollationGeneration {}) + .collator_protocol(MockCollatorProtocol {}) + .gossip_support(MockGossipSupport {}) + .dispute_distribution(MockDisputeDistribution {}) + .prospective_parachains(MockProspectiveParachains {}) + .activation_external_listeners(Default::default()) + .span_per_active_leaf(Default::default()) + .active_leaves(Default::default()) + .metrics(Default::default()) + .supports_parachains(AlwaysSupportsParachains {}) + .spawner(spawner_glue); + + let (mock_overseer, mock_overseer_handle) = + builder.build_with_connector(overseer_connector).expect("Should not fail"); + +} \ No newline at end of file diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs index 06aa58f7256b..af2abf0860cd 100644 --- a/polkadot/node/subsystem-bench/src/core/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -29,3 +29,4 @@ pub mod display; pub mod environment; pub mod keyring; pub mod network; +pub mod mock; diff 
--git a/polkadot/node/subsystem-bench/src/core/subsystem.rs b/polkadot/node/subsystem-bench/src/core/subsystem.rs new file mode 100644 index 000000000000..c61e641d255d --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/subsystem.rs @@ -0,0 +1,16 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index b94e594e5945..ca561e5c4955 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -126,9 +126,9 @@ impl BenchCli { let mut state = TestState::new(test_config); state.generate_candidates(candidate_count); let mut env = - TestEnvironment::new(runtime.handle().clone(), state, Registry::new()); + TestEnvironment::new(runtime.handle().clone(), state.clone(), Registry::new()); - runtime.block_on(availability::bench_chunk_recovery(&mut env)); + runtime.block_on(availability::bench_chunk_recovery(&mut env, state)); } return Ok(()) }, @@ -189,9 +189,9 @@ impl BenchCli { let mut state = TestState::new(test_config); state.generate_candidates(candidate_count); - let mut env = TestEnvironment::new(runtime.handle().clone(), state, Registry::new()); + let mut env = TestEnvironment::new(runtime.handle().clone(), state.clone(), Registry::new()); - 
runtime.block_on(availability::bench_chunk_recovery(&mut env)); + runtime.block_on(availability::bench_chunk_recovery(&mut env, state)); Ok(()) } diff --git a/substrate/frame/support/procedural/src/pallet/expand/warnings.rs b/substrate/frame/support/procedural/src/pallet/expand/warnings.rs index 6ce2097c2684..030e3ddaf323 100644 --- a/substrate/frame/support/procedural/src/pallet/expand/warnings.rs +++ b/substrate/frame/support/procedural/src/pallet/expand/warnings.rs @@ -33,9 +33,7 @@ pub(crate) fn weight_witness_warning( if dev_mode { return } - let CallWeightDef::Immediate(w) = &method.weight else { - return - }; + let CallWeightDef::Immediate(w) = &method.weight else { return }; let partial_warning = Warning::new_deprecated("UncheckedWeightWitness") .old("not check weight witness data") @@ -66,9 +64,7 @@ pub(crate) fn weight_constant_warning( if dev_mode { return } - let syn::Expr::Lit(lit) = weight else { - return - }; + let syn::Expr::Lit(lit) = weight else { return }; let warning = Warning::new_deprecated("ConstantWeight") .index(warnings.len()) From b17a1477ede5840d882a69d44f8e0a40eb986c56 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 27 Nov 2023 11:28:53 +0200 Subject: [PATCH 20/52] add mocked subsystems Signed-off-by: Andrei Sandu --- Cargo.lock | 1 + polkadot/node/subsystem-bench/Cargo.toml | 1 + .../src/availability/configuration.rs | 1 - .../subsystem-bench/src/availability/mod.rs | 5 +- polkadot/node/subsystem-bench/src/cli.rs | 7 +- .../subsystem-bench/src/core/configuration.rs | 22 +- .../subsystem-bench/src/core/environment.rs | 2 - .../subsystem-bench/src/core/mock/av_store.rs | 127 +++++++++ .../subsystem-bench/src/core/mock/dummy.rs | 10 +- .../node/subsystem-bench/src/core/mock/mod.rs | 97 ++++--- .../src/core/mock/network_bridge.rs | 262 ++++++++++++++++++ .../src/core/mock/runtime_api.rs | 107 +++++++ polkadot/node/subsystem-bench/src/core/mod.rs | 2 +- .../subsystem-bench/src/subsystem-bench.rs | 21 +- 14 files changed, 587 
insertions(+), 78 deletions(-) create mode 100644 polkadot/node/subsystem-bench/src/core/mock/av_store.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs diff --git a/Cargo.lock b/Cargo.lock index 15cda46316f9..b349886761ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13433,6 +13433,7 @@ dependencies = [ "futures-timer", "itertools 0.11.0", "log", + "orchestra", "parity-scale-codec", "paste", "polkadot-availability-recovery", diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index d1c68c6e5f54..8296874c0dab 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -54,6 +54,7 @@ prometheus = { version = "0.13.0", default-features = false } serde = "1.0.192" serde_yaml = "0.9" paste = "1.0.14" +orchestra = { version = "0.3.3", default-features = false, features=["futures_channel"] } [features] default = [] diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs index f96b8e2cb7ce..1274862a8e4a 100644 --- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ b/polkadot/node/subsystem-bench/src/availability/configuration.rs @@ -14,7 +14,6 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use super::*; use serde::{Deserialize, Serialize}; /// The test input parameters diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 8bd28b02bd73..3f9598505074 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -753,9 +753,8 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestStat let block_start_ts = Instant::now(); for candidate_num in 0..config.n_cores as u64 { - let candidate = state - .next_candidate() - .expect("We always send up to n_cores*num_blocks; qed"); + let candidate = + state.next_candidate().expect("We always send up to n_cores*num_blocks; qed"); let (tx, rx) = oneshot::channel(); batch.push(rx); diff --git a/polkadot/node/subsystem-bench/src/cli.rs b/polkadot/node/subsystem-bench/src/cli.rs index 2f00ad2f3585..ee67a01d449e 100644 --- a/polkadot/node/subsystem-bench/src/cli.rs +++ b/polkadot/node/subsystem-bench/src/cli.rs @@ -13,14 +13,9 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use super::availability::{ - AvailabilityRecoveryConfiguration, DataAvailabilityReadOptions, NetworkEmulation, - TestEnvironment, TestState, -}; +use super::availability::DataAvailabilityReadOptions; use serde::{Deserialize, Serialize}; -use super::core::configuration::{PeerLatency, TestConfiguration, TestSequence}; - #[derive(Debug, Clone, Serialize, Deserialize, clap::Parser)] #[clap(rename_all = "kebab-case")] #[allow(missing_docs)] diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs index 017d4023ef65..4526505c3a64 100644 --- a/polkadot/node/subsystem-bench/src/core/configuration.rs +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -15,8 +15,6 @@ // along with Polkadot. If not, see . 
use std::path::Path; -use crate::availability::AvailabilityRecoveryConfiguration; - use super::*; pub use crate::cli::TestObjective; use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; @@ -81,7 +79,7 @@ pub struct TestSequence { } impl TestSequence { - pub fn to_vec(mut self) -> Vec { + pub fn to_vec(self) -> Vec { self.test_configurations .into_iter() .map(|mut config| { @@ -188,3 +186,21 @@ impl TestConfiguration { } } } + +/// Produce a randomized duration between `min` and `max`. +pub fn random_latency(maybe_peer_latency: Option<&PeerLatency>) -> Option { + if let Some(peer_latency) = maybe_peer_latency { + Some( + Uniform::from(peer_latency.min_latency..=peer_latency.max_latency) + .sample(&mut thread_rng()), + ) + } else { + None + } +} + +/// Generate a random error based on `probability`. +/// `probability` should be a number between 0 and 100. +pub fn random_error(probability: usize) -> bool { + Uniform::from(0..=99).sample(&mut thread_rng()) < probability +} diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index 6a680799972d..e6b09a1c13e6 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -14,8 +14,6 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use super::*; -use network::NetworkEmulator; use polkadot_node_subsystem_util::metrics::prometheus::{ self, Gauge, Histogram, PrometheusError, Registry, U64, }; diff --git a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs new file mode 100644 index 000000000000..e84aeba5b6b7 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs @@ -0,0 +1,127 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. 
+ +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +//! +//! A generic av store subsystem mockup suitable to be used in benchmarks. + +use parity_scale_codec::Encode; +use polkadot_primitives::CandidateHash; + +use std::collections::HashMap; + +use futures::{channel::oneshot, FutureExt}; + +use polkadot_node_primitives::ErasureChunk; + +use polkadot_node_subsystem::{ + messages::AvailabilityStoreMessage, overseer, SpawnedSubsystem, SubsystemError, +}; + +pub struct AvailabilityStoreState { + candidate_hashes: HashMap, + chunks: Vec>, +} + +const LOG_TARGET: &str = "subsystem-bench::av-store-mock"; + +/// A mock of the availability store subsystem. 
This one also generates all the +/// candidates that a +pub struct MockAvailabilityStore { + state: AvailabilityStoreState, +} + +impl MockAvailabilityStore { + pub fn new( + chunks: Vec>, + candidate_hashes: HashMap, + ) -> MockAvailabilityStore { + Self { state: AvailabilityStoreState { chunks, candidate_hashes } } + } + + async fn respond_to_query_all_request( + &self, + candidate_hash: CandidateHash, + send_chunk: impl Fn(usize) -> bool, + tx: oneshot::Sender>, + ) { + let candidate_index = self + .state + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let v = self + .state + .chunks + .get(*candidate_index as usize) + .unwrap() + .iter() + .filter(|c| send_chunk(c.index.0 as usize)) + .cloned() + .collect(); + + let _ = tx.send(v); + } +} + +#[overseer::subsystem(AvailabilityStore, error=SubsystemError, prefix=self::overseer)] +impl MockAvailabilityStore { + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self.run(ctx).map(|_| Ok(())).boxed(); + + SpawnedSubsystem { name: "av-store-mock-subsystem", future } + } +} + +#[overseer::contextbounds(AvailabilityStore, prefix = self::overseer)] +impl MockAvailabilityStore { + async fn run(self, mut ctx: Context) { + loop { + let msg = ctx.recv().await.expect("Overseer never fails us"); + + match msg { + orchestra::FromOrchestra::Signal(_) => {}, + orchestra::FromOrchestra::Communication { msg } => match msg { + AvailabilityStoreMessage::QueryAvailableData(_candidate_hash, tx) => { + // We never have the full available data. + let _ = tx.send(None); + }, + AvailabilityStoreMessage::QueryAllChunks(candidate_hash, tx) => { + // We always have our own chunk. 
+ self.respond_to_query_all_request(candidate_hash, |index| index == 0, tx) + .await; + }, + AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx) => { + let candidate_index = self + .state + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let chunk_size = self.state.chunks.get(*candidate_index as usize).unwrap() + [0] + .encoded_size(); + let _ = tx.send(Some(chunk_size)); + }, + _ => { + unimplemented!("Unexpected runtime-api message") + }, + }, + } + } + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mock/dummy.rs b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs index 122fc23ac52f..196cc81f1e82 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/dummy.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs @@ -14,14 +14,10 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . //! Dummy subsystem mocks. -//! use paste::paste; -use futures::{channel::oneshot, select, Future, FutureExt}; -use polkadot_node_subsystem::{ - overseer, AllMessages, FromOrchestra, HeadSupportsParachains, Overseer, OverseerConnector, - OverseerHandle, SpawnGlue, SpawnedSubsystem, Subsystem, SubsystemError, -}; +use futures::FutureExt; +use polkadot_node_subsystem::{overseer, SpawnedSubsystem, SubsystemError}; use std::time::Duration; use tokio::time::sleep; @@ -85,5 +81,3 @@ mock!(ChainSelection); mock!(ApprovalVoting); mock!(ApprovalDistribution); mock!(RuntimeApi); - - diff --git a/polkadot/node/subsystem-bench/src/core/mock/mod.rs b/polkadot/node/subsystem-bench/src/core/mock/mod.rs index f13e87c8683b..df874de31a7c 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/mod.rs @@ -15,18 +15,20 @@ // along with Polkadot. If not, see . 
use polkadot_node_subsystem::{ - overseer, AllMessages, FromOrchestra, HeadSupportsParachains, Overseer, OverseerConnector, - OverseerHandle, SpawnGlue, SpawnedSubsystem, Subsystem, + HeadSupportsParachains, Overseer, OverseerConnector, OverseerHandle, SpawnGlue, }; use polkadot_node_subsystem_types::Hash; +pub mod av_store; pub mod dummy; -mod temp; +pub mod network_bridge; +pub mod runtime_api; + +pub(crate) use dummy::*; -use dummy::*; use sc_service::SpawnTaskHandle; -struct AlwaysSupportsParachains {} +pub struct AlwaysSupportsParachains {} #[async_trait::async_trait] impl HeadSupportsParachains for AlwaysSupportsParachains { async fn head_supports_parachains(&self, _head: &Hash) -> bool { @@ -34,43 +36,50 @@ impl HeadSupportsParachains for AlwaysSupportsParachains { } } -pub fn new_overseer_with_dummy_subsystems(spawn_task_handle: SpawnTaskHandle) { - // Initialize a mock overseer. - // All subsystem except approval_voting and approval_distribution are mock subsystems. - let spawner_glue = SpawnGlue(spawn_task_handle); - let overseer_connector = OverseerConnector::with_event_capacity(64000); - let builder = Overseer::builder() - .approval_voting(MockApprovalVoting {}) - .approval_distribution(MockApprovalDistribution {}) - .availability_recovery(MockAvailabilityRecovery {}) - .candidate_validation(MockCandidateValidation {}) - .chain_api(MockChainApi { }) - .chain_selection(MockChainSelection {}) - .dispute_coordinator(MockDisputeCoordinator {}) - .runtime_api(MockRuntimeApi { }) - .network_bridge_tx(MockNetworkBridgeTx {}) - .availability_distribution(MockAvailabilityDistribution {}) - .availability_store(MockAvailabilityStore {}) - .pvf_checker(MockPvfChecker {}) - .candidate_backing(MockCandidateBacking {}) - .statement_distribution(MockStatementDistribution {}) - .bitfield_signing(MockBitfieldSigning {}) - .bitfield_distribution(MockBitfieldDistribution {}) - .provisioner(MockProvisioner {}) - .network_bridge_rx(MockNetworkBridgeRx {}) - 
.collation_generation(MockCollationGeneration {}) - .collator_protocol(MockCollatorProtocol {}) - .gossip_support(MockGossipSupport {}) - .dispute_distribution(MockDisputeDistribution {}) - .prospective_parachains(MockProspectiveParachains {}) - .activation_external_listeners(Default::default()) - .span_per_active_leaf(Default::default()) - .active_leaves(Default::default()) - .metrics(Default::default()) - .supports_parachains(AlwaysSupportsParachains {}) - .spawner(spawner_glue); - - let (mock_overseer, mock_overseer_handle) = - builder.build_with_connector(overseer_connector).expect("Should not fail"); +// An orchestra with dummy subsystems +macro_rules! dummy_builder { + ($spawn_task_handle: ident) => { + // Initialize a mock overseer. + // All subsystem except approval_voting and approval_distribution are mock subsystems. + Overseer::builder() + .approval_voting(MockApprovalVoting {}) + .approval_distribution(MockApprovalDistribution {}) + .availability_recovery(MockAvailabilityRecovery {}) + .candidate_validation(MockCandidateValidation {}) + .chain_api(MockChainApi {}) + .chain_selection(MockChainSelection {}) + .dispute_coordinator(MockDisputeCoordinator {}) + .runtime_api(MockRuntimeApi {}) + .network_bridge_tx(MockNetworkBridgeTx {}) + .availability_distribution(MockAvailabilityDistribution {}) + .availability_store(MockAvailabilityStore {}) + .pvf_checker(MockPvfChecker {}) + .candidate_backing(MockCandidateBacking {}) + .statement_distribution(MockStatementDistribution {}) + .bitfield_signing(MockBitfieldSigning {}) + .bitfield_distribution(MockBitfieldDistribution {}) + .provisioner(MockProvisioner {}) + .network_bridge_rx(MockNetworkBridgeRx {}) + .collation_generation(MockCollationGeneration {}) + .collator_protocol(MockCollatorProtocol {}) + .gossip_support(MockGossipSupport {}) + .dispute_distribution(MockDisputeDistribution {}) + .prospective_parachains(MockProspectiveParachains {}) + .activation_external_listeners(Default::default()) + 
.span_per_active_leaf(Default::default()) + .active_leaves(Default::default()) + .metrics(Default::default()) + .supports_parachains(AlwaysSupportsParachains {}) + .spawner(SpawnGlue($spawn_task_handle)) + }; +} -} \ No newline at end of file +pub fn new_overseer_with_dummy_subsystems( + spawn_task_handle: SpawnTaskHandle, +) -> (Overseer<SpawnGlue<SpawnTaskHandle>, AlwaysSupportsParachains>, OverseerHandle) { + let overseer_connector = OverseerConnector::with_event_capacity(64000); + let dummy = dummy_builder!(spawn_task_handle); + let builder = dummy.replace_chain_api(|_| MockChainApi {}); + // let (mock_overseer, mock_overseer_handle) = + builder.build_with_connector(overseer_connector).expect("Should not fail") +} diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs new file mode 100644 index 000000000000..cd374f8c18db --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -0,0 +1,262 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +//! +//! A generic network bridge tx subsystem mockup suitable to be used in benchmarks.
+ +use parity_scale_codec::Encode; + +use std::collections::HashMap; + +use futures::FutureExt; + +use polkadot_node_primitives::{AvailableData, ErasureChunk}; + +use polkadot_primitives::CandidateHash; +use sc_network::{OutboundFailure, RequestFailure}; + +use polkadot_node_subsystem::{ + messages::NetworkBridgeTxMessage, overseer, SpawnedSubsystem, SubsystemError, +}; + +use polkadot_node_network_protocol::request_response::{ + self as req_res, v1::ChunkResponse, Requests, +}; + +use crate::core::{ + configuration::{random_error, random_latency, TestConfiguration}, + network::{NetworkAction, NetworkEmulator, RateLimit}, +}; + +/// The availability store state of all emulated peers. +/// The network bridge tx mock will respond to requests as if the request is being serviced +/// by a remote peer on the network +pub struct NetworkAvailabilityState { + candidate_hashes: HashMap, + available_data: Vec, + chunks: Vec>, +} + +const LOG_TARGET: &str = "subsystem-bench::network-bridge-tx-mock"; + +/// A mock of the network bridge tx subsystem. 
+pub struct MockNetworkBridgeTx { + /// The test configuration + config: TestConfiguration, + /// The network availability state + availabilty: NetworkAvailabilityState, + /// A network emulator instance + network: NetworkEmulator, +} + +impl MockNetworkBridgeTx { + pub fn new( + config: TestConfiguration, + availabilty: NetworkAvailabilityState, + network: NetworkEmulator, + ) -> MockNetworkBridgeTx { + Self { config, availabilty, network } + } + + pub fn respond_to_send_request( + &mut self, + request: Requests, + ingress_tx: &mut tokio::sync::mpsc::UnboundedSender<NetworkAction>, + ) -> NetworkAction { + let ingress_tx = ingress_tx.clone(); + + match request { + Requests::ChunkFetchingV1(outgoing_request) => { + let validator_index: usize = outgoing_request.payload.index.0 as usize; + let candidate_hash = outgoing_request.payload.candidate_hash; + + let candidate_index = self + .availabilty + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let chunk: ChunkResponse = + self.availabilty.chunks.get(*candidate_index as usize).unwrap() + [validator_index] + .clone() + .into(); + let mut size = chunk.encoded_size(); + + let response = if random_error(self.config.error) { + // Error will not account to any bandwidth used.
+ size = 0; + Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) + } else { + Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode()) + }; + + let authority_discovery_id = match outgoing_request.peer { + req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, + _ => unimplemented!("Peer recipient not supported yet"), + }; + let authority_discovery_id_clone = authority_discovery_id.clone(); + + let future = async move { + let _ = outgoing_request.pending_response.send(response); + } + .boxed(); + + let future_wrapper = async move { + // Forward the response to the ingress channel of our node. + // On receive side we apply our node receiving rate limit. + let action = + NetworkAction::new(authority_discovery_id_clone, future, size, None); + ingress_tx.send(action).unwrap(); + } + .boxed(); + + NetworkAction::new( + authority_discovery_id, + future_wrapper, + size, + // Generate a random latency based on configuration. + random_latency(self.config.latency.as_ref()), + ) + }, + Requests::AvailableDataFetchingV1(outgoing_request) => { + let candidate_hash = outgoing_request.payload.candidate_hash; + let candidate_index = self + .availabilty + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let available_data = + self.availabilty.available_data.get(*candidate_index as usize).unwrap().clone(); + + let size = available_data.encoded_size(); + + let response = if random_error(self.config.error) { + Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) + } else { + Ok(req_res::v1::AvailableDataFetchingResponse::from(Some(available_data)) + .encode()) + }; + + let future = async move { + let _ = outgoing_request.pending_response.send(response); + } + .boxed(); + + let authority_discovery_id = match outgoing_request.peer { + 
req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, + _ => unimplemented!("Peer recipient not supported yet"), + }; + let authority_discovery_id_clone = authority_discovery_id.clone(); + + let future_wrapper = async move { + // Forward the response to the ingress channel of our node. + // On receive side we apply our node receiving rate limit. + let action = + NetworkAction::new(authority_discovery_id_clone, future, size, None); + ingress_tx.send(action).unwrap(); + } + .boxed(); + + NetworkAction::new( + authority_discovery_id, + future_wrapper, + size, + // Generate a random latency based on configuration. + random_latency(self.config.latency.as_ref()), + ) + }, + _ => panic!("received an unexpected request"), + } + } +} + +#[overseer::subsystem(NetworkBridgeTx, error=SubsystemError, prefix=self::overseer)] +impl MockNetworkBridgeTx { + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self.run(ctx).map(|_| Ok(())).boxed(); + + SpawnedSubsystem { name: "network-bridge-tx-mock-subsystem", future } + } +} + +#[overseer::contextbounds(NetworkBridgeTx, prefix = self::overseer)] +impl MockNetworkBridgeTx { + async fn run(mut self, mut ctx: Context) { + let (mut ingress_tx, mut ingress_rx) = + tokio::sync::mpsc::unbounded_channel::(); + + // Initialize our node bandwidth limits. + let mut rx_limiter = RateLimit::new(10, self.config.bandwidth); + + // Get a handle to our node network emulation stats. + let our_network_stats = self.network.peer_stats(0); + // This task will handle receipt of messages on our simulated network of the node. + let _ = ctx + .spawn_blocking( + "node0-rx", + async move { + while let Some(action) = ingress_rx.recv().await { + let size = action.size(); + + // account for our node receiving the data. + our_network_stats.inc_received(size); + + rx_limiter.reap(size).await; + action.run().await; + } + } + .boxed(), + ) + .expect("We never fail to spawn tasks"); + + // Main subsystem loop. 
+ loop { + let msg = ctx.recv().await.expect("Overseer never fails us"); + + match msg { + orchestra::FromOrchestra::Signal(_) => {}, + orchestra::FromOrchestra::Communication { msg } => match msg { + NetworkBridgeTxMessage::SendRequests(requests, _if_disconnected) => { + for request in requests { + self.network.inc_sent(request_size(&request)); + let action = self.respond_to_send_request(request, &mut ingress_tx); + // Will account for our node sending the request over the emulated + // network. + self.network.submit_peer_action(action.peer(), action); + } + }, + _ => { + unimplemented!("Unexpected runtime-api message") + }, + }, + } + } + } +} + +// A helper to determine the request payload size. +fn request_size(request: &Requests) -> u64 { + match request { + Requests::ChunkFetchingV1(outgoing_request) => + outgoing_request.payload.encoded_size() as u64, + Requests::AvailableDataFetchingV1(outgoing_request) => + outgoing_request.payload.encoded_size() as u64, + _ => panic!("received an unexpected request"), + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs new file mode 100644 index 000000000000..e8c1098b97f0 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs @@ -0,0 +1,107 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. 
If not, see . +//! +//! A generic runtime api subsystem mockup suitable to be used in benchmarks. + +use polkadot_primitives::{ + AuthorityDiscoveryId, GroupIndex, IndexedVec, SessionInfo, ValidatorId, ValidatorIndex, +}; + +use polkadot_node_subsystem::{ + messages::{RuntimeApiMessage, RuntimeApiRequest}, + overseer, SpawnedSubsystem, SubsystemError, +}; + +use crate::core::configuration::TestConfiguration; +use futures::FutureExt; + +pub struct RuntimeApiState { + validator_public: Vec, + validator_authority_id: Vec, +} + +pub struct MockRuntimeApi { + state: RuntimeApiState, + config: TestConfiguration, +} + +impl MockRuntimeApi { + pub fn new( + config: TestConfiguration, + validator_public: Vec, + validator_authority_id: Vec, + ) -> MockRuntimeApi { + Self { state: RuntimeApiState { validator_public, validator_authority_id }, config } + } + + fn session_info(&self) -> SessionInfo { + let all_validators = (0..self.config.n_validators) + .map(|i| ValidatorIndex(i as _)) + .collect::>(); + + let validator_groups = all_validators.chunks(5).map(|x| Vec::from(x)).collect::>(); + + SessionInfo { + validators: self.state.validator_public.clone().into(), + discovery_keys: self.state.validator_authority_id.clone(), + validator_groups: IndexedVec::>::from(validator_groups), + assignment_keys: vec![], + n_cores: self.config.n_cores as u32, + zeroth_delay_tranche_width: 0, + relay_vrf_modulo_samples: 0, + n_delay_tranches: 0, + no_show_slots: 0, + needed_approvals: 0, + active_validator_indices: vec![], + dispute_period: 6, + random_seed: [0u8; 32], + } + } +} + +#[overseer::subsystem(RuntimeApi, error=SubsystemError, prefix=self::overseer)] +impl MockRuntimeApi { + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self.run(ctx).map(|_| Ok(())).boxed(); + + SpawnedSubsystem { name: "runtime-api-mock-subsystem", future } + } +} + +#[overseer::contextbounds(RuntimeApi, prefix = self::overseer)] +impl MockRuntimeApi { + async fn run(self, mut ctx: Context) { + 
loop { + let msg = ctx.recv().await.expect("Overseer never fails us"); + + match msg { + orchestra::FromOrchestra::Signal(_) => {}, + orchestra::FromOrchestra::Communication { msg } => match msg { + RuntimeApiMessage::Request( + _request, + RuntimeApiRequest::SessionInfo(_session_index, sender), + ) => { + let _ = sender.send(Ok(Some(self.session_info()))); + }, + // Long term TODO: implement more as needed. + _ => { + unimplemented!("Unexpected runtime-api message") + }, + }, + } + } + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs index af2abf0860cd..11ca03dbda4c 100644 --- a/polkadot/node/subsystem-bench/src/core/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -28,5 +28,5 @@ pub mod configuration; pub mod display; pub mod environment; pub mod keyring; -pub mod network; pub mod mock; +pub mod network; diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index ca561e5c4955..ce9e8aa3a8be 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -26,16 +26,13 @@ pub(crate) mod availability; pub(crate) mod cli; pub(crate) mod core; -use availability::{ - AvailabilityRecoveryConfiguration, DataAvailabilityReadOptions, NetworkEmulation, - TestEnvironment, TestState, -}; +use availability::{NetworkEmulation, TestEnvironment, TestState}; use cli::TestObjective; -use core::configuration::{PeerLatency, TestConfiguration, TestSequence}; +use core::configuration::TestConfiguration; use clap_num::number_range; -const LOG_TARGET: &str = "subsystem-bench"; +// const LOG_TARGET: &str = "subsystem-bench"; fn le_100(s: &str) -> Result { number_range(s, 0, 100) @@ -125,14 +122,17 @@ impl BenchCli { let mut state = TestState::new(test_config); state.generate_candidates(candidate_count); - let mut env = - TestEnvironment::new(runtime.handle().clone(), state.clone(), 
Registry::new()); + let mut env = TestEnvironment::new( + runtime.handle().clone(), + state.clone(), + Registry::new(), + ); runtime.block_on(availability::bench_chunk_recovery(&mut env, state)); } return Ok(()) }, - TestObjective::DataAvailabilityRead(ref options) => match self.network { + TestObjective::DataAvailabilityRead(ref _options) => match self.network { NetworkEmulation::Healthy => TestConfiguration::healthy_network( self.objective, configuration.num_blocks, @@ -189,7 +189,8 @@ impl BenchCli { let mut state = TestState::new(test_config); state.generate_candidates(candidate_count); - let mut env = TestEnvironment::new(runtime.handle().clone(), state.clone(), Registry::new()); + let mut env = + TestEnvironment::new(runtime.handle().clone(), state.clone(), Registry::new()); runtime.block_on(availability::bench_chunk_recovery(&mut env, state)); From 4724d8c98d6b47643cbccb9e00dc7e550dad3a78 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 27 Nov 2023 19:43:15 +0200 Subject: [PATCH 21/52] full overseer based implementation complete Signed-off-by: Andrei Sandu --- polkadot/node/overseer/src/lib.rs | 2 + .../subsystem-bench/src/availability/mod.rs | 637 ++++++------------ .../subsystem-bench/src/core/configuration.rs | 33 + .../subsystem-bench/src/core/environment.rs | 28 + .../subsystem-bench/src/core/mock/av_store.rs | 8 +- .../subsystem-bench/src/core/mock/dummy.rs | 25 +- .../node/subsystem-bench/src/core/mock/mod.rs | 26 +- .../src/core/mock/network_bridge.rs | 7 +- .../src/core/mock/runtime_api.rs | 28 +- .../node/subsystem-bench/src/core/network.rs | 2 +- .../subsystem-bench/src/subsystem-bench.rs | 40 +- .../node/subsystem-test-helpers/src/mock.rs | 12 +- 12 files changed, 347 insertions(+), 501 deletions(-) diff --git a/polkadot/node/overseer/src/lib.rs b/polkadot/node/overseer/src/lib.rs index da99546a44f7..f4eddf1f41ce 100644 --- a/polkadot/node/overseer/src/lib.rs +++ b/polkadot/node/overseer/src/lib.rs @@ -276,6 +276,7 @@ impl From> for 
BlockInfo { /// An event from outside the overseer scope, such /// as the substrate framework or user interaction. +#[derive(Debug)] pub enum Event { /// A new block was imported. /// @@ -300,6 +301,7 @@ pub enum Event { } /// Some request from outer world. +#[derive(Debug)] pub enum ExternalRequest { /// Wait for the activation of a particular hash /// and be notified by means of the return channel. diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 3f9598505074..54a3cd961319 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -21,14 +21,16 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; +use tokio::runtime::{Handle, Runtime}; + +use polkadot_node_subsystem::{ + BlockInfo, Event, Overseer, OverseerConnector, OverseerHandle, SpawnGlue, +}; +use sc_network::request_responses::ProtocolConfig; use colored::Colorize; -use futures::{ - channel::{mpsc, oneshot}, - stream::FuturesUnordered, - FutureExt, SinkExt, StreamExt, -}; +use futures::{channel::oneshot, stream::FuturesUnordered, FutureExt, SinkExt, StreamExt}; use polkadot_node_metrics::metrics::Metrics; use polkadot_availability_recovery::AvailabilityRecoverySubsystem; @@ -40,23 +42,30 @@ use polkadot_node_network_protocol::request_response::{ use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; use prometheus::Registry; -use sc_network::{config::RequestResponseConfig, OutboundFailure, RequestFailure}; +use sc_network::{OutboundFailure, RequestFailure}; use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; use polkadot_node_primitives::{BlockData, PoV, Proof}; use polkadot_node_subsystem::{ - messages::{ - AllMessages, AvailabilityRecoveryMessage, AvailabilityStoreMessage, NetworkBridgeTxMessage, - RuntimeApiMessage, RuntimeApiRequest, - }, - ActiveLeavesUpdate, FromOrchestra, OverseerSignal, Subsystem, + 
messages::{AllMessages, AvailabilityRecoveryMessage}, + ActiveLeavesUpdate, OverseerSignal, }; use std::net::{Ipv4Addr, SocketAddr}; +use crate::core::{ + configuration::TestAuthorities, + environment::TestEnvironmentDependencies, + mock::{ + av_store, + network_bridge::{self, MockNetworkBridgeTx, NetworkAvailabilityState}, + runtime_api, MockAvailabilityStore, MockRuntimeApi, + }, +}; + use super::core::{ configuration::{PeerLatency, TestConfiguration}, environment::TestEnvironmentMetrics, - keyring::Keyring, + mock::dummy_builder, network::*, }; @@ -64,14 +73,12 @@ const LOG_TARGET: &str = "subsystem-bench::availability"; use polkadot_node_primitives::{AvailableData, ErasureChunk}; -use super::cli::TestObjective; -use polkadot_node_subsystem_test_helpers::{ - make_buffered_subsystem_context, mock::new_leaf, TestSubsystemContextHandle, -}; +use super::{cli::TestObjective, core::mock::AlwaysSupportsParachains}; +use polkadot_node_subsystem_test_helpers::mock::new_block_import_event; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_primitives::{ - AuthorityDiscoveryId, CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, IndexedVec, - PersistedValidationData, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, + CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, PersistedValidationData, + SessionIndex, ValidatorIndex, }; use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::{SpawnTaskHandle, TaskManager}; @@ -81,28 +88,22 @@ pub mod configuration; pub use cli::{DataAvailabilityReadOptions, NetworkEmulation, NetworkOptions}; pub use configuration::AvailabilityRecoveryConfiguration; -// Deterministic genesis hash for protocol names +// A dummy genesis hash const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); -struct AvailabilityRecoverySubsystemInstance { - _protocol_config: RequestResponseConfig, -} - -/// The test environment is responsible for creating an instance of the availability recovery 
-/// subsystem and connecting it to an emulated overseer. +/// The test environment is the high level wrapper of all things required to test +/// a certain subsystem. /// /// ## Mockups -/// We emulate the following subsystems: -/// - runtime api -/// - network bridge -/// - availability store +/// The overseer is passed in during construction and it can host an arbitrary number of +/// real subsystems instances and the corresponding mocked instances such that the real +/// subsystems can get their messages answered. /// /// As the subsystem's performance depends on network connectivity, the test environment /// emulates validator nodes on the network, see `NetworkEmulator`. The network emulation /// is configurable in terms of peer bandwidth, latency and connection error rate using /// uniform distribution sampling. /// -/// The mockup logic is implemented in `env_task` which owns and advances the `TestState`. /// /// ## Usage /// `TestEnvironment` is used in tests to send `Overseer` messages or signals to the subsystem @@ -121,13 +122,14 @@ pub struct TestEnvironment { // A task manager that tracks task poll durations allows us to measure // per task CPU usage as we do in the Polkadot node. task_manager: TaskManager, + // Our runtime + runtime: tokio::runtime::Runtime, + // A runtime handle + runtime_handle: tokio::runtime::Handle, // The Prometheus metrics registry registry: Registry, - // A channel to the availability recovery subsystem - to_subsystem: mpsc::Sender>, - // Subsystem instance, currently keeps req/response protocol channel senders - // for the whole duration of the test. - instance: AvailabilityRecoverySubsystemInstance, + // A handle to the lovely overseer + overseer_handle: OverseerHandle, // The test intial state. The current state is owned by `env_task`. config: TestConfiguration, // A handle to the network emulator. 
@@ -136,62 +138,142 @@ pub struct TestEnvironment { metrics: TestEnvironmentMetrics, } -impl TestEnvironment { - // Create a new test environment with specified initial state and prometheus registry. - // We use prometheus metrics to collect per job task poll time and subsystem metrics. - pub fn new(runtime: tokio::runtime::Handle, state: TestState, registry: Registry) -> Self { - let config = state.config().clone(); - let task_manager: TaskManager = TaskManager::new(runtime.clone(), Some(®istry)).unwrap(); - let (instance, virtual_overseer) = AvailabilityRecoverySubsystemInstance::new( - ®istry, - task_manager.spawn_handle(), - match &state.config().objective { - TestObjective::DataAvailabilityRead(options) => options.fetch_from_backers, - _ => panic!("Unexpected objective"), - }, - ); +fn build_overseer( + spawn_task_handle: SpawnTaskHandle, + runtime_api: MockRuntimeApi, + av_store: MockAvailabilityStore, + network_bridge: MockNetworkBridgeTx, + availability_recovery: AvailabilityRecoverySubsystem, +) -> (Overseer, AlwaysSupportsParachains>, OverseerHandle) { + let overseer_connector = OverseerConnector::with_event_capacity(64000); + let dummy = dummy_builder!(spawn_task_handle); + let builder = dummy + .replace_runtime_api(|_| runtime_api) + .replace_availability_store(|_| av_store) + .replace_network_bridge_tx(|_| network_bridge) + .replace_availability_recovery(|_| availability_recovery); + + builder.build_with_connector(overseer_connector).expect("Should not fail") +} - let metrics = - TestEnvironmentMetrics::new(®istry).expect("Metrics need to be registered"); - let mut network = NetworkEmulator::new( - config.n_validators, - state.validator_authority_id.clone(), - config.peer_bandwidth, - task_manager.spawn_handle(), - ®istry, - ); - - // Copy sender for later when we need to inject messages in to the subsystem. 
- let to_subsystem = virtual_overseer.tx.clone(); - - let task_state = state.clone(); - let task_network = network.clone(); - let spawn_handle = task_manager.spawn_handle(); +/// Takes a test configuration and uses it to creates the `TestEnvironment`. +pub fn prepare_test( + config: TestConfiguration, + state: &mut TestState, +) -> (TestEnvironment, ProtocolConfig) { + prepare_test_inner(config, state, TestEnvironmentDependencies::default()) +} + +/// Takes a test configuration and uses it to creates the `TestEnvironment`. +pub fn prepare_test_with_dependencies( + config: TestConfiguration, + state: &mut TestState, + dependencies: TestEnvironmentDependencies, +) -> (TestEnvironment, ProtocolConfig) { + prepare_test_inner(config, state, dependencies) +} + +fn prepare_test_inner( + config: TestConfiguration, + state: &mut TestState, + dependencies: TestEnvironmentDependencies, +) -> (TestEnvironment, ProtocolConfig) { + // We need to first create the high level test state object. + // This will then be decomposed into per subsystem states. + let candidate_count = config.n_cores * config.num_blocks; + state.generate_candidates(candidate_count); + + // Generate test authorities. 
+ let test_authorities = config.generate_authorities(); + + let runtime_api = runtime_api::MockRuntimeApi::new( + config.clone(), + test_authorities.validator_public.clone(), + test_authorities.validator_authority_id.clone(), + ); - // Our node rate limiting - let mut rx_limiter = RateLimit::new(10, config.bandwidth); - let (ingress_tx, mut ingress_rx) = tokio::sync::mpsc::unbounded_channel::(); - let our_network_stats = network.peer_stats(0); + let av_store = + av_store::MockAvailabilityStore::new(state.chunks.clone(), state.candidate_hashes.clone()); + + let availability_state = NetworkAvailabilityState { + candidate_hashes: state.candidate_hashes.clone(), + available_data: state.available_data.clone(), + chunks: state.chunks.clone(), + }; + + let network = NetworkEmulator::new( + config.n_validators.clone(), + test_authorities.validator_authority_id.clone(), + config.peer_bandwidth, + dependencies.task_manager.spawn_handle(), + &dependencies.registry, + ); - spawn_handle.spawn_blocking("node0-rx", "test-environment", async move { - while let Some(action) = ingress_rx.recv().await { - let size = action.size(); + let network_bridge_tx = network_bridge::MockNetworkBridgeTx::new( + config.clone(), + availability_state, + network.clone(), + ); + + let use_fast_path = match &state.config().objective { + TestObjective::DataAvailabilityRead(options) => options.fetch_from_backers, + _ => panic!("Unexpected objective"), + }; + + let (collation_req_receiver, req_cfg) = + IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); + + let subsystem = if use_fast_path { + AvailabilityRecoverySubsystem::with_fast_path( + collation_req_receiver, + Metrics::try_register(&dependencies.registry).unwrap(), + ) + } else { + AvailabilityRecoverySubsystem::with_chunks_only( + collation_req_receiver, + Metrics::try_register(&dependencies.registry).unwrap(), + ) + }; + + let (overseer, overseer_handle) = build_overseer( + dependencies.task_manager.spawn_handle(), 
+ runtime_api, + av_store, + network_bridge_tx, + subsystem, + ); - // account for our node receiving the data. - our_network_stats.inc_received(size); + ( + TestEnvironment::new( + dependencies.task_manager, + config, + dependencies.registry, + dependencies.runtime, + network, + overseer, + overseer_handle, + ), + req_cfg, + ) +} - rx_limiter.reap(size).await; - action.run().await; - } - }); +impl TestEnvironment { + // Create a new test environment with specified initial state and prometheus registry. + // We use prometheus metrics to collect per job task poll time and subsystem metrics. + pub fn new( + task_manager: TaskManager, + config: TestConfiguration, + registry: Registry, + runtime: Runtime, + network: NetworkEmulator, + overseer: Overseer, AlwaysSupportsParachains>, + overseer_handle: OverseerHandle, + ) -> Self { + let metrics = + TestEnvironmentMetrics::new(®istry).expect("Metrics need to be registered"); - // We need to start a receiver to process messages from the subsystem. - // This mocks an overseer and all dependent subsystems - task_manager.spawn_handle().spawn_blocking( - "test-environment", - "test-environment", - async move { Self::env_task(virtual_overseer, task_state, task_network, ingress_tx).await }, - ); + let spawn_handle = task_manager.spawn_handle(); + spawn_handle.spawn_blocking("overseer", "overseer", overseer.run()); let registry_clone = registry.clone(); task_manager @@ -205,7 +287,16 @@ impl TestEnvironment { .unwrap(); }); - TestEnvironment { task_manager, registry, to_subsystem, instance, config, network, metrics } + TestEnvironment { + task_manager, + runtime_handle: runtime.handle().clone(), + runtime, + registry, + overseer_handle, + config, + network, + metrics, + } } pub fn config(&self) -> &TestConfiguration { @@ -236,266 +327,20 @@ impl TestEnvironment { &self.metrics } - /// Generate a random error based on `probability`. - /// `probability` should be a number between 0 and 100. 
- fn random_error(probability: usize) -> bool { - Uniform::from(0..=99).sample(&mut thread_rng()) < probability - } - - pub fn request_size(request: &Requests) -> u64 { - match request { - Requests::ChunkFetchingV1(outgoing_request) => - outgoing_request.payload.encoded_size() as u64, - Requests::AvailableDataFetchingV1(outgoing_request) => - outgoing_request.payload.encoded_size() as u64, - _ => panic!("received an unexpected request"), - } - } - - pub fn respond_to_send_request( - state: &mut TestState, - request: Requests, - ingress_tx: tokio::sync::mpsc::UnboundedSender, - ) -> NetworkAction { - match request { - Requests::ChunkFetchingV1(outgoing_request) => { - let validator_index: usize = outgoing_request.payload.index.0 as usize; - let candidate_hash = outgoing_request.payload.candidate_hash; - - let candidate_index = state - .candidate_hashes - .get(&candidate_hash) - .expect("candidate was generated previously; qed"); - gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); - - let chunk: ChunkResponse = state.chunks.get(*candidate_index as usize).unwrap() - [validator_index] - .clone() - .into(); - let mut size = chunk.encoded_size(); - - let response = if Self::random_error(state.config().error) { - // Error will not account to any bandwidth used. - size = 0; - Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) - } else { - Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode()) - }; - - let authority_discovery_id = match outgoing_request.peer { - req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, - _ => panic!("Peer recipient not supported yet"), - }; - let authority_discovery_id_clone = authority_discovery_id.clone(); - - let future = async move { - let _ = outgoing_request.pending_response.send(response); - } - .boxed(); - - let future_wrapper = async move { - // Forward the response to the ingress channel of our node. 
- // On receive side we apply our node receiving rate limit. - let action = - NetworkAction::new(authority_discovery_id_clone, future, size, None); - ingress_tx.send(action).unwrap(); - } - .boxed(); - - NetworkAction::new( - authority_discovery_id, - future_wrapper, - size, - // Generate a random latency based on configuration. - Self::random_latency(state.config().latency.as_ref()), - ) - }, - Requests::AvailableDataFetchingV1(outgoing_request) => { - let candidate_hash = outgoing_request.payload.candidate_hash; - let candidate_index = state - .candidate_hashes - .get(&candidate_hash) - .expect("candidate was generated previously; qed"); - gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); - - let available_data = - state.available_data.get(*candidate_index as usize).unwrap().clone(); - - let size = available_data.encoded_size(); - - let response = if Self::random_error(state.config().error) { - Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) - } else { - Ok(req_res::v1::AvailableDataFetchingResponse::from(Some(available_data)) - .encode()) - }; - - let future = async move { - let _ = outgoing_request.pending_response.send(response); - } - .boxed(); - - let authority_discovery_id = match outgoing_request.peer { - req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, - _ => panic!("Peer recipient not supported yet"), - }; - let authority_discovery_id_clone = authority_discovery_id.clone(); - - let future_wrapper = async move { - // Forward the response to the ingress channel of our node. - // On receive side we apply our node receiving rate limit. - let action = - NetworkAction::new(authority_discovery_id_clone, future, size, None); - ingress_tx.send(action).unwrap(); - } - .boxed(); - - NetworkAction::new( - authority_discovery_id, - future_wrapper, - size, - // Generate a random latency based on configuration. 
- Self::random_latency(state.config().latency.as_ref()), - ) - }, - _ => panic!("received an unexpected request"), - } - } - - // A task that mocks dependent subsystems based on environment configuration. - // TODO: Spawn real subsystems, user overseer builder. - async fn env_task( - mut ctx: TestSubsystemContextHandle, - mut state: TestState, - mut network: NetworkEmulator, - ingress_tx: tokio::sync::mpsc::UnboundedSender, - ) { - loop { - futures::select! { - maybe_message = ctx.maybe_recv().fuse() => { - let message = if let Some(message) = maybe_message{ - message - } else { - gum::info!("{}", "Test completed".bright_blue()); - return - }; - - gum::trace!(target: LOG_TARGET, ?message, "Env task received message"); - - match message { - AllMessages::NetworkBridgeTx( - NetworkBridgeTxMessage::SendRequests( - requests, - _if_disconnected, - ) - ) => { - for request in requests { - network.inc_sent(Self::request_size(&request)); - let action = Self::respond_to_send_request(&mut state, request, ingress_tx.clone()); - // Account for our node sending the request over the emulated network. - network.submit_peer_action(action.peer(), action); - } - }, - AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryAvailableData(_candidate_hash, tx)) => { - // TODO: Simulate av store load by delaying the response. - state.respond_none_to_available_data_query(tx).await; - }, - AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryAllChunks(candidate_hash, tx)) => { - // Test env: We always have our own chunk. 
- state.respond_to_query_all_request(candidate_hash, |index| index == state.validator_index.0 as usize, tx).await; - }, - AllMessages::AvailabilityStore( - AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx) - ) => { - let candidate_index = state.candidate_hashes.get(&candidate_hash).expect("candidate was generated previously; qed"); - gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); - - let chunk_size = state.chunks.get(*candidate_index as usize).unwrap()[0].encoded_size(); - let _ = tx.send(Some(chunk_size)); - } - AllMessages::RuntimeApi(RuntimeApiMessage::Request( - _relay_parent, - RuntimeApiRequest::SessionInfo( - _session_index, - tx, - ) - )) => { - tx.send(Ok(Some(state.session_info()))).unwrap(); - } - _ => panic!("Unexpected input") - } - } - } - } + pub fn runtime(&self) -> Handle { + self.runtime_handle.clone() } // Send a message to the subsystem under test environment. - pub async fn send_message(&mut self, msg: AvailabilityRecoveryMessage) { - gum::trace!(msg = ?msg, "sending message"); - self.to_subsystem - .send(FromOrchestra::Communication { msg }) + pub async fn send_message(&mut self, msg: Event) { + self.overseer_handle + .send(msg) .timeout(MAX_TIME_OF_FLIGHT) .await .unwrap_or_else(|| { panic!("{}ms maximum time of flight breached", MAX_TIME_OF_FLIGHT.as_millis()) }) - .unwrap(); - } - - // Send a signal to the subsystem under test environment. 
- pub async fn send_signal(&mut self, signal: OverseerSignal) { - self.to_subsystem - .send(FromOrchestra::Signal(signal)) - .timeout(MAX_TIME_OF_FLIGHT) - .await - .unwrap_or_else(|| { - panic!( - "{}ms is more than enough for sending signals.", - MAX_TIME_OF_FLIGHT.as_millis() - ) - }) - .unwrap(); - } -} - -impl AvailabilityRecoverySubsystemInstance { - pub fn new( - registry: &Registry, - spawn_task_handle: SpawnTaskHandle, - use_fast_path: bool, - ) -> (Self, TestSubsystemContextHandle) { - let (context, virtual_overseer) = make_buffered_subsystem_context( - spawn_task_handle.clone(), - 128, - "availability-recovery-subsystem", - ); - let (collation_req_receiver, req_cfg) = - IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); - - let subsystem = if use_fast_path { - AvailabilityRecoverySubsystem::with_fast_path( - collation_req_receiver, - Metrics::try_register(®istry).unwrap(), - ) - } else { - AvailabilityRecoverySubsystem::with_chunks_only( - collation_req_receiver, - Metrics::try_register(®istry).unwrap(), - ) - }; - - let spawned_subsystem = subsystem.start(context); - let subsystem_future = async move { - spawned_subsystem.future.await.unwrap(); - }; - - spawn_task_handle.spawn_blocking( - spawned_subsystem.name, - spawned_subsystem.name, - subsystem_future, - ); - - (Self { _protocol_config: req_cfg }, virtual_overseer) + .expect("send never fails"); } } @@ -509,8 +354,7 @@ pub struct TestState { // Full test configuration config: TestConfiguration, // State starts here. - validator_public: Vec, - validator_authority_id: Vec, + test_authorities: TestAuthorities, // The test node validator index. 
validator_index: ValidatorIndex, session_index: SessionIndex, @@ -535,60 +379,6 @@ impl TestState { &self.config } - async fn respond_none_to_available_data_query( - &self, - tx: oneshot::Sender>, - ) { - let _ = tx.send(None); - } - - fn session_info(&self) -> SessionInfo { - let my_vec = (0..self.config().n_validators) - .map(|i| ValidatorIndex(i as _)) - .collect::>(); - - let validator_groups = my_vec.chunks(5).map(|x| Vec::from(x)).collect::>(); - - SessionInfo { - validators: self.validator_public.clone().into(), - discovery_keys: self.validator_authority_id.clone(), - validator_groups: IndexedVec::>::from(validator_groups), - assignment_keys: vec![], - n_cores: self.config().n_cores as u32, - zeroth_delay_tranche_width: 0, - relay_vrf_modulo_samples: 0, - n_delay_tranches: 0, - no_show_slots: 0, - needed_approvals: 0, - active_validator_indices: vec![], - dispute_period: 6, - random_seed: [0u8; 32], - } - } - async fn respond_to_query_all_request( - &self, - candidate_hash: CandidateHash, - send_chunk: impl Fn(usize) -> bool, - tx: oneshot::Sender>, - ) { - let candidate_index = self - .candidate_hashes - .get(&candidate_hash) - .expect("candidate was generated previously; qed"); - gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); - - let v = self - .chunks - .get(*candidate_index as usize) - .unwrap() - .iter() - .filter(|c| send_chunk(c.index.0 as usize)) - .cloned() - .collect(); - - let _ = tx.send(v); - } - pub fn next_candidate(&mut self) -> Option { let candidate = self.candidates.next(); let candidate_hash = candidate.as_ref().unwrap().hash(); @@ -596,6 +386,10 @@ impl TestState { candidate } + pub fn authorities(&self) -> &TestAuthorities { + &self.test_authorities + } + /// Generate candidates to be used in the test. 
pub fn generate_candidates(&mut self, count: usize) { gum::info!(target: LOG_TARGET,"{}", format!("Pre-generating {} candidates.", count).bright_blue()); @@ -624,22 +418,9 @@ impl TestState { .cycle(); } - pub fn new(config: TestConfiguration) -> Self { - let keyrings = (0..config.n_validators) - .map(|peer_index| Keyring::new(format!("Node{}", peer_index).into())) - .collect::>(); - - // Generate `AuthorityDiscoveryId`` for each peer - let validator_public: Vec = keyrings - .iter() - .map(|keyring: &Keyring| keyring.clone().public().into()) - .collect::>(); - - let validator_authority_id: Vec = keyrings - .iter() - .map(|keyring| keyring.clone().public().into()) - .collect::>() - .into(); + pub fn new(config: &TestConfiguration) -> Self { + let config = config.clone(); + let test_authorities = config.generate_authorities(); let validator_index = ValidatorIndex(0); let mut chunks = Vec::new(); @@ -687,8 +468,7 @@ impl TestState { Self { config, - validator_public, - validator_authority_id, + test_authorities, validator_index, session_index, persisted_validation_data, @@ -734,11 +514,7 @@ fn derive_erasure_chunks_with_proofs_and_root( pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestState) { let config = env.config().clone(); - env.send_signal(OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(new_leaf( - Hash::repeat_byte(1), - 1, - )))) - .await; + env.send_message(new_block_import_event(Hash::repeat_byte(1), 1)).await; let start_marker = Instant::now(); let mut batch = FuturesUnordered::new(); @@ -758,15 +534,20 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestStat let (tx, rx) = oneshot::channel(); batch.push(rx); - env.send_message(AvailabilityRecoveryMessage::RecoverAvailableData( - candidate.clone(), - 1, - Some(GroupIndex( - candidate_num as u32 % (std::cmp::max(5, config.n_cores) / 5) as u32, - )), - tx, - )) - .await; + let message = Event::MsgToSubsystem { + msg: 
AllMessages::AvailabilityRecovery( + AvailabilityRecoveryMessage::RecoverAvailableData( + candidate.clone(), + 1, + Some(GroupIndex( + candidate_num as u32 % (std::cmp::max(5, config.n_cores) / 5) as u32, + )), + tx, + ), + ), + origin: LOG_TARGET, + }; + env.send_message(message).await; } gum::info!("{}", format!("{} recoveries pending", batch.len()).bright_black()); @@ -786,8 +567,8 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestStat tokio::time::sleep(block_time_delta).await; } - env.send_signal(OverseerSignal::Conclude).await; - let duration = start_marker.elapsed().as_millis(); + env.send_message(Event::Stop).await; + let duration: u128 = start_marker.elapsed().as_millis(); let availability_bytes = availability_bytes / 1024; gum::info!("All blocks processed in {}", format!("{:?}ms", duration).cyan()); gum::info!( diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs index 4526505c3a64..f8fdcf2973eb 100644 --- a/polkadot/node/subsystem-bench/src/core/configuration.rs +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -16,7 +16,10 @@ use std::path::Path; use super::*; +use keyring::Keyring; + pub use crate::cli::TestObjective; +use polkadot_primitives::ValidatorId; use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; use serde::{Deserialize, Serialize}; @@ -98,6 +101,14 @@ impl TestSequence { } } +/// Helper struct for authority related state. +#[derive(Clone)] +pub struct TestAuthorities { + pub keyrings: Vec, + pub validator_public: Vec, + pub validator_authority_id: Vec, +} + impl TestConfiguration { pub fn write_to_disk(&self) { // Serialize a slice of configurations @@ -109,6 +120,28 @@ impl TestConfiguration { pub fn pov_sizes(&self) -> &[usize] { &self.pov_sizes } + + /// Generates the authority keys we need for the network emulation. 
+ pub fn generate_authorities(&self) -> TestAuthorities { + let keyrings = (0..self.n_validators) + .map(|peer_index| Keyring::new(format!("Node{}", peer_index).into())) + .collect::>(); + + // Generate `AuthorityDiscoveryId` for each peer + let validator_public: Vec = keyrings + .iter() + .map(|keyring: &Keyring| keyring.clone().public().into()) + .collect::>(); + + let validator_authority_id: Vec = keyrings + .iter() + .map(|keyring| keyring.clone().public().into()) + .collect::>() + .into(); + + TestAuthorities { keyrings, validator_public, validator_authority_id } + } + /// An unconstrained standard configuration matching Polkadot/Kusama pub fn ideal_network( objective: TestObjective, diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index e6b09a1c13e6..c9cc6ae40410 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -17,6 +17,7 @@ use polkadot_node_subsystem_util::metrics::prometheus::{ self, Gauge, Histogram, PrometheusError, Registry, U64, }; +use sc_service::TaskManager; const MIB: f64 = 1024.0 * 1024.0; @@ -97,3 +98,30 @@ impl TestEnvironmentMetrics { self.pov_size.observe(pov_size as f64); } } + +fn new_runtime() -> tokio::runtime::Runtime { + tokio::runtime::Builder::new_multi_thread() + .thread_name("subsystem-bench") + .enable_all() + .thread_stack_size(3 * 1024 * 1024) + .build() + .unwrap() +} + +/// Wrapper for dependencies +pub struct TestEnvironmentDependencies { + pub registry: Registry, + pub task_manager: TaskManager, + pub runtime: tokio::runtime::Runtime, +} + +impl Default for TestEnvironmentDependencies { + fn default() -> Self { + let runtime = new_runtime(); + let registry = Registry::new(); + let task_manager: TaskManager = + TaskManager::new(runtime.handle().clone(), Some(&registry)).unwrap(); + + Self { runtime, registry, task_manager } + } +} diff --git
a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs index e84aeba5b6b7..7f6ff2abfe9e 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs @@ -89,22 +89,28 @@ impl MockAvailabilityStore { #[overseer::contextbounds(AvailabilityStore, prefix = self::overseer)] impl MockAvailabilityStore { async fn run(self, mut ctx: Context) { + gum::debug!(target: LOG_TARGET, "Subsystem running"); loop { let msg = ctx.recv().await.expect("Overseer never fails us"); match msg { orchestra::FromOrchestra::Signal(_) => {}, orchestra::FromOrchestra::Communication { msg } => match msg { - AvailabilityStoreMessage::QueryAvailableData(_candidate_hash, tx) => { + AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx) => { + gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_hash, "Responding to QueryAvailableData"); + // We never have the full available data. let _ = tx.send(None); }, AvailabilityStoreMessage::QueryAllChunks(candidate_hash, tx) => { // We always have our own chunk. 
+ gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_hash, "Responding to QueryAllChunks"); self.respond_to_query_all_request(candidate_hash, |index| index == 0, tx) .await; }, AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx) => { + gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_hash, "Responding to QueryChunkSize"); + let candidate_index = self .state .candidate_hashes diff --git a/polkadot/node/subsystem-bench/src/core/mock/dummy.rs b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs index 196cc81f1e82..998153875ede 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/dummy.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs @@ -21,6 +21,8 @@ use polkadot_node_subsystem::{overseer, SpawnedSubsystem, SubsystemError}; use std::time::Duration; use tokio::time::sleep; +const LOG_TARGET: &str = "subsystem-bench::mockery"; + macro_rules! mock { // Just query by relay parent ($subsystem_name:ident) => { @@ -41,15 +43,22 @@ macro_rules! mock { let mut count_total_msg = 0; loop { futures::select!{ - _msg = ctx.recv().fuse() => { - count_total_msg +=1; - } - _ = sleep(Duration::from_secs(6)).fuse() => { - if count_total_msg > 0 { - gum::info!(target: "mock-subsystems", "Subsystem {} processed {} messages since last time", stringify!($subsystem_name), count_total_msg); + msg = ctx.recv().fuse() => { + match msg.unwrap() { + orchestra::FromOrchestra::Signal(_) => {}, + orchestra::FromOrchestra::Communication { msg } => { + gum::debug!(target: LOG_TARGET, msg = ?msg, "mocked subsystem received message"); + } + } + + count_total_msg +=1; + } + _ = sleep(Duration::from_secs(6)).fuse() => { + if count_total_msg > 0 { + gum::trace!(target: LOG_TARGET, "Subsystem {} processed {} messages since last time", stringify!($subsystem_name), count_total_msg); + } + count_total_msg = 0; } - count_total_msg = 0; - } } } } diff --git a/polkadot/node/subsystem-bench/src/core/mock/mod.rs b/polkadot/node/subsystem-bench/src/core/mock/mod.rs 
index df874de31a7c..d59642e96058 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/mod.rs @@ -14,9 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use polkadot_node_subsystem::{ - HeadSupportsParachains, Overseer, OverseerConnector, OverseerHandle, SpawnGlue, -}; +use polkadot_node_subsystem::HeadSupportsParachains; use polkadot_node_subsystem_types::Hash; pub mod av_store; @@ -24,9 +22,9 @@ pub mod dummy; pub mod network_bridge; pub mod runtime_api; -pub(crate) use dummy::*; - -use sc_service::SpawnTaskHandle; +pub use av_store::*; +pub use network_bridge::*; +pub use runtime_api::*; pub struct AlwaysSupportsParachains {} #[async_trait::async_trait] @@ -38,7 +36,9 @@ impl HeadSupportsParachains for AlwaysSupportsParachains { // An orchestra with dummy subsystems macro_rules! dummy_builder { - ($spawn_task_handle: ident) => { + ($spawn_task_handle: ident) => {{ + use super::core::mock::dummy::*; + // Initialize a mock overseer. // All subsystem except approval_voting and approval_distribution are mock subsystems. Overseer::builder() @@ -71,15 +71,7 @@ macro_rules! 
dummy_builder { .metrics(Default::default()) .supports_parachains(AlwaysSupportsParachains {}) .spawner(SpawnGlue($spawn_task_handle)) - }; + }}; } -pub fn new_overseer_with_dummy_subsystems( - spawn_task_handle: SpawnTaskHandle, -) -> (Overseer, AlwaysSupportsParachains>, OverseerHandle) { - let overseer_connector = OverseerConnector::with_event_capacity(64000); - let dummy = dummy_builder!(spawn_task_handle); - let builder = dummy.replace_chain_api(|_| MockChainApi {}); - // let (mock_overseer, mock_overseer_handle) = - builder.build_with_connector(overseer_connector).expect("Should not fail") -} +pub(crate) use dummy_builder; diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index cd374f8c18db..a6d07c3d4a20 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -44,9 +44,9 @@ use crate::core::{ /// The network bridge tx mock will respond to requests as if the request is being serviced /// by a remote peer on the network pub struct NetworkAvailabilityState { - candidate_hashes: HashMap, - available_data: Vec, - chunks: Vec>, + pub candidate_hashes: HashMap, + pub available_data: Vec, + pub chunks: Vec>, } const LOG_TARGET: &str = "subsystem-bench::network-bridge-tx-mock"; @@ -234,6 +234,7 @@ impl MockNetworkBridgeTx { orchestra::FromOrchestra::Communication { msg } => match msg { NetworkBridgeTxMessage::SendRequests(requests, _if_disconnected) => { for request in requests { + gum::debug!(target: LOG_TARGET, request = ?request, "Processing request"); self.network.inc_sent(request_size(&request)); let action = self.respond_to_send_request(request, &mut ingress_tx); // Will account for our node sending the request over the emulated diff --git a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs index 
e8c1098b97f0..a106eb130991 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs @@ -28,6 +28,8 @@ use polkadot_node_subsystem::{ use crate::core::configuration::TestConfiguration; use futures::FutureExt; +const LOG_TARGET: &str = "subsystem-bench::runtime-api-mock"; + pub struct RuntimeApiState { validator_public: Vec, validator_authority_id: Vec, @@ -89,17 +91,21 @@ impl MockRuntimeApi { match msg { orchestra::FromOrchestra::Signal(_) => {}, - orchestra::FromOrchestra::Communication { msg } => match msg { - RuntimeApiMessage::Request( - _request, - RuntimeApiRequest::SessionInfo(_session_index, sender), - ) => { - let _ = sender.send(Ok(Some(self.session_info()))); - }, - // Long term TODO: implement more as needed. - _ => { - unimplemented!("Unexpected runtime-api message") - }, + orchestra::FromOrchestra::Communication { msg } => { + gum::debug!(target: LOG_TARGET, msg=?msg, "recv message"); + + match msg { + RuntimeApiMessage::Request( + _request, + RuntimeApiRequest::SessionInfo(_session_index, sender), + ) => { + let _ = sender.send(Ok(Some(self.session_info()))); + }, + // Long term TODO: implement more as needed. + _ => { + unimplemented!("Unexpected runtime-api message") + }, + } }, } } diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index f20bb919dedb..80d961babe03 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -186,7 +186,7 @@ pub type ActionFuture = std::pin::Pin + std pub struct NetworkAction { // The function that performs the action run: ActionFuture, - // The payload size that we simulate sending from a peer + // The payload size that we simulate sending/receiving from a peer size: usize, // Peer which should run the action. 
peer: AuthorityDiscoveryId, diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index ce9e8aa3a8be..4460315c35c5 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -26,7 +26,7 @@ pub(crate) mod availability; pub(crate) mod cli; pub(crate) mod core; -use availability::{NetworkEmulation, TestEnvironment, TestState}; +use availability::{prepare_test, NetworkEmulation, TestEnvironment, TestState}; use cli::TestObjective; use core::configuration::TestConfiguration; @@ -76,21 +76,8 @@ struct BenchCli { pub objective: cli::TestObjective, } -fn new_runtime() -> tokio::runtime::Runtime { - tokio::runtime::Builder::new_multi_thread() - .thread_name("subsystem-bench") - .enable_all() - .thread_stack_size(3 * 1024 * 1024) - .build() - .unwrap() -} - impl BenchCli { fn launch(self) -> eyre::Result<()> { - use prometheus::Registry; - - let runtime = new_runtime(); - let configuration = self.standard_configuration; let mut test_config = match self.objective { TestObjective::TestSequence(options) => { @@ -120,15 +107,9 @@ impl BenchCli { let candidate_count = test_config.n_cores * test_config.num_blocks; - let mut state = TestState::new(test_config); - state.generate_candidates(candidate_count); - let mut env = TestEnvironment::new( - runtime.handle().clone(), - state.clone(), - Registry::new(), - ); - - runtime.block_on(availability::bench_chunk_recovery(&mut env, state)); + let mut state = TestState::new(&test_config); + let (mut env, _protocol_config) = prepare_test(test_config, &mut state); + env.runtime().block_on(availability::bench_chunk_recovery(&mut env, state)); } return Ok(()) }, @@ -185,14 +166,11 @@ impl BenchCli { } let candidate_count = test_config.n_cores * test_config.num_blocks; - test_config.write_to_disk(); - - let mut state = TestState::new(test_config); - state.generate_candidates(candidate_count); - let mut env = - 
TestEnvironment::new(runtime.handle().clone(), state.clone(), Registry::new()); + // test_config.write_to_disk(); - runtime.block_on(availability::bench_chunk_recovery(&mut env, state)); + let mut state = TestState::new(&test_config); + let (mut env, _protocol_config) = prepare_test(test_config, &mut state); + env.runtime().block_on(availability::bench_chunk_recovery(&mut env, state)); Ok(()) } @@ -202,7 +180,7 @@ fn main() -> eyre::Result<()> { color_eyre::install()?; let _ = env_logger::builder() .filter(Some("hyper"), log::LevelFilter::Info) - .filter(None, log::LevelFilter::Info) + // .filter(None, log::LevelFilter::Trace) .try_init() .unwrap(); diff --git a/polkadot/node/subsystem-test-helpers/src/mock.rs b/polkadot/node/subsystem-test-helpers/src/mock.rs index 522bc3c2cc4f..11e77b6e8968 100644 --- a/polkadot/node/subsystem-test-helpers/src/mock.rs +++ b/polkadot/node/subsystem-test-helpers/src/mock.rs @@ -16,7 +16,7 @@ use std::sync::Arc; -use polkadot_node_subsystem::{jaeger, ActivatedLeaf}; +use polkadot_node_subsystem::{jaeger, ActivatedLeaf, Event, BlockInfo}; use sc_client_api::UnpinHandle; use sc_keystore::LocalKeystore; use sc_utils::mpsc::tracing_unbounded; @@ -59,3 +59,13 @@ pub fn new_leaf(hash: Hash, number: BlockNumber) -> ActivatedLeaf { span: Arc::new(jaeger::Span::Disabled), } } + +/// Create a new block import event with the given hash and number.
+pub fn new_block_import_event(hash: Hash, number: BlockNumber) -> Event { + Event::BlockImported(BlockInfo { + hash, + parent_hash: Hash::default(), + number, + unpin_handle: dummy_unpin_handle(hash), + }) +} From 7aed30f13be1c8cf6de43e49945dd419613037f2 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 27 Nov 2023 19:55:18 +0200 Subject: [PATCH 22/52] make clean Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 185 +----------------- .../subsystem-bench/src/core/environment.rs | 157 ++++++++++++++- .../subsystem-bench/src/core/subsystem.rs | 16 -- .../subsystem-bench/src/subsystem-bench.rs | 13 +- 4 files changed, 171 insertions(+), 200 deletions(-) delete mode 100644 polkadot/node/subsystem-bench/src/core/subsystem.rs diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 54a3cd961319..e1974794cb8d 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -21,36 +21,24 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; -use tokio::runtime::{Handle, Runtime}; -use polkadot_node_subsystem::{ - BlockInfo, Event, Overseer, OverseerConnector, OverseerHandle, SpawnGlue, -}; +use crate::TestEnvironment; +use polkadot_node_subsystem::{Event, Overseer, OverseerConnector, OverseerHandle, SpawnGlue}; use sc_network::request_responses::ProtocolConfig; use colored::Colorize; -use futures::{channel::oneshot, stream::FuturesUnordered, FutureExt, SinkExt, StreamExt}; +use futures::{channel::oneshot, stream::FuturesUnordered, StreamExt}; use polkadot_node_metrics::metrics::Metrics; use polkadot_availability_recovery::AvailabilityRecoverySubsystem; +use crate::GENESIS_HASH; use parity_scale_codec::Encode; -use polkadot_node_network_protocol::request_response::{ - self as req_res, v1::ChunkResponse, IncomingRequest, ReqProtocolNames, Requests, -}; -use rand::{distributions::Uniform, 
prelude::Distribution, thread_rng}; - -use prometheus::Registry; -use sc_network::{OutboundFailure, RequestFailure}; - use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; +use polkadot_node_network_protocol::request_response::{IncomingRequest, ReqProtocolNames}; use polkadot_node_primitives::{BlockData, PoV, Proof}; -use polkadot_node_subsystem::{ - messages::{AllMessages, AvailabilityRecoveryMessage}, - ActiveLeavesUpdate, OverseerSignal, -}; -use std::net::{Ipv4Addr, SocketAddr}; +use polkadot_node_subsystem::messages::{AllMessages, AvailabilityRecoveryMessage}; use crate::core::{ configuration::TestAuthorities, @@ -62,12 +50,7 @@ use crate::core::{ }, }; -use super::core::{ - configuration::{PeerLatency, TestConfiguration}, - environment::TestEnvironmentMetrics, - mock::dummy_builder, - network::*, -}; +use super::core::{configuration::TestConfiguration, mock::dummy_builder, network::*}; const LOG_TARGET: &str = "subsystem-bench::availability"; @@ -75,69 +58,18 @@ use polkadot_node_primitives::{AvailableData, ErasureChunk}; use super::{cli::TestObjective, core::mock::AlwaysSupportsParachains}; use polkadot_node_subsystem_test_helpers::mock::new_block_import_event; -use polkadot_node_subsystem_util::TimeoutExt; use polkadot_primitives::{ CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, PersistedValidationData, - SessionIndex, ValidatorIndex, + ValidatorIndex, }; use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; -use sc_service::{SpawnTaskHandle, TaskManager}; +use sc_service::SpawnTaskHandle; mod cli; pub mod configuration; pub use cli::{DataAvailabilityReadOptions, NetworkEmulation, NetworkOptions}; pub use configuration::AvailabilityRecoveryConfiguration; -// A dummy genesis hash -const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); - -/// The test environment is the high level wrapper of all things required to test -/// a certain subsystem. 
-/// -/// ## Mockups -/// The overseer is passed in during construction and it can host an arbitrary number of -/// real subsystems instances and the corresponding mocked instances such that the real -/// subsystems can get their messages answered. -/// -/// As the subsystem's performance depends on network connectivity, the test environment -/// emulates validator nodes on the network, see `NetworkEmulator`. The network emulation -/// is configurable in terms of peer bandwidth, latency and connection error rate using -/// uniform distribution sampling. -/// -/// -/// ## Usage -/// `TestEnvironment` is used in tests to send `Overseer` messages or signals to the subsystem -/// under test. -/// -/// ## Collecting test metrics -/// -/// ### Prometheus -/// A prometheus endpoint is exposed while the test is running. A local Prometheus instance -/// can scrape it every 1s and a Grafana dashboard is the preferred way of visualizing -/// the performance characteristics of the subsystem. -/// -/// ### CLI -/// A subset of the Prometheus metrics are printed at the end of the test. -pub struct TestEnvironment { - // A task manager that tracks task poll durations allows us to measure - // per task CPU usage as we do in the Polkadot node. - task_manager: TaskManager, - // Our runtime - runtime: tokio::runtime::Runtime, - // A runtime handle - runtime_handle: tokio::runtime::Handle, - // The Prometheus metrics registry - registry: Registry, - // A handle to the lovely overseer - overseer_handle: OverseerHandle, - // The test intial state. The current state is owned by `env_task`. - config: TestConfiguration, - // A handle to the network emulator. 
- network: NetworkEmulator, - // Configuration/env metrics - metrics: TestEnvironmentMetrics, -} - fn build_overseer( spawn_task_handle: SpawnTaskHandle, runtime_api: MockRuntimeApi, @@ -257,107 +189,12 @@ fn prepare_test_inner( ) } -impl TestEnvironment { - // Create a new test environment with specified initial state and prometheus registry. - // We use prometheus metrics to collect per job task poll time and subsystem metrics. - pub fn new( - task_manager: TaskManager, - config: TestConfiguration, - registry: Registry, - runtime: Runtime, - network: NetworkEmulator, - overseer: Overseer, AlwaysSupportsParachains>, - overseer_handle: OverseerHandle, - ) -> Self { - let metrics = - TestEnvironmentMetrics::new(&registry).expect("Metrics need to be registered"); - - let spawn_handle = task_manager.spawn_handle(); - spawn_handle.spawn_blocking("overseer", "overseer", overseer.run()); - - let registry_clone = registry.clone(); - task_manager - .spawn_handle() - .spawn_blocking("prometheus", "test-environment", async move { - prometheus_endpoint::init_prometheus( - SocketAddr::new(std::net::IpAddr::V4(Ipv4Addr::LOCALHOST), 9999), - registry_clone, - ) - .await - .unwrap(); - }); - - TestEnvironment { - task_manager, - runtime_handle: runtime.handle().clone(), - runtime, - registry, - overseer_handle, - config, - network, - metrics, - } - } - - pub fn config(&self) -> &TestConfiguration { - &self.config - } - - pub fn network(&mut self) -> &mut NetworkEmulator { - &mut self.network - } - - pub fn registry(&self) -> &Registry { - &self.registry - } - - /// Produce a randomized duration between `min` and `max`.
- fn random_latency(maybe_peer_latency: Option<&PeerLatency>) -> Option { - if let Some(peer_latency) = maybe_peer_latency { - Some( - Uniform::from(peer_latency.min_latency..=peer_latency.max_latency) - .sample(&mut thread_rng()), - ) - } else { - None - } - } - - pub fn metrics(&self) -> &TestEnvironmentMetrics { - &self.metrics - } - - pub fn runtime(&self) -> Handle { - self.runtime_handle.clone() - } - - // Send a message to the subsystem under test environment. - pub async fn send_message(&mut self, msg: Event) { - self.overseer_handle - .send(msg) - .timeout(MAX_TIME_OF_FLIGHT) - .await - .unwrap_or_else(|| { - panic!("{}ms maximum time of flight breached", MAX_TIME_OF_FLIGHT.as_millis()) - }) - .expect("send never fails"); - } -} - -// We use this to bail out sending messages to the subsystem if it is overloaded such that -// the time of flight is breaches 5s. -// This should eventually be a test parameter. -const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); - #[derive(Clone)] pub struct TestState { // Full test configuration config: TestConfiguration, // State starts here. test_authorities: TestAuthorities, - // The test node validator index. - validator_index: ValidatorIndex, - session_index: SessionIndex, pov_sizes: Cycle>, // Generated candidate receipts to be used in the test candidates: Cycle>, @@ -422,12 +259,10 @@ impl TestState { let config = config.clone(); let test_authorities = config.generate_authorities(); - let validator_index = ValidatorIndex(0); let mut chunks = Vec::new(); let mut available_data = Vec::new(); let mut candidate_receipts = Vec::new(); let mut pov_size_to_candidate = HashMap::new(); - let session_index = 10; // we use it for all candidates. 
let persisted_validation_data = PersistedValidationData { @@ -469,8 +304,6 @@ impl TestState { Self { config, test_authorities, - validator_index, - session_index, persisted_validation_data, available_data, candidate_receipts, diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index c9cc6ae40410..4fd752675074 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -14,10 +14,23 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +use crate::{ + core::{configuration::PeerLatency, mock::AlwaysSupportsParachains, network::NetworkEmulator}, + TestConfiguration, +}; +use core::time::Duration; +use polkadot_node_subsystem::{Event, Overseer, OverseerHandle, SpawnGlue, TimeoutExt}; +use polkadot_node_subsystem_types::Hash; use polkadot_node_subsystem_util::metrics::prometheus::{ self, Gauge, Histogram, PrometheusError, Registry, U64, }; -use sc_service::TaskManager; +use rand::{ + distributions::{Distribution, Uniform}, + thread_rng, +}; +use sc_service::{SpawnTaskHandle, TaskManager}; +use std::net::{Ipv4Addr, SocketAddr}; +use tokio::runtime::{Handle, Runtime}; const MIB: f64 = 1024.0 * 1024.0; @@ -125,3 +138,145 @@ impl Default for TestEnvironmentDependencies { Self { runtime, registry, task_manager } } } + +// A dummy genesis hash +pub const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); + +// We use this to bail out sending messages to the subsystem if it is overloaded such that +// the time of flight is breaches 5s. +// This should eventually be a test parameter. +const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); + +/// The test environment is the high level wrapper of all things required to test +/// a certain subsystem. 
+/// +/// ## Mockups +/// The overseer is passed in during construction and it can host an arbitrary number of +/// real subsystems instances and the corresponding mocked instances such that the real +/// subsystems can get their messages answered. +/// +/// As the subsystem's performance depends on network connectivity, the test environment +/// emulates validator nodes on the network, see `NetworkEmulator`. The network emulation +/// is configurable in terms of peer bandwidth, latency and connection error rate using +/// uniform distribution sampling. +/// +/// +/// ## Usage +/// `TestEnvironment` is used in tests to send `Overseer` messages or signals to the subsystem +/// under test. +/// +/// ## Collecting test metrics +/// +/// ### Prometheus +/// A prometheus endpoint is exposed while the test is running. A local Prometheus instance +/// can scrape it every 1s and a Grafana dashboard is the preferred way of visualizing +/// the performance characteristics of the subsystem. +/// +/// ### CLI +/// A subset of the Prometheus metrics are printed at the end of the test. +pub struct TestEnvironment { + // A task manager that tracks task poll durations allows us to measure + // per task CPU usage as we do in the Polkadot node. + task_manager: TaskManager, + // Our runtime + runtime: tokio::runtime::Runtime, + // A runtime handle + runtime_handle: tokio::runtime::Handle, + // The Prometheus metrics registry + registry: Registry, + // A handle to the lovely overseer + overseer_handle: OverseerHandle, + // The test intial state. The current state is owned by `env_task`. + config: TestConfiguration, + // A handle to the network emulator. + network: NetworkEmulator, + // Configuration/env metrics + metrics: TestEnvironmentMetrics, +} + +impl TestEnvironment { + // Create a new test environment with specified initial state and prometheus registry. + // We use prometheus metrics to collect per job task poll time and subsystem metrics. 
+ pub fn new( + task_manager: TaskManager, + config: TestConfiguration, + registry: Registry, + runtime: Runtime, + network: NetworkEmulator, + overseer: Overseer, AlwaysSupportsParachains>, + overseer_handle: OverseerHandle, + ) -> Self { + let metrics = + TestEnvironmentMetrics::new(®istry).expect("Metrics need to be registered"); + + let spawn_handle = task_manager.spawn_handle(); + spawn_handle.spawn_blocking("overseer", "overseer", overseer.run()); + + let registry_clone = registry.clone(); + task_manager + .spawn_handle() + .spawn_blocking("prometheus", "test-environment", async move { + prometheus_endpoint::init_prometheus( + SocketAddr::new(std::net::IpAddr::V4(Ipv4Addr::LOCALHOST), 9999), + registry_clone, + ) + .await + .unwrap(); + }); + + TestEnvironment { + task_manager, + runtime_handle: runtime.handle().clone(), + runtime, + registry, + overseer_handle, + config, + network, + metrics, + } + } + + pub fn config(&self) -> &TestConfiguration { + &self.config + } + + pub fn network(&mut self) -> &mut NetworkEmulator { + &mut self.network + } + + pub fn registry(&self) -> &Registry { + &self.registry + } + + /// Produce a randomized duration between `min` and `max`. + fn random_latency(maybe_peer_latency: Option<&PeerLatency>) -> Option { + if let Some(peer_latency) = maybe_peer_latency { + Some( + Uniform::from(peer_latency.min_latency..=peer_latency.max_latency) + .sample(&mut thread_rng()), + ) + } else { + None + } + } + + pub fn metrics(&self) -> &TestEnvironmentMetrics { + &self.metrics + } + + pub fn runtime(&self) -> Handle { + self.runtime_handle.clone() + } + + // Send a message to the subsystem under test environment. 
+ pub async fn send_message(&mut self, msg: Event) { + self.overseer_handle + .send(msg) + .timeout(MAX_TIME_OF_FLIGHT) + .await + .unwrap_or_else(|| { + panic!("{}ms maximum time of flight breached", MAX_TIME_OF_FLIGHT.as_millis()) + }) + .expect("send never fails"); + } +} diff --git a/polkadot/node/subsystem-bench/src/core/subsystem.rs b/polkadot/node/subsystem-bench/src/core/subsystem.rs deleted file mode 100644 index c61e641d255d..000000000000 --- a/polkadot/node/subsystem-bench/src/core/subsystem.rs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see . 
- diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 4460315c35c5..51ce8fc1d5ea 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -26,10 +26,13 @@ pub(crate) mod availability; pub(crate) mod cli; pub(crate) mod core; -use availability::{prepare_test, NetworkEmulation, TestEnvironment, TestState}; +use availability::{prepare_test, NetworkEmulation, TestState}; use cli::TestObjective; -use core::configuration::TestConfiguration; +use core::{ + configuration::TestConfiguration, + environment::{TestEnvironment, GENESIS_HASH}, +}; use clap_num::number_range; // const LOG_TARGET: &str = "subsystem-bench"; @@ -105,8 +108,6 @@ impl BenchCli { format!("latency = {:?}", test_config.latency).bright_black(), ); - let candidate_count = test_config.n_cores * test_config.num_blocks; - let mut state = TestState::new(&test_config); let (mut env, _protocol_config) = prepare_test(test_config, &mut state); env.runtime().block_on(availability::bench_chunk_recovery(&mut env, state)); @@ -165,11 +166,9 @@ impl BenchCli { test_config.bandwidth = bandwidth * 1024; } - let candidate_count = test_config.n_cores * test_config.num_blocks; - // test_config.write_to_disk(); - let mut state = TestState::new(&test_config); let (mut env, _protocol_config) = prepare_test(test_config, &mut state); + // test_config.write_to_disk(); env.runtime().block_on(availability::bench_chunk_recovery(&mut env, state)); Ok(()) From b51485bfe995c8891107b8c8618dc4b972c1d0c0 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 27 Nov 2023 20:00:10 +0200 Subject: [PATCH 23/52] more cleaning Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 13 +----- .../subsystem-bench/src/core/environment.rs | 41 ++++++++----------- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs 
b/polkadot/node/subsystem-bench/src/availability/mod.rs index e1974794cb8d..02ec794dc745 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -175,18 +175,7 @@ fn prepare_test_inner( subsystem, ); - ( - TestEnvironment::new( - dependencies.task_manager, - config, - dependencies.registry, - dependencies.runtime, - network, - overseer, - overseer_handle, - ), - req_cfg, - ) + (TestEnvironment::new(dependencies, config, network, overseer, overseer_handle), req_cfg) } #[derive(Clone)] diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index 4fd752675074..d213d24c9af7 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -30,7 +30,7 @@ use rand::{ }; use sc_service::{SpawnTaskHandle, TaskManager}; use std::net::{Ipv4Addr, SocketAddr}; -use tokio::runtime::{Handle, Runtime}; +use tokio::runtime::Handle; const MIB: f64 = 1024.0 * 1024.0; @@ -175,15 +175,10 @@ const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); /// ### CLI /// A subset of the Prometheus metrics are printed at the end of the test. pub struct TestEnvironment { - // A task manager that tracks task poll durations allows us to measure - // per task CPU usage as we do in the Polkadot node. - task_manager: TaskManager, - // Our runtime - runtime: tokio::runtime::Runtime, + // Test dependencies + dependencies: TestEnvironmentDependencies, // A runtime handle runtime_handle: tokio::runtime::Handle, - // The Prometheus metrics registry - registry: Registry, // A handle to the lovely overseer overseer_handle: OverseerHandle, // The test intial state. The current state is owned by `env_task`. @@ -198,37 +193,35 @@ impl TestEnvironment { // Create a new test environment with specified initial state and prometheus registry. 
// We use prometheus metrics to collect per job task poll time and subsystem metrics. pub fn new( - task_manager: TaskManager, + dependencies: TestEnvironmentDependencies, config: TestConfiguration, - registry: Registry, - runtime: Runtime, network: NetworkEmulator, overseer: Overseer, AlwaysSupportsParachains>, overseer_handle: OverseerHandle, ) -> Self { - let metrics = - TestEnvironmentMetrics::new(®istry).expect("Metrics need to be registered"); + let metrics = TestEnvironmentMetrics::new(&dependencies.registry) + .expect("Metrics need to be registered"); - let spawn_handle = task_manager.spawn_handle(); + let spawn_handle = dependencies.task_manager.spawn_handle(); spawn_handle.spawn_blocking("overseer", "overseer", overseer.run()); - let registry_clone = registry.clone(); - task_manager - .spawn_handle() - .spawn_blocking("prometheus", "test-environment", async move { + let registry_clone = dependencies.registry.clone(); + dependencies.task_manager.spawn_handle().spawn_blocking( + "prometheus", + "test-environment", + async move { prometheus_endpoint::init_prometheus( SocketAddr::new(std::net::IpAddr::V4(Ipv4Addr::LOCALHOST), 9999), registry_clone, ) .await .unwrap(); - }); + }, + ); TestEnvironment { - task_manager, - runtime_handle: runtime.handle().clone(), - runtime, - registry, + runtime_handle: dependencies.runtime.handle().clone(), + dependencies, overseer_handle, config, network, @@ -245,7 +238,7 @@ impl TestEnvironment { } pub fn registry(&self) -> &Registry { - &self.registry + &self.dependencies.registry } /// Produce a randomized duration between `min` and `max`. 
From 7e464447d7db427223089a2d01aa38048f7c8927 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 27 Nov 2023 20:11:06 +0200 Subject: [PATCH 24/52] more cleaning Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 5 +- .../subsystem-bench/src/core/configuration.rs | 1 + .../node/subsystem-bench/src/core/display.rs | 109 ------------------ .../subsystem-bench/src/core/environment.rs | 20 +--- .../node/subsystem-bench/src/core/network.rs | 1 + 5 files changed, 5 insertions(+), 131 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 02ec794dc745..5546d9cc357f 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -97,6 +97,7 @@ pub fn prepare_test( } /// Takes a test configuration and uses it to creates the `TestEnvironment`. +#[allow(unused)] pub fn prepare_test_with_dependencies( config: TestConfiguration, state: &mut TestState, @@ -212,10 +213,6 @@ impl TestState { candidate } - pub fn authorities(&self) -> &TestAuthorities { - &self.test_authorities - } - /// Generate candidates to be used in the test. 
pub fn generate_candidates(&mut self, count: usize) { gum::info!(target: LOG_TARGET,"{}", format!("Pre-generating {} candidates.", count).bright_blue()); diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs index f8fdcf2973eb..35fa51790c91 100644 --- a/polkadot/node/subsystem-bench/src/core/configuration.rs +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -110,6 +110,7 @@ pub struct TestAuthorities { } impl TestConfiguration { + #[allow(unused)] pub fn write_to_disk(&self) { // Serialize a slice of configurations let yaml = serde_yaml::to_string(&TestSequence { test_configurations: vec![self.clone()] }) diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index 4b63f45c5f8a..921c22b2059e 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -103,115 +103,6 @@ impl Display for TestMetric { } } -// fn encode_impl( -// &self, -// metric_families: &[MetricFamily], -// writer: &mut dyn WriteUtf8, -// ) -> Result<()> { for mf in metric_families { // Fail-fast checks. check_metric_family(mf)?; - -// // Write `# HELP` header. -// let name = mf.get_name(); -// let help = mf.get_help(); -// if !help.is_empty() { -// writer.write_all("# HELP ")?; -// writer.write_all(name)?; -// writer.write_all(" ")?; -// writer.write_all(&escape_string(help, false))?; -// writer.write_all("\n")?; -// } - -// // Write `# TYPE` header. 
-// let metric_type = mf.get_field_type(); -// let lowercase_type = format!("{:?}", metric_type).to_lowercase(); -// writer.write_all("# TYPE ")?; -// writer.write_all(name)?; -// writer.write_all(" ")?; -// writer.write_all(&lowercase_type)?; -// writer.write_all("\n")?; - -// for m in mf.get_metric() { -// match metric_type { -// MetricType::COUNTER => { -// write_sample(writer, name, None, m, None, m.get_counter().get_value())?; -// } -// MetricType::GAUGE => { -// write_sample(writer, name, None, m, None, m.get_gauge().get_value())?; -// } -// MetricType::HISTOGRAM => { -// let h = m.get_histogram(); - -// let mut inf_seen = false; -// for b in h.get_bucket() { -// let upper_bound = b.get_upper_bound(); -// write_sample( -// writer, -// name, -// Some("_bucket"), -// m, -// Some((BUCKET_LABEL, &upper_bound.to_string())), -// b.get_cumulative_count() as f64, -// )?; -// if upper_bound.is_sign_positive() && upper_bound.is_infinite() { -// inf_seen = true; -// } -// } -// if !inf_seen { -// write_sample( -// writer, -// name, -// Some("_bucket"), -// m, -// Some((BUCKET_LABEL, POSITIVE_INF)), -// h.get_sample_count() as f64, -// )?; -// } - -// write_sample(writer, name, Some("_sum"), m, None, h.get_sample_sum())?; - -// write_sample( -// writer, -// name, -// Some("_count"), -// m, -// None, -// h.get_sample_count() as f64, -// )?; -// } -// MetricType::SUMMARY => { -// let s = m.get_summary(); - -// for q in s.get_quantile() { -// write_sample( -// writer, -// name, -// None, -// m, -// Some((QUANTILE, &q.get_quantile().to_string())), -// q.get_value(), -// )?; -// } - -// write_sample(writer, name, Some("_sum"), m, None, s.get_sample_sum())?; - -// write_sample( -// writer, -// name, -// Some("_count"), -// m, -// None, -// s.get_sample_count() as f64, -// )?; -// } -// MetricType::UNTYPED => { -// unimplemented!(); -// } -// } -// } -// } - -// Ok(()) -// } - // Returns `false` if metric should be skipped. 
fn check_metric_family(mf: &MetricFamily) -> bool { if mf.get_metric().is_empty() { diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index d213d24c9af7..28e98c6b42d0 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -15,7 +15,7 @@ // along with Polkadot. If not, see . use crate::{ - core::{configuration::PeerLatency, mock::AlwaysSupportsParachains, network::NetworkEmulator}, + core::{mock::AlwaysSupportsParachains, network::NetworkEmulator}, TestConfiguration, }; use core::time::Duration; @@ -24,10 +24,6 @@ use polkadot_node_subsystem_types::Hash; use polkadot_node_subsystem_util::metrics::prometheus::{ self, Gauge, Histogram, PrometheusError, Registry, U64, }; -use rand::{ - distributions::{Distribution, Uniform}, - thread_rng, -}; use sc_service::{SpawnTaskHandle, TaskManager}; use std::net::{Ipv4Addr, SocketAddr}; use tokio::runtime::Handle; @@ -181,7 +177,7 @@ pub struct TestEnvironment { runtime_handle: tokio::runtime::Handle, // A handle to the lovely overseer overseer_handle: OverseerHandle, - // The test intial state. The current state is owned by `env_task`. + // The test configuration. config: TestConfiguration, // A handle to the network emulator. network: NetworkEmulator, @@ -241,18 +237,6 @@ impl TestEnvironment { &self.dependencies.registry } - /// Produce a randomized duration between `min` and `max`. 
- fn random_latency(maybe_peer_latency: Option<&PeerLatency>) -> Option { - if let Some(peer_latency) = maybe_peer_latency { - Some( - Uniform::from(peer_latency.min_latency..=peer_latency.max_latency) - .sample(&mut thread_rng()), - ) - } else { - None - } - } - pub fn metrics(&self) -> &TestEnvironmentMetrics { &self.metrics } diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index 80d961babe03..f5532087e35c 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -334,6 +334,7 @@ impl NetworkEmulator { } // Increment bytes received by our node (the node that contains the subsystem under test) + #[allow(unused)] pub fn inc_received(&self, bytes: u64) { // Our node always is peer 0. self.metrics.on_peer_received(0, bytes); From d3df9279adbe6bfb78856e16ff4502d09245c25f Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 27 Nov 2023 22:37:48 +0200 Subject: [PATCH 25/52] proper overseer control Signed-off-by: Andrei Sandu --- Cargo.lock | 1 + polkadot/node/subsystem-bench/Cargo.toml | 1 + .../subsystem-bench/src/availability/mod.rs | 56 ++++++++----------- .../node/subsystem-bench/src/core/display.rs | 1 + .../subsystem-bench/src/core/environment.rs | 31 ++++++++-- .../subsystem-bench/src/core/mock/av_store.rs | 9 ++- .../subsystem-bench/src/core/mock/dummy.rs | 10 +++- .../src/core/mock/network_bridge.rs | 8 ++- .../src/core/mock/runtime_api.rs | 29 +++++----- .../subsystem-bench/src/subsystem-bench.rs | 2 + .../node/subsystem-test-helpers/src/mock.rs | 8 +-- 11 files changed, 91 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b349886761ad..197807b7fa8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13445,6 +13445,7 @@ dependencies = [ "polkadot-node-subsystem-test-helpers", "polkadot-node-subsystem-types", "polkadot-node-subsystem-util", + "polkadot-overseer", "polkadot-primitives", 
"polkadot-primitives-test-helpers", "prometheus", diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml index 8296874c0dab..f775a1ff9efe 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -24,6 +24,7 @@ polkadot-primitives = { path = "../../primitives" } polkadot-node-network-protocol = { path = "../network/protocol" } polkadot-availability-recovery = { path = "../network/availability-recovery", features=["subsystem-benchmarks"]} color-eyre = { version = "0.6.1", default-features = false } +polkadot-overseer = { path = "../overseer" } colored = "2.0.4" assert_matches = "1.5" async-trait = "0.1.57" diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 5546d9cc357f..6282a6b63f01 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -23,7 +23,9 @@ use std::{ }; use crate::TestEnvironment; -use polkadot_node_subsystem::{Event, Overseer, OverseerConnector, OverseerHandle, SpawnGlue}; +use polkadot_node_subsystem::{Overseer, OverseerConnector, SpawnGlue}; +use polkadot_overseer::Handle as OverseerHandle; + use sc_network::request_responses::ProtocolConfig; use colored::Colorize; @@ -41,7 +43,6 @@ use polkadot_node_primitives::{BlockData, PoV, Proof}; use polkadot_node_subsystem::messages::{AllMessages, AvailabilityRecoveryMessage}; use crate::core::{ - configuration::TestAuthorities, environment::TestEnvironmentDependencies, mock::{ av_store, @@ -57,7 +58,7 @@ const LOG_TARGET: &str = "subsystem-bench::availability"; use polkadot_node_primitives::{AvailableData, ErasureChunk}; use super::{cli::TestObjective, core::mock::AlwaysSupportsParachains}; -use polkadot_node_subsystem_test_helpers::mock::new_block_import_event; +use polkadot_node_subsystem_test_helpers::mock::new_block_import_info; use polkadot_primitives::{ 
CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, PersistedValidationData, ValidatorIndex, @@ -85,7 +86,10 @@ fn build_overseer( .replace_network_bridge_tx(|_| network_bridge) .replace_availability_recovery(|_| availability_recovery); - builder.build_with_connector(overseer_connector).expect("Should not fail") + let (overseer, raw_handle) = + builder.build_with_connector(overseer_connector).expect("Should not fail"); + + (overseer, OverseerHandle::new(raw_handle)) } /// Takes a test configuration and uses it to creates the `TestEnvironment`. @@ -119,11 +123,7 @@ fn prepare_test_inner( // Generate test authorities. let test_authorities = config.generate_authorities(); - let runtime_api = runtime_api::MockRuntimeApi::new( - config.clone(), - test_authorities.validator_public.clone(), - test_authorities.validator_authority_id.clone(), - ); + let runtime_api = runtime_api::MockRuntimeApi::new(config.clone(), test_authorities.clone()); let av_store = av_store::MockAvailabilityStore::new(state.chunks.clone(), state.candidate_hashes.clone()); @@ -136,7 +136,7 @@ fn prepare_test_inner( let network = NetworkEmulator::new( config.n_validators.clone(), - test_authorities.validator_authority_id.clone(), + test_authorities.validator_authority_id, config.peer_bandwidth, dependencies.task_manager.spawn_handle(), &dependencies.registry, @@ -183,18 +183,14 @@ fn prepare_test_inner( pub struct TestState { // Full test configuration config: TestConfiguration, - // State starts here. - test_authorities: TestAuthorities, pov_sizes: Cycle>, // Generated candidate receipts to be used in the test candidates: Cycle>, - candidates_generated: usize, // Map from pov size to candidate index pov_size_to_candidate: HashMap, // Map from generated candidate hashes to candidate index in `available_data` // and `chunks`. 
candidate_hashes: HashMap, - persisted_validation_data: PersistedValidationData, candidate_receipts: Vec, available_data: Vec, @@ -243,7 +239,6 @@ impl TestState { pub fn new(config: &TestConfiguration) -> Self { let config = config.clone(); - let test_authorities = config.generate_authorities(); let mut chunks = Vec::new(); let mut available_data = Vec::new(); @@ -289,14 +284,11 @@ impl TestState { Self { config, - test_authorities, - persisted_validation_data, available_data, candidate_receipts, chunks, pov_size_to_candidate, pov_sizes, - candidates_generated: 0, candidate_hashes: HashMap::new(), candidates: Vec::new().into_iter().cycle(), } @@ -333,7 +325,7 @@ fn derive_erasure_chunks_with_proofs_and_root( pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestState) { let config = env.config().clone(); - env.send_message(new_block_import_event(Hash::repeat_byte(1), 1)).await; + env.import_block(new_block_import_info(Hash::repeat_byte(1), 1)).await; let start_marker = Instant::now(); let mut batch = FuturesUnordered::new(); @@ -353,19 +345,16 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestStat let (tx, rx) = oneshot::channel(); batch.push(rx); - let message = Event::MsgToSubsystem { - msg: AllMessages::AvailabilityRecovery( - AvailabilityRecoveryMessage::RecoverAvailableData( - candidate.clone(), - 1, - Some(GroupIndex( - candidate_num as u32 % (std::cmp::max(5, config.n_cores) / 5) as u32, - )), - tx, - ), + let message = AllMessages::AvailabilityRecovery( + AvailabilityRecoveryMessage::RecoverAvailableData( + candidate.clone(), + 1, + Some(GroupIndex( + candidate_num as u32 % (std::cmp::max(5, config.n_cores) / 5) as u32, + )), + tx, ), - origin: LOG_TARGET, - }; + ); env.send_message(message).await; } @@ -386,7 +375,8 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestStat tokio::time::sleep(block_time_delta).await; } - env.send_message(Event::Stop).await; + env.stop().await; + 
let duration: u128 = start_marker.elapsed().as_millis(); let availability_bytes = availability_bytes / 1024; gum::info!("All blocks processed in {}", format!("{:?}ms", duration).cyan()); @@ -416,7 +406,7 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestStat let test_metrics = super::core::display::parse_metrics(&env.registry()); let subsystem_cpu_metrics = - test_metrics.subset_with_label_value("task_group", "availability-recovery-subsystem"); + test_metrics.subset_with_label_value("task_group", "availability-recovery"); let total_cpu = subsystem_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); gum::info!(target: LOG_TARGET, "Total subsystem CPU usage {}", format!("{:.2}s", total_cpu).bright_purple()); gum::info!(target: LOG_TARGET, "CPU usage per block {}", format!("{:.2}s", total_cpu/env.config().num_blocks as f64).bright_purple()); diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index 921c22b2059e..13ea7d375e95 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -13,6 +13,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +// //! Some helper methods for parsing prometheus metrics to a format that can be //! displayed in the CLI. //! 
diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index 28e98c6b42d0..fd09de9169a4 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -19,11 +19,15 @@ use crate::{ TestConfiguration, }; use core::time::Duration; -use polkadot_node_subsystem::{Event, Overseer, OverseerHandle, SpawnGlue, TimeoutExt}; +use polkadot_overseer::{BlockInfo, Handle as OverseerHandle}; + +use polkadot_node_subsystem::{messages::AllMessages, Overseer, SpawnGlue, TimeoutExt}; use polkadot_node_subsystem_types::Hash; use polkadot_node_subsystem_util::metrics::prometheus::{ self, Gauge, Histogram, PrometheusError, Registry, U64, }; + +use sc_network::peer_store::LOG_TARGET; use sc_service::{SpawnTaskHandle, TaskManager}; use std::net::{Ipv4Addr, SocketAddr}; use tokio::runtime::Handle; @@ -199,8 +203,8 @@ impl TestEnvironment { .expect("Metrics need to be registered"); let spawn_handle = dependencies.task_manager.spawn_handle(); - spawn_handle.spawn_blocking("overseer", "overseer", overseer.run()); + spawn_handle.spawn_blocking("overseer", "overseer", overseer.run()); let registry_clone = dependencies.registry.clone(); dependencies.task_manager.spawn_handle().spawn_blocking( "prometheus", @@ -246,14 +250,29 @@ impl TestEnvironment { } // Send a message to the subsystem under test environment. - pub async fn send_message(&mut self, msg: Event) { + pub async fn send_message(&mut self, msg: AllMessages) { self.overseer_handle - .send(msg) + .send_msg(msg, LOG_TARGET) .timeout(MAX_TIME_OF_FLIGHT) .await .unwrap_or_else(|| { panic!("{}ms maximum time of flight breached", MAX_TIME_OF_FLIGHT.as_millis()) - }) - .expect("send never fails"); + }); + } + + // Send a signal to the subsystem under test environment. 
+ pub async fn import_block(&mut self, block: BlockInfo) { + self.overseer_handle + .block_imported(block) + .timeout(MAX_TIME_OF_FLIGHT) + .await + .unwrap_or_else(|| { + panic!("{}ms maximum time of flight breached", MAX_TIME_OF_FLIGHT.as_millis()) + }); + } + + // Stop overseer and subsystems. + pub async fn stop(&mut self) { + self.overseer_handle.stop().await; } } diff --git a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs index 7f6ff2abfe9e..1ff7d1728af9 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs @@ -29,6 +29,8 @@ use polkadot_node_subsystem::{ messages::AvailabilityStoreMessage, overseer, SpawnedSubsystem, SubsystemError, }; +use polkadot_node_subsystem_types::OverseerSignal; + pub struct AvailabilityStoreState { candidate_hashes: HashMap, chunks: Vec>, @@ -82,7 +84,7 @@ impl MockAvailabilityStore { fn start(self, ctx: Context) -> SpawnedSubsystem { let future = self.run(ctx).map(|_| Ok(())).boxed(); - SpawnedSubsystem { name: "av-store-mock-subsystem", future } + SpawnedSubsystem { name: "test-environment", future } } } @@ -94,7 +96,10 @@ impl MockAvailabilityStore { let msg = ctx.recv().await.expect("Overseer never fails us"); match msg { - orchestra::FromOrchestra::Signal(_) => {}, + orchestra::FromOrchestra::Signal(signal) => match signal { + OverseerSignal::Conclude => return, + _ => {}, + }, orchestra::FromOrchestra::Communication { msg } => match msg { AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx) => { gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_hash, "Responding to QueryAvailableData"); diff --git a/polkadot/node/subsystem-bench/src/core/mock/dummy.rs b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs index 998153875ede..0628368a49c0 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/dummy.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs @@ 
-33,7 +33,8 @@ macro_rules! mock { fn start(self, ctx: Context) -> SpawnedSubsystem { let future = self.run(ctx).map(|_| Ok(())).boxed(); - SpawnedSubsystem { name: stringify!($subsystem_name), future } + // The name will appear in substrate CPU task metrics as `task_group`.` + SpawnedSubsystem { name: "test-environment", future } } } @@ -45,7 +46,12 @@ macro_rules! mock { futures::select!{ msg = ctx.recv().fuse() => { match msg.unwrap() { - orchestra::FromOrchestra::Signal(_) => {}, + orchestra::FromOrchestra::Signal(signal) => { + match signal { + polkadot_node_subsystem_types::OverseerSignal::Conclude => {return}, + _ => {} + } + }, orchestra::FromOrchestra::Communication { msg } => { gum::debug!(target: LOG_TARGET, msg = ?msg, "mocked subsystem received message"); } diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index a6d07c3d4a20..144a16b9f14b 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -17,6 +17,7 @@ //! A generic av store subsystem mockup suitable to be used in benchmarks. 
use parity_scale_codec::Encode; +use polkadot_node_subsystem_types::OverseerSignal; use std::collections::HashMap; @@ -191,7 +192,7 @@ impl MockNetworkBridgeTx { fn start(self, ctx: Context) -> SpawnedSubsystem { let future = self.run(ctx).map(|_| Ok(())).boxed(); - SpawnedSubsystem { name: "network-bridge-tx-mock-subsystem", future } + SpawnedSubsystem { name: "test-environment", future } } } @@ -230,7 +231,10 @@ impl MockNetworkBridgeTx { let msg = ctx.recv().await.expect("Overseer never fails us"); match msg { - orchestra::FromOrchestra::Signal(_) => {}, + orchestra::FromOrchestra::Signal(signal) => match signal { + OverseerSignal::Conclude => return, + _ => {}, + }, orchestra::FromOrchestra::Communication { msg } => match msg { NetworkBridgeTxMessage::SendRequests(requests, _if_disconnected) => { for request in requests { diff --git a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs index a106eb130991..9cbe025ae806 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs @@ -16,23 +16,21 @@ //! //! A generic runtime api subsystem mockup suitable to be used in benchmarks. 
-use polkadot_primitives::{ - AuthorityDiscoveryId, GroupIndex, IndexedVec, SessionInfo, ValidatorId, ValidatorIndex, -}; +use polkadot_primitives::{GroupIndex, IndexedVec, SessionInfo, ValidatorIndex}; use polkadot_node_subsystem::{ messages::{RuntimeApiMessage, RuntimeApiRequest}, overseer, SpawnedSubsystem, SubsystemError, }; +use polkadot_node_subsystem_types::OverseerSignal; -use crate::core::configuration::TestConfiguration; +use crate::core::configuration::{TestAuthorities, TestConfiguration}; use futures::FutureExt; const LOG_TARGET: &str = "subsystem-bench::runtime-api-mock"; pub struct RuntimeApiState { - validator_public: Vec, - validator_authority_id: Vec, + authorities: TestAuthorities, } pub struct MockRuntimeApi { @@ -41,12 +39,8 @@ pub struct MockRuntimeApi { } impl MockRuntimeApi { - pub fn new( - config: TestConfiguration, - validator_public: Vec, - validator_authority_id: Vec, - ) -> MockRuntimeApi { - Self { state: RuntimeApiState { validator_public, validator_authority_id }, config } + pub fn new(config: TestConfiguration, authorities: TestAuthorities) -> MockRuntimeApi { + Self { state: RuntimeApiState { authorities }, config } } fn session_info(&self) -> SessionInfo { @@ -57,8 +51,8 @@ impl MockRuntimeApi { let validator_groups = all_validators.chunks(5).map(|x| Vec::from(x)).collect::>(); SessionInfo { - validators: self.state.validator_public.clone().into(), - discovery_keys: self.state.validator_authority_id.clone(), + validators: self.state.authorities.validator_public.clone().into(), + discovery_keys: self.state.authorities.validator_authority_id.clone(), validator_groups: IndexedVec::>::from(validator_groups), assignment_keys: vec![], n_cores: self.config.n_cores as u32, @@ -79,7 +73,7 @@ impl MockRuntimeApi { fn start(self, ctx: Context) -> SpawnedSubsystem { let future = self.run(ctx).map(|_| Ok(())).boxed(); - SpawnedSubsystem { name: "runtime-api-mock-subsystem", future } + SpawnedSubsystem { name: "test-environment", future } } } 
@@ -90,7 +84,10 @@ impl MockRuntimeApi { let msg = ctx.recv().await.expect("Overseer never fails us"); match msg { - orchestra::FromOrchestra::Signal(_) => {}, + orchestra::FromOrchestra::Signal(signal) => match signal { + OverseerSignal::Conclude => return, + _ => {}, + }, orchestra::FromOrchestra::Communication { msg } => { gum::debug!(target: LOG_TARGET, msg=?msg, "recv message"); diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 51ce8fc1d5ea..f9261d848778 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -179,6 +179,8 @@ fn main() -> eyre::Result<()> { color_eyre::install()?; let _ = env_logger::builder() .filter(Some("hyper"), log::LevelFilter::Info) + // Avoid `Terminating due to subsystem exit subsystem` warnings + .filter(Some("polkadot_overseer"), log::LevelFilter::Error) // .filter(None, log::LevelFilter::Trace) .try_init() .unwrap(); diff --git a/polkadot/node/subsystem-test-helpers/src/mock.rs b/polkadot/node/subsystem-test-helpers/src/mock.rs index 11e77b6e8968..fc2dd6a4e34e 100644 --- a/polkadot/node/subsystem-test-helpers/src/mock.rs +++ b/polkadot/node/subsystem-test-helpers/src/mock.rs @@ -16,7 +16,7 @@ use std::sync::Arc; -use polkadot_node_subsystem::{jaeger, ActivatedLeaf, Event, BlockInfo}; +use polkadot_node_subsystem::{jaeger, ActivatedLeaf,BlockInfo}; use sc_client_api::UnpinHandle; use sc_keystore::LocalKeystore; use sc_utils::mpsc::tracing_unbounded; @@ -61,11 +61,11 @@ pub fn new_leaf(hash: Hash, number: BlockNumber) -> ActivatedLeaf { } /// Create a new leaf with the given hash and number. 
-pub fn new_block_import_event(hash: Hash, number: BlockNumber) -> Event { - Event::BlockImported(BlockInfo { +pub fn new_block_import_info(hash: Hash, number: BlockNumber) -> BlockInfo { + BlockInfo { hash, parent_hash: Hash::default(), number, unpin_handle: dummy_unpin_handle(hash), - }) + } } From 7557768d740a87336cb0479d78ebe2c7b816e6b2 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 27 Nov 2023 23:06:17 +0200 Subject: [PATCH 26/52] refactor CLI display of env stats Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 27 +------- .../subsystem-bench/src/core/environment.rs | 63 +++++++++++++++++-- .../src/core/mock/network_bridge.rs | 2 + .../node/subsystem-bench/src/core/network.rs | 2 +- 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 6282a6b63f01..ae4e743205e3 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -390,30 +390,5 @@ pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestStat .red() ); - let stats = env.network().stats(); - gum::info!( - "Total received from network: {}", - format!( - "{} MiB", - stats - .iter() - .enumerate() - .map(|(_index, stats)| stats.tx_bytes_total as u128) - .sum::() / (1024 * 1024) - ) - .cyan() - ); - - let test_metrics = super::core::display::parse_metrics(&env.registry()); - let subsystem_cpu_metrics = - test_metrics.subset_with_label_value("task_group", "availability-recovery"); - let total_cpu = subsystem_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); - gum::info!(target: LOG_TARGET, "Total subsystem CPU usage {}", format!("{:.2}s", total_cpu).bright_purple()); - gum::info!(target: LOG_TARGET, "CPU usage per block {}", format!("{:.2}s", total_cpu/env.config().num_blocks as f64).bright_purple()); - - let test_env_cpu_metrics = - 
test_metrics.subset_with_label_value("task_group", "test-environment"); - let total_cpu = test_env_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); - gum::info!(target: LOG_TARGET, "Total test environment CPU usage {}", format!("{:.2}s", total_cpu).bright_purple()); - gum::info!(target: LOG_TARGET, "CPU usage per block {}", format!("{:.2}s", total_cpu/env.config().num_blocks as f64).bright_purple()); + gum::info!("{}", &env); } diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index fd09de9169a4..24d10ecb1fa1 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -18,6 +18,7 @@ use crate::{ core::{mock::AlwaysSupportsParachains, network::NetworkEmulator}, TestConfiguration, }; +use colored::Colorize; use core::time::Duration; use polkadot_overseer::{BlockInfo, Handle as OverseerHandle}; @@ -29,7 +30,10 @@ use polkadot_node_subsystem_util::metrics::prometheus::{ use sc_network::peer_store::LOG_TARGET; use sc_service::{SpawnTaskHandle, TaskManager}; -use std::net::{Ipv4Addr, SocketAddr}; +use std::{ + fmt::Display, + net::{Ipv4Addr, SocketAddr}, +}; use tokio::runtime::Handle; const MIB: f64 = 1024.0 * 1024.0; @@ -233,8 +237,8 @@ impl TestEnvironment { &self.config } - pub fn network(&mut self) -> &mut NetworkEmulator { - &mut self.network + pub fn network(&self) -> &NetworkEmulator { + &self.network } pub fn registry(&self) -> &Registry { @@ -260,7 +264,7 @@ impl TestEnvironment { }); } - // Send a signal to the subsystem under test environment. + // Send an `ActiveLeavesUpdate` signal to all subsystems under test. 
pub async fn import_block(&mut self, block: BlockInfo) { self.overseer_handle .block_imported(block) @@ -276,3 +280,54 @@ impl TestEnvironment { self.overseer_handle.stop().await; } } + +impl Display for TestEnvironment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let stats = self.network().stats(); + + writeln!(f, "\n")?; + writeln!( + f, + "Total received from network: {}", + format!( + "{} MiB", + stats + .iter() + .enumerate() + .map(|(_index, stats)| stats.tx_bytes_total as u128) + .sum::() / (1024 * 1024) + ) + .cyan() + )?; + writeln!( + f, + "Total sent to network: {}", + format!("{} KiB", stats[0].tx_bytes_total / (1024)).cyan() + )?; + + let test_metrics = super::display::parse_metrics(self.registry()); + let subsystem_cpu_metrics = + test_metrics.subset_with_label_value("task_group", "availability-recovery"); + let total_cpu = subsystem_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); + writeln!(f, "Total subsystem CPU usage {}", format!("{:.2}s", total_cpu).bright_purple())?; + writeln!( + f, + "CPU usage per block {}", + format!("{:.2}s", total_cpu / self.config().num_blocks as f64).bright_purple() + )?; + + let test_env_cpu_metrics = + test_metrics.subset_with_label_value("task_group", "test-environment"); + let total_cpu = test_env_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); + writeln!( + f, + "Total test environment CPU usage {}", + format!("{:.2}s", total_cpu).bright_purple() + )?; + writeln!( + f, + "CPU usage per block {}", + format!("{:.2}s", total_cpu / self.config().num_blocks as f64).bright_purple() + ) + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index 144a16b9f14b..a45cacd0241a 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -80,6 +80,8 @@ impl MockNetworkBridgeTx { match request { 
Requests::ChunkFetchingV1(outgoing_request) => { + self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); + let validator_index: usize = outgoing_request.payload.index.0 as usize; let candidate_hash = outgoing_request.payload.candidate_hash; diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index f5532087e35c..f36c0967466b 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -315,7 +315,7 @@ impl NetworkEmulator { } // Returns the sent/received stats for all peers. - pub fn stats(&mut self) -> Vec { + pub fn stats(&self) -> Vec { let r = self .stats .iter() From 787dc00bc7c411becbc17e04eea29fa91d1f8e00 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 28 Nov 2023 10:13:11 +0200 Subject: [PATCH 27/52] Add grafana dashboards for DA read Signed-off-by: Andrei Sandu --- .../src/grafana/availability-read.json | 1872 +++++++++++++++++ .../src/grafana/task-cpu-usage.json | 755 +++++++ 2 files changed, 2627 insertions(+) create mode 100644 polkadot/node/subsystem-bench/src/grafana/availability-read.json create mode 100644 polkadot/node/subsystem-bench/src/grafana/task-cpu-usage.json diff --git a/polkadot/node/subsystem-bench/src/grafana/availability-read.json b/polkadot/node/subsystem-bench/src/grafana/availability-read.json new file mode 100644 index 000000000000..4fbbe1f58731 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/grafana/availability-read.json @@ -0,0 +1,1872 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Subsystem and test environment metrics", + "editable": true, + 
"fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 60000, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 90, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_n_validators{}", + "instant": false, + "legendFormat": "n_vaidators", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_n_cores{}", + "hide": false, + "instant": false, + "legendFormat": "n_cores", + "range": true, + "refId": "B" + } + ], + "title": "Test configuration", + "type": "timeseries" + }, + { + "collapsed": false, 
+ "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 31, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 57, + "interval": "1s", + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "repeat": "nodename", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "sum(rate(substrate_tasks_polling_duration_sum{}[2s])) by ($cpu_group_by)", + "interval": "", + "legendFormat": "{{task_group}}", + "range": true, + "refId": "A" + } + ], + "title": "All tasks CPU usage breakdown", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 6 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 93, + "interval": "1s", + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "increase(substrate_tasks_polling_duration_sum{task_group=\"availability-recovery-subsystem\"}[6s])", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Availability subsystem CPU usage per block", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 10, + "type": "log" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 94, + "interval": "1s", + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "sum(substrate_tasks_polling_duration_sum{}) by ($cpu_group_by)", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Total CPU burn", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 10, + "type": "log" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "area" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-red", + "value": 6000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 95, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_block_time", + "interval": "", + "legendFormat": "Instant block time", + "range": true, + "refId": "A" + } + ], + "title": "Block time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 2, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 89, + "interval": "1s", + 
"options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "sum(rate(subsystem_benchmark_network_peer_total_bytes_sent{}[5s]))", + "instant": false, + "legendFormat": "Received", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "sum(rate(subsystem_benchmark_network_peer_total_bytes_received{}[5s]))", + "hide": false, + "instant": false, + "legendFormat": "Sent", + "range": true, + "refId": "B" + } + ], + "title": "Emulated network throughput ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 88, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "rate(subsystem_benchmark_network_peer_total_bytes_received{}[10s])", + "instant": false, + "legendFormat": "Received by {{peer}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "rate(subsystem_benchmark_network_peer_total_bytes_sent{}[10s])", + "hide": false, + "instant": false, + "legendFormat": "Sent by {{peer}}", + "range": true, + "refId": "B" + } + ], + "title": "Emulated peer throughput", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 52 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 92, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + 
"showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "bytes" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(subsystem_benchmark_pov_size_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Recovered PoV sizes", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "Number of erasure-encoded chunks of data belonging to candidate blocks. 
", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "chunks/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 43, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_chunk_requests_issued{}[10s]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Chunks requested", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Availability", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 77 + }, + "id": 35, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + 
"title": "Availability subystem metrics", + "type": "row" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 78 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 68, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_total_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Time to recover a PoV", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + 
"transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 78 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 67, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_chunk_request_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + 
"title": "Chunk request duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "bitfields", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 88 + }, + "id": 85, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "rate(polkadot_parachain_availability_recovery_bytes_total{}[30s])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Bytes recovered", + "queryType": "randomWalk", + 
"refId": "B" + } + ], + "title": "Recovery throughput", + "transformations": [], + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 88 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 84, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_reencode_chunks_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Re-encoding chunks timing", + "tooltip": { + "show": true, +
"showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 98 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 83, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_erasure_recovery_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": 
"{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Erasure recovery (no I/O)", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "Number of erasure-encoded chunks of data belonging to candidate blocks. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 108 + }, + "id": 86, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_recoveries_finished{}[1s]))", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Finished", + "queryType": "randomWalk", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_recovieries_started{}[1s]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Started", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Recoveries", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 118 + }, + "id": 2, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Approval voting", + "type": "row" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [ + "subsystem", + "benchmark" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "hide": 0, + "includeAll": false, + "label": "Source of data", + "multi": false, + "name": "data_source", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "task_name", + "value": "task_name" + }, + "description": "Sum CPU usage by task name or task group.", + "hide": 0, + "includeAll": false, + "label": "Group CPU usage", + "multi": false, + "name": "cpu_group_by", + "options": [ + { + "selected": true, + "text": "task_name", + "value": "task_name" + }, + { + "selected": false, + "text": "task_group", + "value": "task_group" + } + ], + "query": "task_name, task_group", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" 
+ } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s" + ] + }, + "timezone": "utc", + "title": "Data Availability Read", + "uid": "asdadasd1", + "version": 56, + "weekStart": "" + } \ No newline at end of file diff --git a/polkadot/node/subsystem-bench/src/grafana/task-cpu-usage.json b/polkadot/node/subsystem-bench/src/grafana/task-cpu-usage.json new file mode 100644 index 000000000000..90763444abf1 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/grafana/task-cpu-usage.json @@ -0,0 +1,755 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:326", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "$$hashKey": "object:327", + "datasource": { + "uid": "$data_source" + }, + "enable": true, + "expr": "increase(${metric_namespace}_tasks_ended_total{reason=\"panic\", node=~\"${nodename}\"}[10m])", + "hide": true, + "iconColor": "rgba(255, 96, 96, 1)", + "limit": 100, + "name": "Task panics", + "rawQuery": "SELECT\n extract(epoch from time_column) AS time,\n text_column as text,\n tags_column as tags\nFROM\n metric_table\nWHERE\n $__timeFilter(time_column)\n", + "showIn": 0, + "step": "10m", + "tags": [], + "textFormat": "{{node}} - {{task_name}}", + "titleFormat": "Panic!", + "type": "tags" + }, + { + "$$hashKey": "object:621", + "datasource": { + "uid": "$data_source" + }, + "enable": true, + "expr": "changes(${metric_namespace}_process_start_time_seconds{node=~\"${nodename}\"}[10m])", + "hide": false, + "iconColor": "#8AB8FF", + "name": "Node reboots", + "showIn": 0, + "step": "10m", + "textFormat": "{{node}}", + "titleFormat": "Reboots" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 
0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 29, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Tasks", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 11, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "sum(rate(substrate_tasks_polling_duration_sum{}[$__rate_interval])) by (task_name)", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CPU time spent on each task", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2721", + "format": 
"percentunit", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2722", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 30, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "rate(substrate_tasks_polling_duration_count{}[$__rate_interval])", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Task polling rate per second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2571", + "format": "cps", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2572", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { 
+ "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 43, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "increase(substrate_tasks_polling_duration_sum{}[$__rate_interval]) / increase(substrate_tasks_polling_duration_count{}[$__rate_interval])", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Average time it takes to call Future::poll()", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2571", + "format": "s", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:2572", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, 
+ "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 15, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "increase(substrate_tasks_spawned_total{}[$__rate_interval])", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Number of tasks started", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:771", + "format": "short", + "logBase": 10, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:772", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 2, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, 
+ "lines": true, + "linewidth": 1, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "substrate_tasks_spawned_total{} - sum(substrate_tasks_ended_total{}) without(reason)\n\n# Fallback if tasks_ended_total is null for that task\nor on(task_name) substrate_tasks_spawned_total{}", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Number of tasks running", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:919", + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:920", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 34 + }, + "hiddenSeries": false, + "id": 7, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + 
"pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "irate(substrate_tasks_polling_duration_bucket{le=\"+Inf\"}[$__rate_interval])\n - ignoring(le)\n irate(substrate_tasks_polling_duration_bucket{le=\"1.024\"}[$__rate_interval]) > 0", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Number of calls to `Future::poll` that took more than one second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3040", + "format": "cps", + "label": "Calls to `Future::poll`/second", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:3041", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 27, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Unbounded Channels", + "type": "row" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "utc", + "title": "Substrate Service Tasks with substrate prefix", + "uid": "S7sc-M_Gk", + "version": 17, + "weekStart": "" + } \ No newline at end of file From 
cd18f8de2d4963c4fafe05423290c25a667be190 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 28 Nov 2023 10:14:09 +0200 Subject: [PATCH 28/52] network stats fixes Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 10 ------ .../subsystem-bench/src/core/configuration.rs | 7 +++-- .../node/subsystem-bench/src/core/display.rs | 4 +-- .../subsystem-bench/src/core/environment.rs | 2 +- .../src/core/mock/network_bridge.rs | 31 +++++++++++++------ polkadot/node/subsystem-bench/src/core/mod.rs | 8 ----- .../node/subsystem-bench/src/core/network.rs | 23 ++++++++++++-- 7 files changed, 49 insertions(+), 36 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index ae4e743205e3..a5f1a0866a5b 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -100,16 +100,6 @@ pub fn prepare_test( prepare_test_inner(config, state, TestEnvironmentDependencies::default()) } -/// Takes a test configuration and uses it to creates the `TestEnvironment`. -#[allow(unused)] -pub fn prepare_test_with_dependencies( - config: TestConfiguration, - state: &mut TestState, - dependencies: TestEnvironmentDependencies, -) -> (TestEnvironment, ProtocolConfig) { - prepare_test_inner(config, state, dependencies) -} - fn prepare_test_inner( config: TestConfiguration, state: &mut TestState, diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs index 35fa51790c91..340b5c03ab84 100644 --- a/polkadot/node/subsystem-bench/src/core/configuration.rs +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -13,13 +13,14 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::path::Path; - +// +//! Test configuration definition and helpers. 
use super::*; use keyring::Keyring; +use std::{path::Path, time::Duration}; pub use crate::cli::TestObjective; -use polkadot_primitives::ValidatorId; +use polkadot_primitives::{AuthorityDiscoveryId, ValidatorId}; use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; use serde::{Deserialize, Serialize}; diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index 13ea7d375e95..f21a8b907d11 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -14,8 +14,8 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . // -//! Some helper methods for parsing prometheus metrics to a format that can be -//! displayed in the CLI. +//! Display implementations and helper methods for parsing prometheus metrics +//! to a format that can be displayed in the CLI. //! //! Currently histogram buckets are skipped. use super::LOG_TARGET; diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index 24d10ecb1fa1..5c04071c442f 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -13,7 +13,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . - +//! 
Test environment implementation use crate::{ core::{mock::AlwaysSupportsParachains, network::NetworkEmulator}, TestConfiguration, diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index a45cacd0241a..c14a3895e238 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -80,7 +80,16 @@ impl MockNetworkBridgeTx { match request { Requests::ChunkFetchingV1(outgoing_request) => { + let authority_discovery_id = match outgoing_request.peer { + req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, + _ => unimplemented!("Peer recipient not supported yet"), + }; + // Account our sent request bytes. self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); + // Account for remote received request bytes. + self.network + .peer_stats_by_id(authority_discovery_id.clone()) + .inc_received(outgoing_request.payload.encoded_size()); let validator_index: usize = outgoing_request.payload.index.0 as usize; let candidate_hash = outgoing_request.payload.candidate_hash; @@ -107,10 +116,6 @@ impl MockNetworkBridgeTx { Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode()) }; - let authority_discovery_id = match outgoing_request.peer { - req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, - _ => unimplemented!("Peer recipient not supported yet"), - }; let authority_discovery_id_clone = authority_discovery_id.clone(); let future = async move { @@ -142,7 +147,18 @@ impl MockNetworkBridgeTx { .candidate_hashes .get(&candidate_hash) .expect("candidate was generated previously; qed"); - gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let authority_discovery_id = match 
outgoing_request.peer { + req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, + _ => unimplemented!("Peer recipient not supported yet"), + }; + // Account our sent request bytes. + self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); + // Account for remote received request bytes. + self.network + .peer_stats_by_id(authority_discovery_id.clone()) + .inc_received(outgoing_request.payload.encoded_size()); let available_data = self.availabilty.available_data.get(*candidate_index as usize).unwrap().clone(); @@ -161,10 +177,6 @@ impl MockNetworkBridgeTx { } .boxed(); - let authority_discovery_id = match outgoing_request.peer { - req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, - _ => unimplemented!("Peer recipient not supported yet"), - }; let authority_discovery_id_clone = authority_discovery_id.clone(); let future_wrapper = async move { @@ -243,6 +255,7 @@ impl MockNetworkBridgeTx { gum::debug!(target: LOG_TARGET, request = ?request, "Processing request"); self.network.inc_sent(request_size(&request)); let action = self.respond_to_send_request(request, &mut ingress_tx); + // Will account for our node sending the request over the emulated // network. self.network.submit_peer_action(action.peer(), action); diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs index 11ca03dbda4c..282788d143b4 100644 --- a/polkadot/node/subsystem-bench/src/core/mod.rs +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -14,16 +14,8 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use std::{ - collections::HashMap, - sync::Arc, - time::{Duration, Instant}, -}; const LOG_TARGET: &str = "subsystem-bench::core"; -use polkadot_primitives::AuthorityDiscoveryId; -use sc_service::SpawnTaskHandle; - pub mod configuration; pub mod display; pub mod environment; diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index f36c0967466b..40809ce36e8d 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -15,8 +15,17 @@ // along with Polkadot. If not, see . use super::*; use colored::Colorize; +use polkadot_primitives::AuthorityDiscoveryId; use prometheus_endpoint::U64; -use std::sync::atomic::{AtomicU64, Ordering}; +use sc_service::SpawnTaskHandle; +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; use tokio::sync::mpsc::UnboundedSender; // An emulated node egress traffic rate_limiter. @@ -309,11 +318,20 @@ impl NetworkEmulator { self.peers[*index].send(action); } - // Returns the sent/received stats for all peers. + // Returns the sent/received stats for `peer_index`. pub fn peer_stats(&mut self, peer_index: usize) -> Arc { self.stats[peer_index].clone() } + // Returns the sent/received stats for `peer`. + pub fn peer_stats_by_id(&mut self, peer: AuthorityDiscoveryId) -> Arc { + let peer_index = self + .validator_authority_ids + .get(&peer) + .expect("all test authorities are valid; qed"); + self.stats[*peer_index].clone() + } + // Returns the sent/received stats for all peers. pub fn stats(&self) -> Vec { let r = self @@ -334,7 +352,6 @@ impl NetworkEmulator { } // Increment bytes received by our node (the node that contains the subsystem under test) - #[allow(unused)] pub fn inc_received(&self, bytes: u64) { // Our node always is peer 0. 
self.metrics.on_peer_received(0, bytes); From e8506b3d663a408b67cbff21749c3d273aa0c031 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 28 Nov 2023 10:16:41 +0200 Subject: [PATCH 29/52] move examples and grafana Signed-off-by: Andrei Sandu --- .../{src => }/grafana/availability-read.json | 0 .../{src => }/grafana/task-cpu-usage.json | 0 .../examples/availability_read.yaml} | 27 ++++++++++--------- 3 files changed, 14 insertions(+), 13 deletions(-) rename polkadot/node/subsystem-bench/{src => }/grafana/availability-read.json (100%) rename polkadot/node/subsystem-bench/{src => }/grafana/task-cpu-usage.json (100%) rename polkadot/node/subsystem-bench/{test_sequence.yaml => src/examples/availability_read.yaml} (75%) diff --git a/polkadot/node/subsystem-bench/src/grafana/availability-read.json b/polkadot/node/subsystem-bench/grafana/availability-read.json similarity index 100% rename from polkadot/node/subsystem-bench/src/grafana/availability-read.json rename to polkadot/node/subsystem-bench/grafana/availability-read.json diff --git a/polkadot/node/subsystem-bench/src/grafana/task-cpu-usage.json b/polkadot/node/subsystem-bench/grafana/task-cpu-usage.json similarity index 100% rename from polkadot/node/subsystem-bench/src/grafana/task-cpu-usage.json rename to polkadot/node/subsystem-bench/grafana/task-cpu-usage.json diff --git a/polkadot/node/subsystem-bench/test_sequence.yaml b/polkadot/node/subsystem-bench/src/examples/availability_read.yaml similarity index 75% rename from polkadot/node/subsystem-bench/test_sequence.yaml rename to polkadot/node/subsystem-bench/src/examples/availability_read.yaml index 088a7e15729b..889309e64a2b 100644 --- a/polkadot/node/subsystem-bench/test_sequence.yaml +++ b/polkadot/node/subsystem-bench/src/examples/availability_read.yaml @@ -1,10 +1,10 @@ TestConfiguration: # Test 1 - objective: !DataAvailabilityRead - fetch_from_backers: false + fetch_from_backers: true n_validators: 300 - n_cores: 10 - min_pov_size: 1120 + n_cores: 20 + 
min_pov_size: 5120 max_pov_size: 5120 peer_bandwidth: 52428800 bandwidth: 52428800 @@ -16,13 +16,14 @@ TestConfiguration: secs: 0 nanos: 100000000 error: 3 - num_blocks: 10 + num_blocks: 3 + # Test 2 - objective: !DataAvailabilityRead - fetch_from_backers: false + fetch_from_backers: true n_validators: 500 - n_cores: 10 - min_pov_size: 1120 + n_cores: 20 + min_pov_size: 5120 max_pov_size: 5120 peer_bandwidth: 52428800 bandwidth: 52428800 @@ -34,14 +35,14 @@ TestConfiguration: secs: 0 nanos: 100000000 error: 3 - num_blocks: 10 + num_blocks: 3 -# Test 2 +# Test 3 - objective: !DataAvailabilityRead - fetch_from_backers: false + fetch_from_backers: true n_validators: 1000 - n_cores: 10 - min_pov_size: 1120 + n_cores: 20 + min_pov_size: 5120 max_pov_size: 5120 peer_bandwidth: 52428800 bandwidth: 52428800 @@ -53,4 +54,4 @@ TestConfiguration: secs: 0 nanos: 100000000 error: 3 - num_blocks: 10 + num_blocks: 3 From cbb677202c14fe04c013100c7910dfeecdf26b5b Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 28 Nov 2023 14:52:42 +0200 Subject: [PATCH 30/52] Add readme Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/README.md | 182 +++++++++++++++++- .../{src => }/examples/availability_read.yaml | 0 .../subsystem-bench/src/availability/cli.rs | 5 +- .../subsystem-bench/src/availability/mod.rs | 2 +- polkadot/node/subsystem-bench/src/cli.rs | 2 +- .../node/subsystem-bench/src/core/display.rs | 18 +- .../subsystem-bench/src/subsystem-bench.rs | 24 +-- 7 files changed, 211 insertions(+), 22 deletions(-) rename polkadot/node/subsystem-bench/{src => }/examples/availability_read.yaml (100%) diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md index 8843f9883116..4ed25ff9078c 100644 --- a/polkadot/node/subsystem-bench/README.md +++ b/polkadot/node/subsystem-bench/README.md @@ -1,6 +1,182 @@ # Subsystem benchmark client -Run subsystem performance tests in isolation. 
+Run parachain consensus stress and performance tests on your development machine. + +## Motivation +The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or `dispute-coordinator`. In the absence of this client, we would run large test nets in order to load/stress test these parts of the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard to orchestrate and is a huge development time sink. + +This tool aims to solve this problem by making it easy to: +- set up and run core subsystem load tests locally on your development machine +- iterate and conclude faster when benchmarking new optimizations or comparing implementations +- automate and keep track of performance regressions in CI runs +- simulate various networking topologies, bandwidth and connectivity issues + +## Test environment setup + +`cargo build --profile=testnet --bin subsystem-bench -p polkadot-subsystem-bench` + +The output binary will be placed in `target/testnet/subsystem-bench`. + +### Test metrics +Subsystem, CPU usage and network metrics are exposed via a prometheus endpoint during the test execution. +A small subset of these collected metrics are displayed in the CLI, but for an in-depth analysis of the test results, a local Grafana/Prometheus stack is needed. + +### Install Prometheus +Please follow the [official installation guide](https://prometheus.io/docs/prometheus/latest/installation/) for your platform/OS. + +After successfully installing and starting up Prometheus, we need to alter its configuration such that it +will scrape the benchmark prometheus endpoint `127.0.0.1:9999`.
Please check the prometheus official documentation +regarding the location of `prometheus.yml`. On MacOS for example the full path is `/opt/homebrew/etc/prometheus.yml`. + +prometheus.yml: +``` +global: + scrape_interval: 5s + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + - job_name: "subsystem-bench" + scrape_interval: 0s500ms + static_configs: + - targets: ['localhost:9999'] +``` + +To complete this step restart Prometheus server such that it picks up the new configuration. +### Install and setup Grafana + +Follow the [installation guide](https://grafana.com/docs/grafana/latest/setup-grafana/installation/) relevant +to your operating system. + +Once you have the installation up and running, configure the local Prometheus as a data source by following +[this guide](https://grafana.com/docs/grafana/latest/datasources/prometheus/configure-prometheus-data-source/) + +#### Import dashboards + +Follow [this guide](https://grafana.com/docs/grafana/latest/dashboards/manage-dashboards/#export-and-import-dashboards) to import the dashboards from the repository `grafana` folder. + +## Running existing tests + +To run a test, you need to first choose a test objective. Currently, we support the following: + +``` +target/testnet/subsystem-bench --help +The almighty Subsystem Benchmark Tool™️ + +Usage: subsystem-bench [OPTIONS] + +Commands: + data-availability-read Benchmark availability recovery strategies + test-sequence Run a test sequence specified in a file + help Print this message or the help of the given subcommand(s) + +``` + +The `test-sequence` is a special test objective that wraps up an arbitrary number of test objectives. It is tipically used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml).
+ +### Standard test options + +``` +Options: + --network The type of network to be emulated [default: ideal] [possible values: ideal, + healthy, degraded] + --n-cores Number of cores to fetch availability for [default: 100] + --n-validators Number of validators to fetch chunks from [default: 500] + --min-pov-size The minimum pov size in KiB [default: 5120] + --max-pov-size The maximum pov size bytes [default: 5120] + -n, --num-blocks The number of blocks the test is going to run [default: 1] + -p, --peer-bandwidth The bandwidth of simulated remote peers in KiB + -b, --bandwidth The bandwidth of our simulated node in KiB + --peer-error Simulated conection error ratio [0-100] + --peer-min-latency Minimum remote peer latency in milliseconds [0-5000] + --peer-max-latency Maximum remote peer latency in milliseconds [0-5000] + -h, --help Print help + -V, --version Print version +``` + +These apply to all test objectives, except `test-sequence` which relies on the values being specified in a file. + +### Test objectives +Each test objective can have its specific configuration options, in contrast with the standard test options. + +For `data-availability-read` the recovery strategy to be used is configurable. +``` +target/testnet/subsystem-bench data-availability-read --help +Benchmark availability recovery strategies + +Usage: subsystem-bench data-availability-read [OPTIONS] + +Options: + -f, --fetch-from-backers Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU as we + don't need to re-construct from chunks. Tipically this is only faster if nodes have enough + bandwidth + -h, --help Print help +``` +### Understanding the test configuration +A single test configuration `TestConfiguration` struct applies to a single run of a certain test objective.
+ +The configuration describes the following important parameters that influence the test duration and resource +usage: +- how many validators are on the emulated network (`n_validators`) +- how many cores per block the subsystem will have to do work on (`n_cores`) +- for how many blocks the test should run (`num_blocks`) + +From the perspective of the subsystem under test, this means that it will receive an `ActiveLeavesUpdate` signal +followed by an arbitrary amount of messages. The process repeat itself for `num_blocks`. These messages are generally test payloads pre-generated before the test run, or constructed on pre-genereated paylods. For example the `AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before the test is started. + +### Example run + +Let's run an availability read test which will recover availability for 10 cores with max PoV size on a 500 +node validator network. + +``` + target/testnet/subsystem-bench --n-cores 10 data-availability-read +[2023-11-28T09:01:59Z INFO subsystem_bench::core::display] n_validators = 500, n_cores = 10, pov_size = 5120 - 5120, error = 0, latency = None +[2023-11-28T09:01:59Z INFO subsystem-bench::availability] Generating template candidate index=0 pov_size=5242880 +[2023-11-28T09:01:59Z INFO subsystem-bench::availability] Created test environment. +[2023-11-28T09:01:59Z INFO subsystem-bench::availability] Pre-generating 10 candidates. +[2023-11-28T09:02:01Z INFO subsystem-bench::core] Initializing network emulation for 500 peers.
+[2023-11-28T09:02:01Z INFO substrate_prometheus_endpoint] 〽️ Prometheus exporter started at 127.0.0.1:9999 +[2023-11-28T09:02:01Z INFO subsystem-bench::availability] Current block 1/1 +[2023-11-28T09:02:01Z INFO subsystem_bench::availability] 10 recoveries pending +[2023-11-28T09:02:04Z INFO subsystem_bench::availability] Block time 3231ms +[2023-11-28T09:02:04Z INFO subsystem-bench::availability] Sleeping till end of block (2768ms) +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] All blocks processed in 6001ms +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] Throughput: 51200 KiB/block +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] Block time: 6001 ms +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] + + Total received from network: 66 MiB + Total sent to network: 58 KiB + Total subsystem CPU usage 4.16s + CPU usage per block 4.16s + Total test environment CPU usage 0.00s + CPU usage per block 0.00s +``` +### Test logs + +You can select node cateogries and verbosity as with the Polkadot clien, simply setting `RUST_LOOG="parachain=debug"` turns on debug logs for all parachain consensus subsystems in the test. + +### View test metrics + +Assuming the Grafana/Prometheus stack installation steps completed succesfully, you should be able to +view the test progress in real time by accessing [this link](http://localhost:3000/goto/i1vzLpNSR?orgId=1). + +Now run `target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml` and view the metrics in real time and spot differences between different `n_valiator` values. + +## Create new test objectives +This tool is intended to make it easy to write new test objectives that focus individual subsystems, +or even multiple subsystems (for example `approval-distribution` and `approval-voting`). + +A special kind of test objectives are performance regression tests for the CI pipeline. 
These should be sequences +of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both happy and negative scenarios (low bandwidth, network errors and low connectivity). + +### Reuaseble test components +To faster write a new test objective you need to use some higher level wrappers and logic: `TestEnvironment` `TestConfiguration`, `TestAuthorities`, `NetworkEmulator`. To create the `TestEnvironment` you will +need to also build an `Overseer`, but that should be easy using the mockups for subsystems in`core::mock`. + +### Mocking +Ideally we want to have a single mock implementation for subsystems that can be minimally configured to +be used in different tests. A good example is `runtime-api` which currently only responds to session information requests based on static data. It can be easily extended to service other requests. -Currently implemented benchmarks: -* `availability-recovery` diff --git a/polkadot/node/subsystem-bench/src/examples/availability_read.yaml b/polkadot/node/subsystem-bench/examples/availability_read.yaml similarity index 100% rename from polkadot/node/subsystem-bench/src/examples/availability_read.yaml rename to polkadot/node/subsystem-bench/examples/availability_read.yaml diff --git a/polkadot/node/subsystem-bench/src/availability/cli.rs b/polkadot/node/subsystem-bench/src/availability/cli.rs index 06fb2966d878..f86f1bfb700d 100644 --- a/polkadot/node/subsystem-bench/src/availability/cli.rs +++ b/polkadot/node/subsystem-bench/src/availability/cli.rs @@ -34,7 +34,8 @@ pub enum NetworkEmulation { #[allow(missing_docs)] pub struct DataAvailabilityReadOptions { #[clap(short, long, default_value_t = false)] - /// Turbo boost AD Read by fetching from backers first. Tipically this is only faster if nodes - /// have enough bandwidth. + /// Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU as + /// we don't need to re-construct from chunks. 
Tipically this is only faster if nodes have + /// enough bandwidth. pub fetch_from_backers: bool, } diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index a5f1a0866a5b..e5543e5d3904 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -312,7 +312,7 @@ fn derive_erasure_chunks_with_proofs_and_root( (erasure_chunks, root) } -pub async fn bench_chunk_recovery(env: &mut TestEnvironment, mut state: TestState) { +pub async fn benchmark_availability_read(env: &mut TestEnvironment, mut state: TestState) { let config = env.config().clone(); env.import_block(new_block_import_info(Hash::repeat_byte(1), 1)).await; diff --git a/polkadot/node/subsystem-bench/src/cli.rs b/polkadot/node/subsystem-bench/src/cli.rs index ee67a01d449e..3352f33a3503 100644 --- a/polkadot/node/subsystem-bench/src/cli.rs +++ b/polkadot/node/subsystem-bench/src/cli.rs @@ -26,7 +26,7 @@ pub struct TestSequenceOptions { /// Define the supported benchmarks targets #[derive(Debug, Clone, clap::Parser, Serialize, Deserialize)] -#[command(about = "Test objectives", version, rename_all = "kebab-case")] +#[command(rename_all = "kebab-case")] pub enum TestObjective { /// Benchmark availability recovery strategies. DataAvailabilityRead(DataAvailabilityReadOptions), diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index f21a8b907d11..629fb2edc414 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -18,7 +18,7 @@ //! to a format that can be displayed in the CLI. //! //! Currently histogram buckets are skipped. 
-use super::LOG_TARGET; +use super::{LOG_TARGET, configuration::TestConfiguration}; use colored::Colorize; use prometheus::{ proto::{MetricFamily, MetricType}, @@ -181,3 +181,19 @@ pub fn parse_metrics(registry: &Registry) -> MetricCollection { } test_metrics.into() } + + +pub fn display_configuration(test_config: &TestConfiguration) { + gum::info!( + "{}, {}, {}, {}, {}", + format!("n_validators = {}", test_config.n_validators).blue(), + format!("n_cores = {}", test_config.n_cores).blue(), + format!( + "pov_size = {} - {}", + test_config.min_pov_size, test_config.max_pov_size + ) + .bright_black(), + format!("error = {}", test_config.error).bright_black(), + format!("latency = {:?}", test_config.latency).bright_black(), + ); +} diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index f9261d848778..a666ee06ad55 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -35,7 +35,8 @@ use core::{ }; use clap_num::number_range; -// const LOG_TARGET: &str = "subsystem-bench"; + +use crate::core::display::display_configuration; fn le_100(s: &str) -> Result { number_range(s, 0, 100) @@ -64,7 +65,7 @@ struct BenchCli { pub bandwidth: Option, #[clap(long, value_parser=le_100)] - /// Simulated connection error rate [0-100]. + /// Simulated conection error ratio [0-100]. 
pub peer_error: Option, #[clap(long, value_parser=le_5000)] @@ -95,22 +96,14 @@ impl BenchCli { ); for (index, test_config) in test_sequence.into_iter().enumerate() { gum::info!( - "{}, {}, {}, {}, {}, {}", + "{}", format!("Step {}/{}", index + 1, num_steps).bright_purple(), - format!("n_validators = {}", test_config.n_validators).blue(), - format!("n_cores = {}", test_config.n_cores).blue(), - format!( - "pov_size = {} - {}", - test_config.min_pov_size, test_config.max_pov_size - ) - .bright_black(), - format!("error = {}", test_config.error).bright_black(), - format!("latency = {:?}", test_config.latency).bright_black(), ); + display_configuration(&test_config); let mut state = TestState::new(&test_config); let (mut env, _protocol_config) = prepare_test(test_config, &mut state); - env.runtime().block_on(availability::bench_chunk_recovery(&mut env, state)); + env.runtime().block_on(availability::benchmark_availability_read(&mut env, state)); } return Ok(()) }, @@ -166,10 +159,12 @@ impl BenchCli { test_config.bandwidth = bandwidth * 1024; } + display_configuration(&test_config); + let mut state = TestState::new(&test_config); let (mut env, _protocol_config) = prepare_test(test_config, &mut state); // test_config.write_to_disk(); - env.runtime().block_on(availability::bench_chunk_recovery(&mut env, state)); + env.runtime().block_on(availability::benchmark_availability_read(&mut env, state)); Ok(()) } @@ -181,6 +176,7 @@ fn main() -> eyre::Result<()> { .filter(Some("hyper"), log::LevelFilter::Info) // Avoid `Terminating due to subsystem exit subsystem` warnings .filter(Some("polkadot_overseer"), log::LevelFilter::Error) + .filter(None, log::LevelFilter::Info) // .filter(None, log::LevelFilter::Trace) .try_init() .unwrap(); From 1a8087010d36a8712482ba9f03d383e17e547623 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 28 Nov 2023 15:04:32 +0200 Subject: [PATCH 31/52] fmt + readme updates Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/README.md | 
22 ++++++++++--------- .../subsystem-bench/src/availability/cli.rs | 4 ++-- .../node/subsystem-bench/src/core/display.rs | 12 ++++------ .../subsystem-bench/src/subsystem-bench.rs | 11 +++++----- 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md index 4ed25ff9078c..5b58dc3a5be4 100644 --- a/polkadot/node/subsystem-bench/README.md +++ b/polkadot/node/subsystem-bench/README.md @@ -3,9 +3,9 @@ Run parachain consensus stress and performance tests on your development machine. ## Motivation -The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or `dispute-coordinator`. In the absence of this client, we would run large test nets in order to load/stress test these parts of the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard to orchestrate and is a huge development time sink. +The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or `dispute-coordinator`. In the absence such a tool, we would run large test nets to load/stress test these parts of the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard to orchestrate and is a huge development time sink. 
-This tool aims to solve this problem by making it easy to: +This tool aims to solve the problem by making it easy to: - set up and run core subsystem load tests locally on your development machine - iterate and conclude faster when benchmarking new optimizations or comparing implementations - automate and keep track of performance regressions in CI runs @@ -56,7 +56,7 @@ Once you have the installation up and running, configure the local Prometheus as Follow [this guide](https://grafana.com/docs/grafana/latest/dashboards/manage-dashboards/#export-and-import-dashboards) to import the dashboards from the repository `grafana` folder. -## Running existing tests +## How to run a test To run a test, you need to first choose a test objective. Currently, we support the following: @@ -68,12 +68,10 @@ Usage: subsystem-bench [OPTIONS] Commands: data-availability-read Benchmark availability recovery strategies - test-sequence Run a test sequence specified in a file - help Print this message or the help of the given subcommand(s) ``` -The `test-sequence` is a special test objective that wraps up an arbitrary number of test objectives. It is tipically used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml). +Note: `test-sequence` is a special test objective that wraps up an arbitrary number of test objectives. It is tipically used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml). ### Standard test options @@ -123,7 +121,7 @@ usage: - for how many blocks the test should run (`num_blocks`) From the perspective of the subsystem under test, this means that it will receive an `ActiveLeavesUpdate` signal -followed by an arbitrary amount of messages. The process repeat itself for `num_blocks`. These messages are generally test payloads pre-generated before the test run, or constructed on pre-genereated paylods. 
For example the `AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before the test is started. +followed by an arbitrary amount of messages. This process repeats itself for `num_blocks`. The messages are generally test payloads pre-generated before the test run, or constructed on pre-genereated payloads. For example the `AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before the test is started. ### Example run @@ -154,14 +152,18 @@ node validator network. Total test environment CPU usage 0.00s CPU usage per block 0.00s ``` + +`Block time` in the context of `data-availability-read` has a different meaning. It measures the amount of time it took the subsystem to finish processing all of the messages sent in the context of the current test block. + + ### Test logs -You can select node cateogries and verbosity as with the Polkadot clien, simply setting `RUST_LOOG="parachain=debug"` turns on debug logs for all parachain consensus subsystems in the test. +You can select log target, subtarget and verbosity just like with Polkadot node CLI, simply setting `RUST_LOOG="parachain=debug"` turns on debug logs for all parachain consensus subsystems in the test. ### View test metrics Assuming the Grafana/Prometheus stack installation steps completed succesfully, you should be able to -view the test progress in real time by accessing [this link](http://localhost:3000/goto/i1vzLpNSR?orgId=1). +view the test progress in real time by accessing [this link](http://localhost:3000/goto/SM5B8pNSR?orgId=1). Now run `target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml` and view the metrics in real time and spot differences between different `n_valiator` values. 
@@ -172,7 +174,7 @@ or even multiple subsystems (for example `approval-distribution` and `approval-v A special kind of test objectives are performance regression tests for the CI pipeline. These should be sequences of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both happy and negative scenarios (low bandwidth, network errors and low connectivity). -### Reuaseble test components +### Reusable test components To faster write a new test objective you need to use some higher level wrappers and logic: `TestEnvironment` `TestConfiguration`, `TestAuthorities`, `NetworkEmulator`. To create the `TestEnvironment` you will need to also build an `Overseer`, but that should be easy using the mockups for subsystems in`core::mock`. diff --git a/polkadot/node/subsystem-bench/src/availability/cli.rs b/polkadot/node/subsystem-bench/src/availability/cli.rs index f86f1bfb700d..8da4a59253c6 100644 --- a/polkadot/node/subsystem-bench/src/availability/cli.rs +++ b/polkadot/node/subsystem-bench/src/availability/cli.rs @@ -34,8 +34,8 @@ pub enum NetworkEmulation { #[allow(missing_docs)] pub struct DataAvailabilityReadOptions { #[clap(short, long, default_value_t = false)] - /// Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU as - /// we don't need to re-construct from chunks. Tipically this is only faster if nodes have + /// Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU as + /// we don't need to re-construct from chunks. Tipically this is only faster if nodes have /// enough bandwidth. 
pub fetch_from_backers: bool, } diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index 629fb2edc414..03a5c13aeb47 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -14,11 +14,11 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . // -//! Display implementations and helper methods for parsing prometheus metrics +//! Display implementations and helper methods for parsing prometheus metrics //! to a format that can be displayed in the CLI. //! //! Currently histogram buckets are skipped. -use super::{LOG_TARGET, configuration::TestConfiguration}; +use super::{configuration::TestConfiguration, LOG_TARGET}; use colored::Colorize; use prometheus::{ proto::{MetricFamily, MetricType}, @@ -182,17 +182,13 @@ pub fn parse_metrics(registry: &Registry) -> MetricCollection { test_metrics.into() } - pub fn display_configuration(test_config: &TestConfiguration) { gum::info!( "{}, {}, {}, {}, {}", format!("n_validators = {}", test_config.n_validators).blue(), format!("n_cores = {}", test_config.n_cores).blue(), - format!( - "pov_size = {} - {}", - test_config.min_pov_size, test_config.max_pov_size - ) - .bright_black(), + format!("pov_size = {} - {}", test_config.min_pov_size, test_config.max_pov_size) + .bright_black(), format!("error = {}", test_config.error).bright_black(), format!("latency = {:?}", test_config.latency).bright_black(), ); diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index a666ee06ad55..5337a13e9729 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -95,15 +95,13 @@ impl BenchCli { format!("Sequence contains {} step(s)", num_steps).bright_purple() ); for (index, test_config) in test_sequence.into_iter().enumerate() { - 
gum::info!( - "{}", - format!("Step {}/{}", index + 1, num_steps).bright_purple(), - ); + gum::info!("{}", format!("Step {}/{}", index + 1, num_steps).bright_purple(),); display_configuration(&test_config); let mut state = TestState::new(&test_config); let (mut env, _protocol_config) = prepare_test(test_config, &mut state); - env.runtime().block_on(availability::benchmark_availability_read(&mut env, state)); + env.runtime() + .block_on(availability::benchmark_availability_read(&mut env, state)); } return Ok(()) }, @@ -164,7 +162,8 @@ impl BenchCli { let mut state = TestState::new(&test_config); let (mut env, _protocol_config) = prepare_test(test_config, &mut state); // test_config.write_to_disk(); - env.runtime().block_on(availability::benchmark_availability_read(&mut env, state)); + env.runtime() + .block_on(availability::benchmark_availability_read(&mut env, state)); Ok(()) } From eb49ea0277b89ecbd1cc14613dc0f6eb6e0bd9b7 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 28 Nov 2023 15:14:26 +0200 Subject: [PATCH 32/52] update dashboard and sample Signed-off-by: Andrei Sandu --- .../examples/availability_read.yaml | 6 +- .../grafana/availability-read.json | 3418 +++++++++-------- 2 files changed, 1713 insertions(+), 1711 deletions(-) diff --git a/polkadot/node/subsystem-bench/examples/availability_read.yaml b/polkadot/node/subsystem-bench/examples/availability_read.yaml index 889309e64a2b..311ea972141f 100644 --- a/polkadot/node/subsystem-bench/examples/availability_read.yaml +++ b/polkadot/node/subsystem-bench/examples/availability_read.yaml @@ -1,7 +1,7 @@ TestConfiguration: # Test 1 - objective: !DataAvailabilityRead - fetch_from_backers: true + fetch_from_backers: false n_validators: 300 n_cores: 20 min_pov_size: 5120 @@ -20,7 +20,7 @@ TestConfiguration: # Test 2 - objective: !DataAvailabilityRead - fetch_from_backers: true + fetch_from_backers: false n_validators: 500 n_cores: 20 min_pov_size: 5120 @@ -39,7 +39,7 @@ TestConfiguration: # Test 3 - 
objective: !DataAvailabilityRead - fetch_from_backers: true + fetch_from_backers: false n_validators: 1000 n_cores: 20 min_pov_size: 5120 diff --git a/polkadot/node/subsystem-bench/grafana/availability-read.json b/polkadot/node/subsystem-bench/grafana/availability-read.json index 4fbbe1f58731..31c4ad3c7952 100644 --- a/polkadot/node/subsystem-bench/grafana/availability-read.json +++ b/polkadot/node/subsystem-bench/grafana/availability-read.json @@ -1,1872 +1,1874 @@ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "description": "Subsystem and test environment metrics", - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 2, - "links": [], - "liveNow": false, - "panels": [ + "annotations": { + "list": [ { + "builtIn": 1, "datasource": { - "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "type": "dashboard" + } + ] + }, + "description": "Subsystem and test environment metrics", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": 60000, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": 60000, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 90, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" - }, - "editorMode": "code", - "expr": "subsystem_benchmark_n_validators{}", - "instant": false, - "legendFormat": "n_vaidators", - "range": true, - "refId": "A" - }, - { - "datasource": { 
- "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" - }, - "editorMode": "code", - "expr": "subsystem_benchmark_n_cores{}", - "hide": false, - "instant": false, - "legendFormat": "n_cores", - "range": true, - "refId": "B" - } - ], - "title": "Test configuration", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 90, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 31, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Overview", - "type": "row" + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "$data_source" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_n_validators{}", + "instant": false, + "legendFormat": "n_vaidators", + "range": true, + "refId": "A" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_n_cores{}", + "hide": false, + "instant": false, + "legendFormat": "n_cores", + "range": true, + "refId": "B" + } + ], + "title": "Test configuration", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 31, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Overview", + "type": 
"row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" }, - "unit": "percentunit" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 57, - "interval": "1s", - "options": { - "legend": { - "calcs": [ - "mean", - "min", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - 
"mode": "multi", - "sort": "desc" - } + "unit": "percentunit" }, - "pluginVersion": "10.0.2", - "repeat": "nodename", - "targets": [ - { - "datasource": { - "uid": "$data_source" - }, - "editorMode": "code", - "expr": "sum(rate(substrate_tasks_polling_duration_sum{}[2s])) by ($cpu_group_by)", - "interval": "", - "legendFormat": "{{task_group}}", - "range": true, - "refId": "A" - } - ], - "title": "All tasks CPU usage breakdown", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "$data_source" + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 57, + "interval": "1s", + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "repeat": "nodename", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "sum(rate(substrate_tasks_polling_duration_sum{}[2s])) by ($cpu_group_by)", + "interval": "", + "legendFormat": "{{task_group}}", + "range": true, + "refId": "A" + } + ], + "title": "All tasks CPU usage breakdown", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": 
"none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "area" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 6 - } - ] + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" }, - "unit": "s" + "thresholdsStyle": { + "mode": "area" + } }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 93, - "interval": "1s", - "options": { - "legend": { - "calcs": [ - "mean", - "min", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 6 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + "unit": "s" }, - "pluginVersion": "10.0.2", - "targets": [ - { - "datasource": { - "uid": "$data_source" - }, - "editorMode": "code", - "expr": "increase(substrate_tasks_polling_duration_sum{task_group=\"availability-recovery-subsystem\"}[6s])", - "interval": "", - "legendFormat": "{{task_name}}", - "range": true, - "refId": "A" - } - ], - "title": "Availability subsystem CPU usage per block", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "$data_source" + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 93, + "interval": "1s", + 
"options": { + "legend": { + "calcs": [ + "mean", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "increase(substrate_tasks_polling_duration_sum{task_group=\"availability-recovery\"}[6s])", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Availability subsystem CPU usage per block", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 10, - "type": "log" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 10, + "type": "log" }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - 
"value": null - } - ] + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "s" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 94, - "interval": "1s", - "options": { - "legend": { - "calcs": [ - "last" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + "unit": "s" }, - "pluginVersion": "10.0.2", - "targets": [ - { - "datasource": { - "uid": "$data_source" - }, - "editorMode": "code", - "expr": "sum(substrate_tasks_polling_duration_sum{}) by ($cpu_group_by)", - "interval": "", - "legendFormat": "{{task_name}}", - "range": true, - "refId": "A" - } - ], - "title": "Total CPU burn", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "$data_source" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 94, + "interval": "1s", + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "sum(substrate_tasks_polling_duration_sum{}) by ($cpu_group_by)", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Total CPU burn", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 10, - "type": "log" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "area" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 10, + "type": "log" }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "dark-red", - "value": 6000 - } - ] + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "ms" + "thresholdsStyle": { + "mode": "area" + } }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 95, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "sortBy": "Last", - "sortDesc": true + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-red", + "value": 6000 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + "unit": "ms" }, - "pluginVersion": "10.0.2", - "targets": [ - { - "datasource": { - "uid": "$data_source" - }, 
- "editorMode": "code", - "expr": "subsystem_benchmark_block_time", - "interval": "", - "legendFormat": "Instant block time", - "range": true, - "refId": "A" - } - ], - "title": "Block time", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 95, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_block_time", + "interval": "", + "legendFormat": "Instant block time", + "range": true, + "refId": "A" + } + ], + "title": "All candidates in block recovery time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "hue", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 2, - "scaleDistribution": { - "log": 2, - "type": "log" - 
}, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 2, + "scaleDistribution": { + "log": 2, + "type": "log" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" }, - "unit": "binBps" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 89, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "unit": "binBps" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" - }, - "editorMode": "code", - "expr": "sum(rate(subsystem_benchmark_network_peer_total_bytes_sent{}[5s]))", - "instant": false, - "legendFormat": "Received", - "range": true, - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 89, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" }, - { - "datasource": { - "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" - }, - "editorMode": "code", - "expr": 
"sum(rate(subsystem_benchmark_network_peer_total_bytes_received{}[5s]))", - "hide": false, - "instant": false, - "legendFormat": "Sent", - "range": true, - "refId": "B" - } - ], - "title": "Emulated network throughput ", - "type": "timeseries" + "editorMode": "code", + "expr": "sum(rate(subsystem_benchmark_network_peer_total_bytes_received{}[5s]))", + "instant": false, + "legendFormat": "Received", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "sum(rate(subsystem_benchmark_network_peer_total_bytes_sent{}[5s]))", + "hide": false, + "instant": false, + "legendFormat": "Sent", + "range": true, + "refId": "B" + } + ], + "title": "Emulated network throughput ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" }, - { - "datasource": { - "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "bytes" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 15, - "w": 12, - "x": 0, - "y": 52 - }, - "id": 88, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "bytes" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" - }, - "editorMode": "code", - "expr": "rate(subsystem_benchmark_network_peer_total_bytes_received{}[10s])", - "instant": false, - "legendFormat": "Received by {{peer}}", - "range": true, - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 88, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" }, - { - "datasource": { - "type": "prometheus", - "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + "editorMode": "code", + "expr": "rate(subsystem_benchmark_network_peer_total_bytes_received{}[10s])", + "instant": false, + "legendFormat": "Received by {{peer}}", + 
"range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "rate(subsystem_benchmark_network_peer_total_bytes_sent{}[10s])", + "hide": false, + "instant": false, + "legendFormat": "Sent by {{peer}}", + "range": true, + "refId": "B" + } + ], + "title": "Emulated peer throughput", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "editorMode": "code", - "expr": "rate(subsystem_benchmark_network_peer_total_bytes_sent{}[10s])", - "hide": false, - "instant": false, - "legendFormat": "Sent by {{peer}}", - "range": true, - "refId": "B" + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Emulated peer throughput", - "type": "timeseries" + }, + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 52 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 92, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateInferno", "exponent": 0.5, - "mode": "spectrum" + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - 
"description": "", - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 15, - "w": 12, - "x": 12, - "y": 52 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 92, - "interval": "1s", "legend": { "show": true }, - "maxDataPoints": 1340, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": { - "decimals": 0 - }, - "color": { - "exponent": 0.5, - "fill": "#b4ff00", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Inferno", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 0, - "reverse": false, - "unit": "bytes" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.1.1", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(increase(subsystem_benchmark_pov_size_bucket{}[$__rate_interval])) by (le)", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "B" - } - ], - "title": "Recovered PoV sizes", + "showValue": "never", "tooltip": { "show": true, - "showHistogram": true - }, - "tooltipDecimals": 0, - "transformations": [], - "type": "heatmap", - "xAxis": { - "show": true + "yHistogram": true }, "yAxis": { + "axisPlacement": "left", "decimals": 0, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "reverse": false, + "unit": "bytes" + } }, - { - "datasource": { - "type": 
"prometheus", - "uid": "${data_source}" - }, - "description": "Number of erasure-encoded chunks of data belonging to candidate blocks. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic", - "seriesBy": "max" + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(subsystem_benchmark_pov_size_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Recovered PoV sizes", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "Number of erasure-encoded chunks of data belonging to candidate blocks. 
", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "chunks/s" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 67 - }, - "id": 43, - "interval": "1s", - "maxDataPoints": 1340, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "unit": "chunks/s" }, - "pluginVersion": "8.2.2", - "targets": [ - { - "datasource": { - "type": 
"prometheus", - "uid": "${data_source}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(polkadot_parachain_availability_recovery_chunk_requests_issued{}[10s]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Chunks requested", - "queryType": "randomWalk", - "refId": "B" - } - ], - "title": "Availability", - "transformations": [], - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 77 + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 43, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 35, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_chunk_requests_issued{}[10s]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Chunks requested", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Availability", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 77 + }, + "id": 35, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Availability subystem metrics", + "type": "row" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + 
"exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "refId": "A" + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Availability subystem metrics", - "type": "row" + }, + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 78 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 68, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateInferno", "exponent": 0.5, - "mode": "spectrum" + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 78 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 68, - "interval": "1s", "legend": { "show": true }, - "maxDataPoints": 1340, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": { - "decimals": 0 - }, - "color": { - "exponent": 0.5, - "fill": "#b4ff00", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Inferno", - "steps": 128 - }, - "exemplars": { - 
"color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 0, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.1.1", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(increase(polkadot_parachain_availability_recovery_time_total_bucket{}[$__rate_interval])) by (le)", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "B" - } - ], - "title": "Time to recover a PoV", + "showValue": "never", "tooltip": { "show": true, - "showHistogram": true - }, - "tooltipDecimals": 0, - "transformations": [], - "type": "heatmap", - "xAxis": { - "show": true + "yHistogram": true }, "yAxis": { + "axisPlacement": "left", "decimals": 0, - "format": "s", - "logBase": 1, - "show": true + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_total_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Time to recover a PoV", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + 
"cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } }, - "yBucketBound": "auto" + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 78 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 67, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateInferno", "exponent": 0.5, - "mode": "spectrum" + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 78 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 67, - "interval": "1s", "legend": { "show": true }, - "maxDataPoints": 1340, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": { - "decimals": 0 - }, - "color": { - "exponent": 0.5, - "fill": "#b4ff00", - "mode": "scheme", - "reverse": false, - "scale": "exponential", 
- "scheme": "Inferno", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 0, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.1.1", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(increase(polkadot_parachain_availability_recovery_time_chunk_request_bucket{}[$__rate_interval])) by (le)", - "format": "heatmap", - "instant": false, - "interval": "", - "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "Chunk request duration", + "showValue": "never", "tooltip": { "show": true, - "showHistogram": true - }, - "tooltipDecimals": 0, - "transformations": [], - "type": "heatmap", - "xAxis": { - "show": true + "yHistogram": true }, "yAxis": { + "axisPlacement": "left", "decimals": 0, - "format": "bitfields", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "reverse": false, + "unit": "s" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic", - "seriesBy": "max" + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_chunk_request_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Chunk request duration", + 
"tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "bitfields", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "Bps" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 88 - }, - "id": 85, - "interval": "1s", - "maxDataPoints": 1340, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", 
- "placement": "bottom", - "showLegend": true + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "unit": "Bps" }, - "pluginVersion": "8.2.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 88 + }, + "id": 85, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "rate(polkadot_parachain_availability_recovery_bytes_total{}[30s])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Bytes recovered", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Recovery throughtput", + "transformations": [], + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "editorMode": "code", - "exemplar": true, - "expr": "rate(polkadot_parachain_availability_recovery_bytes_total{}[30s])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Bytes recovered", - "queryType": "randomWalk", - "refId": "B" + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Recovery 
throughtput", - "transformations": [], - "type": "timeseries" + }, + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 88 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 84, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateInferno", "exponent": 0.5, - "mode": "spectrum" + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 88 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 84, - "interval": "1s", "legend": { "show": true }, - "maxDataPoints": 1340, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": { - "decimals": 0 - }, - "color": { - "exponent": 0.5, - "fill": "#b4ff00", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Inferno", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 0, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": 
"auto" }, - "pluginVersion": "10.1.1", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(increase(polkadot_parachain_availability_reencode_chunks_bucket{}[$__rate_interval])) by (le)", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "B" - } - ], - "title": "Re-encoding chunks timing", + "showValue": "never", "tooltip": { "show": true, - "showHistogram": true - }, - "tooltipDecimals": 0, - "transformations": [], - "type": "heatmap", - "xAxis": { - "show": true + "yHistogram": true }, "yAxis": { + "axisPlacement": "left", "decimals": 0, - "format": "s", - "logBase": 1, - "show": true + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_reencode_chunks_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Re-encoding chunks timing", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } }, - "yBucketBound": "auto" + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 98 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 83, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateInferno", "exponent": 0.5, - "mode": "spectrum" + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 98 - }, - "heatmap": {}, - "hideZeroBuckets": true, - "highlightCards": true, - "id": 83, - "interval": "1s", "legend": { "show": true }, - "maxDataPoints": 1340, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": { - "decimals": 0 - }, - "color": { - "exponent": 0.5, - "fill": "#b4ff00", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Inferno", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "show": true, - "yHistogram": true - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 0, - 
"reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.1.1", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(increase(polkadot_parachain_availability_recovery_time_erasure_recovery_bucket{}[$__rate_interval])) by (le)", - "format": "heatmap", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{le}}", - "queryType": "randomWalk", - "refId": "B" - } - ], - "title": "Erasure recovery (no I/O)", + "showValue": "never", "tooltip": { "show": true, - "showHistogram": true - }, - "tooltipDecimals": 0, - "transformations": [], - "type": "heatmap", - "xAxis": { - "show": true + "yHistogram": true }, "yAxis": { + "axisPlacement": "left", "decimals": 0, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "reverse": false, + "unit": "s" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "description": "Number of erasure-encoded chunks of data belonging to candidate blocks. 
", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic", - "seriesBy": "max" + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_erasure_recovery_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Erasure recovery (no I/O)", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "Number of erasure-encoded chunks of data belonging to candidate blocks. 
", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "stepAfter", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" }, - "unit": "cps" + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 108 - }, - "id": 86, - "interval": "1s", - "maxDataPoints": 1340, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "unit": "cps" }, - "pluginVersion": "8.2.2", - "targets": [ - { - "datasource": { - "type": 
"prometheus", - "uid": "${data_source}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(polkadot_parachain_availability_recovery_recoveries_finished{}[1s]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Finished", - "queryType": "randomWalk", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${data_source}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(polkadot_parachain_availability_recovery_recovieries_started{}[1s]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Started", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "Recoveries", - "transformations": [], - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 118 + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 108 + }, + "id": 86, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 2, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Approval voting", - "type": "row" - } - ], - "refresh": "5s", - "schemaVersion": 38, - "style": "dark", - "tags": [ - "subsystem", - "benchmark" - ], - "templating": { - "list": [ + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ { - "current": { - "selected": false, - "text": "Prometheus", - "value": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" - }, - "hide": 0, - "includeAll": false, - "label": "Source of data", - "multi": false, - "name": "data_source", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" + 
"datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_recoveries_finished{}[1s]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Finished", + "queryType": "randomWalk", + "refId": "B" }, { - "current": { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_recovieries_started{}[1s]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Started", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Recoveries", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 118 + }, + "id": 2, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Approval voting", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 38, + "style": "dark", + "tags": [ + "subsystem", + "benchmark" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "hide": 0, + "includeAll": false, + "label": "Source of data", + "multi": false, + "name": "data_source", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "task_name", + "value": "task_name" + }, + "description": "Sum CPU usage by task name or task group.", + "hide": 0, + "includeAll": false, + "label": "Group CPU usage", + "multi": false, + "name": "cpu_group_by", + "options": [ + { "selected": true, "text": 
"task_name", "value": "task_name" }, - "description": "Sum CPU usage by task name or task group.", - "hide": 0, - "includeAll": false, - "label": "Group CPU usage", - "multi": false, - "name": "cpu_group_by", - "options": [ - { - "selected": true, - "text": "task_name", - "value": "task_name" - }, - { - "selected": false, - "text": "task_group", - "value": "task_group" - } - ], - "query": "task_name, task_group", - "queryValue": "", - "skipUrlSync": false, - "type": "custom" - } - ] - }, - "time": { - "from": "now-5m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s" - ] - }, - "timezone": "utc", - "title": "Data Availability Read", - "uid": "asdadasd1", - "version": 56, - "weekStart": "" - } \ No newline at end of file + { + "selected": false, + "text": "task_group", + "value": "task_group" + } + ], + "query": "task_name, task_group", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "2023-11-28T13:05:32.794Z", + "to": "2023-11-28T13:06:56.173Z" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s" + ] + }, + "timezone": "utc", + "title": "Data Availability Read", + "uid": "asdadasd1", + "version": 58, + "weekStart": "" +} \ No newline at end of file From b2490560da9d8924a393a31322de2ee59e31ca72 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 28 Nov 2023 15:51:16 +0200 Subject: [PATCH 33/52] remove unused Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/src/availability/cli.rs | 4 ---- polkadot/node/subsystem-bench/src/availability/mod.rs | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/cli.rs b/polkadot/node/subsystem-bench/src/availability/cli.rs index 8da4a59253c6..65df8c1552aa 100644 --- a/polkadot/node/subsystem-bench/src/availability/cli.rs +++ b/polkadot/node/subsystem-bench/src/availability/cli.rs @@ -15,10 +15,6 @@ // along with Polkadot. If not, see . 
use serde::{Deserialize, Serialize}; -#[derive(Debug, clap::Parser, Clone)] -#[clap(rename_all = "kebab-case")] -#[allow(missing_docs)] -pub struct NetworkOptions {} #[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] #[value(rename_all = "kebab-case")] diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index e5543e5d3904..cbd2f8287633 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -68,7 +68,7 @@ use sc_service::SpawnTaskHandle; mod cli; pub mod configuration; -pub use cli::{DataAvailabilityReadOptions, NetworkEmulation, NetworkOptions}; +pub use cli::{DataAvailabilityReadOptions, NetworkEmulation}; pub use configuration::AvailabilityRecoveryConfiguration; fn build_overseer( From fb34181c26cc58f84ffa687fbb0b823e8f5233fb Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 28 Nov 2023 15:58:24 +0200 Subject: [PATCH 34/52] revert unneeded changes Signed-off-by: Andrei Sandu --- .../node/subsystem-test-helpers/src/lib.rs | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/polkadot/node/subsystem-test-helpers/src/lib.rs b/polkadot/node/subsystem-test-helpers/src/lib.rs index 1c3c47150ac6..3f92513498c4 100644 --- a/polkadot/node/subsystem-test-helpers/src/lib.rs +++ b/polkadot/node/subsystem-test-helpers/src/lib.rs @@ -187,7 +187,6 @@ pub struct TestSubsystemContext { tx: TestSubsystemSender, rx: mpsc::Receiver>, spawn: S, - name: &'static str, } #[async_trait::async_trait] @@ -224,7 +223,7 @@ where name: &'static str, s: Pin + Send>>, ) -> SubsystemResult<()> { - self.spawn.spawn(name, Some(self.name), s); + self.spawn.spawn(name, None, s); Ok(()) } @@ -233,7 +232,7 @@ where name: &'static str, s: Pin + Send>>, ) -> SubsystemResult<()> { - self.spawn.spawn_blocking(name, Some(self.name), s); + self.spawn.spawn_blocking(name, None, s); Ok(()) } @@ -279,13 +278,6 @@ impl 
TestSubsystemContextHandle { .expect("Test subsystem no longer live") } - /// Receive the next message from the subsystem. - pub async fn maybe_recv(&mut self) -> Option { - self.try_recv() - .timeout(Self::TIMEOUT) - .await - .expect("`fn recv` does not timeout") - } /// Receive the next message from the subsystem, or `None` if the channel has been closed. pub async fn try_recv(&mut self) -> Option { self.rx @@ -300,9 +292,8 @@ impl TestSubsystemContextHandle { /// of the tests. pub fn make_subsystem_context( spawner: S, - name: &'static str, ) -> (TestSubsystemContext>, TestSubsystemContextHandle) { - make_buffered_subsystem_context(spawner, 0, name) + make_buffered_subsystem_context(spawner, 0) } /// Make a test subsystem context with buffered overseer channel. Some tests (e.g. @@ -311,7 +302,6 @@ pub fn make_subsystem_context( pub fn make_buffered_subsystem_context( spawner: S, buffer_size: usize, - name: &'static str, ) -> (TestSubsystemContext>, TestSubsystemContextHandle) { let (overseer_tx, overseer_rx) = mpsc::channel(buffer_size); let (all_messages_tx, all_messages_rx) = mpsc::unbounded(); @@ -321,7 +311,6 @@ pub fn make_buffered_subsystem_context( tx: TestSubsystemSender { tx: all_messages_tx }, rx: overseer_rx, spawn: SpawnGlue(spawner), - name, }, TestSubsystemContextHandle { tx: overseer_tx, rx: all_messages_rx }, ) @@ -343,7 +332,7 @@ pub fn subsystem_test_harness( Test: Future, { let pool = TaskExecutor::new(); - let (context, handle) = make_subsystem_context(pool, "default"); + let (context, handle) = make_subsystem_context(pool); let overseer = overseer_factory(handle); let test = test_factory(context); From 3a716a54830aac41d75b9e6f411829966de0a92f Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 29 Nov 2023 12:11:08 +0200 Subject: [PATCH 35/52] add missing comments and minor fixes Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 31 ++++++++++--------- .../node/subsystem-bench/src/core/display.rs | 4 --- 
.../subsystem-bench/src/core/environment.rs | 18 +++++------ .../src/core/mock/network_bridge.rs | 20 ++++++------ .../node/subsystem-bench/src/core/network.rs | 16 +++++----- 5 files changed, 43 insertions(+), 46 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index cbd2f8287633..f4c39893215b 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -105,11 +105,6 @@ fn prepare_test_inner( state: &mut TestState, dependencies: TestEnvironmentDependencies, ) -> (TestEnvironment, ProtocolConfig) { - // We need to first create the high level test state object. - // This will then be decomposed into per subsystem states. - let candidate_count = config.n_cores * config.num_blocks; - state.generate_candidates(candidate_count); - // Generate test authorities. let test_authorities = config.generate_authorities(); @@ -173,6 +168,7 @@ fn prepare_test_inner( pub struct TestState { // Full test configuration config: TestConfiguration, + // A cycle iterator on all PoV sizes used in the test. pov_sizes: Cycle>, // Generated candidate receipts to be used in the test candidates: Cycle>, @@ -181,9 +177,11 @@ pub struct TestState { // Map from generated candidate hashes to candidate index in `available_data` // and `chunks`. candidate_hashes: HashMap, - - candidate_receipts: Vec, + // Per candidate index receipts. + candidate_receipt_templates: Vec, + // Per candidate index `AvailableData` available_data: Vec, + // Per candidate index chunks chunks: Vec>, } @@ -200,7 +198,8 @@ impl TestState { } /// Generate candidates to be used in the test.
- pub fn generate_candidates(&mut self, count: usize) { + fn generate_candidates(&mut self) { + let count = self.config.n_cores * self.config.num_blocks; gum::info!(target: LOG_TARGET,"{}", format!("Pre-generating {} candidates.", count).bright_blue()); // Generate all candidates @@ -211,7 +210,8 @@ impl TestState { .pov_size_to_candidate .get(&pov_size) .expect("pov_size always exists; qed"); - let mut candidate_receipt = self.candidate_receipts[candidate_index].clone(); + let mut candidate_receipt = + self.candidate_receipt_templates[candidate_index].clone(); // Make it unique. candidate_receipt.descriptor.relay_parent = Hash::from_low_u64_be(index as u64); @@ -232,7 +232,7 @@ impl TestState { let mut chunks = Vec::new(); let mut available_data = Vec::new(); - let mut candidate_receipts = Vec::new(); + let mut candidate_receipt_templates = Vec::new(); let mut pov_size_to_candidate = HashMap::new(); // we use it for all candidates. @@ -266,22 +266,25 @@ impl TestState { chunks.push(new_chunks); available_data.push(new_available_data); pov_size_to_candidate.insert(pov_size, index); - candidate_receipts.push(candidate_receipt); + candidate_receipt_templates.push(candidate_receipt); } let pov_sizes = config.pov_sizes().to_vec().into_iter().cycle(); gum::info!(target: LOG_TARGET, "{}","Created test environment.".bright_blue()); - Self { + let mut _self = Self { config, available_data, - candidate_receipts, + candidate_receipt_templates, chunks, pov_size_to_candidate, pov_sizes, candidate_hashes: HashMap::new(), candidates: Vec::new().into_iter().cycle(), - } + }; + + _self.generate_candidates(); + _self } } diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index 03a5c13aeb47..b9ff82d1c06a 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -36,10 +36,6 @@ impl From> for MetricCollection { } impl MetricCollection { - pub fn get(&self, 
name: &str) -> Vec<&TestMetric> { - self.all().into_iter().filter(|metric| &metric.name == name).collect() - } - pub fn all(&self) -> &Vec { &self.0 } diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs index 5c04071c442f..247596474078 100644 --- a/polkadot/node/subsystem-bench/src/core/environment.rs +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -20,6 +20,7 @@ use crate::{ }; use colored::Colorize; use core::time::Duration; +use futures::FutureExt; use polkadot_overseer::{BlockInfo, Handle as OverseerHandle}; use polkadot_node_subsystem::{messages::AllMessages, Overseer, SpawnGlue, TimeoutExt}; @@ -179,23 +180,22 @@ const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); /// ### CLI /// A subset of the Prometheus metrics are printed at the end of the test. pub struct TestEnvironment { - // Test dependencies + /// Test dependencies dependencies: TestEnvironmentDependencies, - // A runtime handle + /// A runtime handle runtime_handle: tokio::runtime::Handle, - // A handle to the lovely overseer + /// A handle to the lovely overseer overseer_handle: OverseerHandle, - // The test configuration. + /// The test configuration. config: TestConfiguration, - // A handle to the network emulator. + /// A handle to the network emulator. network: NetworkEmulator, - // Configuration/env metrics + /// Configuration/env metrics metrics: TestEnvironmentMetrics, } impl TestEnvironment { - // Create a new test environment with specified initial state and prometheus registry. - // We use prometheus metrics to collect per job task poll time and subsystem metrics. 
+ /// Create a new test environment pub fn new( dependencies: TestEnvironmentDependencies, config: TestConfiguration, @@ -207,8 +207,8 @@ impl TestEnvironment { .expect("Metrics need to be registered"); let spawn_handle = dependencies.task_manager.spawn_handle(); + spawn_handle.spawn_blocking("overseer", "overseer", overseer.run().boxed()); - spawn_handle.spawn_blocking("overseer", "overseer", overseer.run()); let registry_clone = dependencies.registry.clone(); dependencies.task_manager.spawn_handle().spawn_blocking( "prometheus", diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index c14a3895e238..2bc8d22234b6 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -219,19 +219,18 @@ impl MockNetworkBridgeTx { // Initialize our node bandwidth limits. let mut rx_limiter = RateLimit::new(10, self.config.bandwidth); - // Get a handle to our node network emulation stats. - let our_network_stats = self.network.peer_stats(0); - // This task will handle receipt of messages on our simulated network of the node. + let our_network = self.network.clone(); + + // This task will handle node messages receipt from the simulated network. let _ = ctx .spawn_blocking( - "node0-rx", + "network-receive", async move { while let Some(action) = ingress_rx.recv().await { let size = action.size(); // account for our node receiving the data. - our_network_stats.inc_received(size); - + our_network.inc_received(size); rx_limiter.reap(size).await; action.run().await; } @@ -271,12 +270,11 @@ impl MockNetworkBridgeTx { } // A helper to determine the request payload size. 
-fn request_size(request: &Requests) -> u64 { +fn request_size(request: &Requests) -> usize { match request { - Requests::ChunkFetchingV1(outgoing_request) => - outgoing_request.payload.encoded_size() as u64, + Requests::ChunkFetchingV1(outgoing_request) => outgoing_request.payload.encoded_size(), Requests::AvailableDataFetchingV1(outgoing_request) => - outgoing_request.payload.encoded_size() as u64, - _ => panic!("received an unexpected request"), + outgoing_request.payload.encoded_size(), + _ => unimplemented!("received an unexpected request"), } } diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index 40809ce36e8d..67dc0e0f267e 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -225,12 +225,12 @@ impl PeerEmulatorStats { pub fn inc_sent(&self, bytes: usize) { self.tx_bytes_total.fetch_add(bytes as u64, Ordering::Relaxed); - self.metrics.on_peer_sent(self.peer_index, bytes as u64); + self.metrics.on_peer_sent(self.peer_index, bytes); } pub fn inc_received(&self, bytes: usize) { self.rx_bytes_total.fetch_add(bytes as u64, Ordering::Relaxed); - self.metrics.on_peer_received(self.peer_index, bytes as u64); + self.metrics.on_peer_received(self.peer_index, bytes); } pub fn sent(&self) -> u64 { @@ -346,13 +346,13 @@ impl NetworkEmulator { } // Increment bytes sent by our node (the node that contains the subsystem under test) - pub fn inc_sent(&self, bytes: u64) { + pub fn inc_sent(&self, bytes: usize) { // Our node always is peer 0. self.metrics.on_peer_sent(0, bytes); } // Increment bytes received by our node (the node that contains the subsystem under test) - pub fn inc_received(&self, bytes: u64) { + pub fn inc_received(&self, bytes: usize) { // Our node always is peer 0. self.metrics.on_peer_received(0, bytes); } @@ -398,16 +398,16 @@ impl Metrics { } /// Increment total sent for a peer. 
- pub fn on_peer_sent(&self, peer_index: usize, bytes: u64) { + pub fn on_peer_sent(&self, peer_index: usize, bytes: usize) { self.peer_total_sent .with_label_values(vec![format!("node{}", peer_index).as_str()].as_slice()) - .inc_by(bytes); + .inc_by(bytes as u64); } /// Increment total receioved for a peer. - pub fn on_peer_received(&self, peer_index: usize, bytes: u64) { + pub fn on_peer_received(&self, peer_index: usize, bytes: usize) { self.peer_total_received .with_label_values(vec![format!("node{}", peer_index).as_str()].as_slice()) - .inc_by(bytes); + .inc_by(bytes as u64); } } From a092b764aad74194632d70224d1c2b53bd15dd63 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 29 Nov 2023 12:49:37 +0200 Subject: [PATCH 36/52] clippy Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/src/availability/mod.rs | 2 +- polkadot/node/subsystem-bench/src/core/network.rs | 2 -- polkadot/node/subsystem-bench/src/subsystem-bench.rs | 5 ----- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index f4c39893215b..7d6865fee958 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -120,7 +120,7 @@ fn prepare_test_inner( }; let network = NetworkEmulator::new( - config.n_validators.clone(), + config.n_validators, test_authorities.validator_authority_id, config.peer_bandwidth, dependencies.task_manager.spawn_handle(), diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index 67dc0e0f267e..3d38a8f36b19 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -96,8 +96,6 @@ impl RateLimit { #[cfg(test)] mod tests { - use super::*; - use polkadot_node_metrics::metered::CoarseDuration; use std::time::Instant; use super::RateLimit; diff --git 
a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 5337a13e9729..0f3ae0f41417 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -184,8 +184,3 @@ fn main() -> eyre::Result<()> { cli.launch()?; Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; -} From ca27370c275a0a5f3f10823b4e8ccf1dae3f1a36 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 29 Nov 2023 13:00:12 +0200 Subject: [PATCH 37/52] zepter format features --fix Signed-off-by: Andrei Sandu --- polkadot/node/network/availability-recovery/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/network/availability-recovery/Cargo.toml b/polkadot/node/network/availability-recovery/Cargo.toml index 4a3f5c26e7b9..3d77652acd03 100644 --- a/polkadot/node/network/availability-recovery/Cargo.toml +++ b/polkadot/node/network/availability-recovery/Cargo.toml @@ -40,4 +40,4 @@ polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" } polkadot-primitives-test-helpers = { path = "../../../primitives/test-helpers" } [features] -subsystem-benchmarks = [] \ No newline at end of file +subsystem-benchmarks = [] From be814e554ae2445e1835e95ee7f9780519e643c8 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 29 Nov 2023 14:24:15 +0200 Subject: [PATCH 38/52] fix markdown Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/README.md | 82 +++++++++++++++++-------- 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md index 5b58dc3a5be4..351e07b6abca 100644 --- a/polkadot/node/subsystem-bench/README.md +++ b/polkadot/node/subsystem-bench/README.md @@ -3,9 +3,16 @@ Run parachain consensus stress and performance tests on your development machine. 
## Motivation -The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or `dispute-coordinator`. In the absence such a tool, we would run large test nets to load/stress test these parts of the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard to orchestrate and is a huge development time sink. + +The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is +responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and +performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or +`dispute-coordinator`. In the absence such a tool, we would run large test nets to load/stress test these parts of +the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard +to orchestrate and is a huge development time sink. This tool aims to solve the problem by making it easy to: + - set up and run core subsystem load tests locally on your development machine - iterate and conclude faster when benchmarking new optimizations or comparing implementations - automate and keep track of performance regressions in CI runs @@ -18,17 +25,22 @@ This tool aims to solve the problem by making it easy to: The output binary will be placed in `target/testnet/subsystem-bench`. ### Test metrics + Subsystem, CPU usage and network metrics are exposed via a prometheus endpoint during the test execution. -A small subset of these collected metrics are displayed in the CLI, but for an in depth analysys of the test results, a local Grafana/Prometheus stack is needed. 
+A small subset of these collected metrics are displayed in the CLI, but for an in depth analysis of the test results, +a local Grafana/Prometheus stack is needed. ### Install Prometheus -Please follow the [official installation guide](https://prometheus.io/docs/prometheus/latest/installation/) for your platform/OS. + +Please follow the [official installation guide](https://prometheus.io/docs/prometheus/latest/installation/) for your +platform/OS. After succesfully installing and starting up Prometheus, we need to alter it's configuration such that it will scrape the benchmark prometheus endpoint `127.0.0.1:9999`. Please check the prometheus official documentation -regarding the location of `prometheus.yml`. On MacOS for example the full path `/opt/homebrew/etc/prometheus.yml` +regarding the location of `prometheus.yml`. On MacOS for example the full path `/opt/homebrew/etc/prometheus.yml` prometheus.yml: + ``` global: scrape_interval: 5s @@ -44,6 +56,7 @@ scrape_configs: ``` To complete this step restart Prometheus server such that it picks up the new configuration. + ### Install and setup Grafana Follow the [installation guide](https://grafana.com/docs/grafana/latest/setup-grafana/installation/) relevant @@ -54,7 +67,8 @@ Once you have the installation up and running, configure the local Prometheus as #### Import dashboards -Follow [this guide](https://grafana.com/docs/grafana/latest/dashboards/manage-dashboards/#export-and-import-dashboards) to import the dashboards from the repository `grafana` folder. +Follow [this guide](https://grafana.com/docs/grafana/latest/dashboards/manage-dashboards/#export-and-import-dashboards) +to import the dashboards from the repository `grafana` folder. ## How to run a test @@ -71,14 +85,15 @@ Commands: ``` -Note: `test-sequence` is a special test objective that wraps up an arbitrary number of test objectives.
It is tipically used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml). +Note: `test-sequence` is a special test objective that wraps up an arbitrary number of test objectives. It is typically + used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml). ### Standard test options ``` Options: - --network The type of network to be emulated [default: ideal] [possible values: ideal, - healthy, degraded] + --network The type of network to be emulated [default: ideal] [possible values: + ideal, healthy, degraded] --n-cores Number of cores to fetch availability for [default: 100] --n-validators Number of validators to fetch chunks from [default: 500] --min-pov-size The minimum pov size in KiB [default: 5120] @@ -96,9 +111,11 @@ Options: These apply to all test objectives, except `test-sequence` which relies on the values being specified in a file. ### Test objectives + Each test objective can have it's specific configuration options, in contrast with the standard test options. For `data-availability-read` the recovery strategy to be used is configurable. + ``` target/testnet/subsystem-bench data-availability-read --help Benchmark availability recovery strategies @@ -106,31 +123,38 @@ Benchmark availability recovery strategies Usage: subsystem-bench data-availability-read [OPTIONS] Options: - -f, --fetch-from-backers Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU as we - don't need to re-construct from chunks. Tipically this is only faster if nodes have enough - bandwidth + -f, --fetch-from-backers Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU + as we don't need to re-construct from chunks.
Tipically this is only faster if nodes + have enough bandwidth -h, --help Print help ``` + ### Understanding the test configuration + A single test configuration `TestConfiguration` struct applies to a single run of a certain test objective. The configuration describes the following important parameters that influence the test duration and resource usage: + - how many validators are on the emulated network (`n_validators`) - how many cores per block the subsystem will have to do work on (`n_cores`) - for how many blocks the test should run (`num_blocks`) -From the perspective of the subsystem under test, this means that it will receive an `ActiveLeavesUpdate` signal -followed by an arbitrary amount of messages. This process repeats itself for `num_blocks`. The messages are generally test payloads pre-generated before the test run, or constructed on pre-genereated payloads. For example the `AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before the test is started. +From the perspective of the subsystem under test, this means that it will receive an `ActiveLeavesUpdate` signal +followed by an arbitrary amount of messages. This process repeats itself for `num_blocks`. The messages are generally +test payloads pre-generated before the test run, or constructed on pre-generated payloads. For example the +`AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before +the test is started. -### Example run +### Example run Let's run an availabilty read test which will recover availability for 10 cores with max PoV size on a 500 node validator network.
``` target/testnet/subsystem-bench --n-cores 10 data-availability-read -[2023-11-28T09:01:59Z INFO subsystem_bench::core::display] n_validators = 500, n_cores = 10, pov_size = 5120 - 5120, error = 0, latency = None +[2023-11-28T09:01:59Z INFO subsystem_bench::core::display] n_validators = 500, n_cores = 10, pov_size = 5120 - 5120, + error = 0, latency = None [2023-11-28T09:01:59Z INFO subsystem-bench::availability] Generating template candidate index=0 pov_size=5242880 [2023-11-28T09:01:59Z INFO subsystem-bench::availability] Created test environment. [2023-11-28T09:01:59Z INFO subsystem-bench::availability] Pre-generating 10 candidates. @@ -153,32 +177,40 @@ node validator network. CPU usage per block 0.00s ``` -`Block time` in the context of `data-availability-read` has a different meaning. It measures the amount of time it took the subsystem to finish processing all of the messages sent in the context of the current test block. - +`Block time` in the context of `data-availability-read` has a different meaning. It measures the amount of time it +took the subsystem to finish processing all of the messages sent in the context of the current test block. ### Test logs -You can select log target, subtarget and verbosity just like with Polkadot node CLI, simply setting `RUST_LOOG="parachain=debug"` turns on debug logs for all parachain consensus subsystems in the test. +You can select log target, subtarget and verbosity just like with Polkadot node CLI, simply setting +`RUST_LOG="parachain=debug"` turns on debug logs for all parachain consensus subsystems in the test. ### View test metrics -Assuming the Grafana/Prometheus stack installation steps completed succesfully, you should be able to +Assuming the Grafana/Prometheus stack installation steps completed successfully, you should be able to view the test progress in real time by accessing [this link](http://localhost:3000/goto/SM5B8pNSR?orgId=1).
-Now run `target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml` and view the metrics in real time and spot differences between different `n_valiator` values. +Now run +`target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml` +and view the metrics in real time and spot differences between different `n_validators` values. ## Create new test objectives -This tool is intended to make it easy to write new test objectives that focus individual subsystems, + +This tool is intended to make it easy to write new test objectives that focus individual subsystems, or even multiple subsystems (for example `approval-distribution` and `approval-voting`). A special kind of test objectives are performance regression tests for the CI pipeline. These should be sequences -of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both happy and negative scenarios (low bandwidth, network errors and low connectivity). +of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both +happy and negative scenarios (low bandwidth, network errors and low connectivity). ### Reusable test components -To faster write a new test objective you need to use some higher level wrappers and logic: `TestEnvironment` `TestConfiguration`, `TestAuthorities`, `NetworkEmulator`. To create the `TestEnvironment` you will + +To faster write a new test objective you need to use some higher level wrappers and logic: `TestEnvironment`, +`TestConfiguration`, `TestAuthorities`, `NetworkEmulator`. To create the `TestEnvironment` you will need to also build an `Overseer`, but that should be easy using the mockups for subsystems in`core::mock`. ### Mocking -Ideally we want to have a single mock implementation for subsystems that can be minimally configured to -be used in different tests.
A good example is `runtime-api` which currently only responds to session information requests based on static data. It can be easily extended to service other requests. +Ideally we want to have a single mock implementation for subsystems that can be minimally configured to +be used in different tests. A good example is `runtime-api` which currently only responds to session information +requests based on static data. It can be easily extended to service other requests. From 11ce8f5121ed5d6448a77e46647a4a0ffcfad066 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 29 Nov 2023 18:01:40 +0200 Subject: [PATCH 39/52] remove sleep till end of block Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/src/availability/mod.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 7d6865fee958..77888fa6058c 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -363,13 +363,9 @@ pub async fn benchmark_availability_read(env: &mut TestEnvironment, mut state: T let block_time = Instant::now().sub(block_start_ts).as_millis() as u64; env.metrics().set_block_time(block_time); - gum::info!("Block time {}", format!("{:?}ms", block_time).cyan()); - gum::info!(target: LOG_TARGET,"{}", format!("Sleeping till end of block ({}ms)", block_time_delta.as_millis()).bright_black()); - tokio::time::sleep(block_time_delta).await; + gum::info!("All work for block completed in {}", format!("{:?}ms", block_time).cyan()); } - env.stop().await; - let duration: u128 = start_marker.elapsed().as_millis(); let availability_bytes = availability_bytes / 1024; gum::info!("All blocks processed in {}", format!("{:?}ms", duration).cyan()); @@ -384,4 +380,5 @@ pub async fn benchmark_availability_read(env: &mut TestEnvironment, mut state: T ); gum::info!("{}", &env); + env.stop().await; } 
From 8d93abc6dd73a7cf82668b17c570235e3a4d7dbc Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 29 Nov 2023 20:04:37 +0200 Subject: [PATCH 40/52] review Signed-off-by: Andrei Sandu --- .../network/availability-recovery/src/lib.rs | 2 +- polkadot/node/subsystem-bench/README.md | 2 +- .../src/availability/configuration.rs | 24 ------------------- .../subsystem-bench/src/availability/mod.rs | 13 +--------- .../subsystem-bench/src/core/mock/av_store.rs | 2 +- .../src/core/mock/network_bridge.rs | 2 +- 6 files changed, 5 insertions(+), 40 deletions(-) delete mode 100644 polkadot/node/subsystem-bench/src/availability/configuration.rs diff --git a/polkadot/node/network/availability-recovery/src/lib.rs b/polkadot/node/network/availability-recovery/src/lib.rs index c454028b8650..d029bce04173 100644 --- a/polkadot/node/network/availability-recovery/src/lib.rs +++ b/polkadot/node/network/availability-recovery/src/lib.rs @@ -856,7 +856,7 @@ async fn erasure_task_thread( } // In benchmarks this is a very hot loop not yielding at all. - // To update promehteus metrics for the task we need to yield. + // To update CPU metrics for the task we need to yield. #[cfg(feature = "subsystem-benchmarks")] tokio::task::yield_now().await; } diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md index 351e07b6abca..f4ea04662f9e 100644 --- a/polkadot/node/subsystem-bench/README.md +++ b/polkadot/node/subsystem-bench/README.md @@ -7,7 +7,7 @@ Run parachain consensus stress and performance tests on your development machine The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or -`dispute-coordinator`. 
In the absence such a tool, we would run large test nets to load/stress test these parts of +`dispute-coordinator`. In the absence of such a tool, we would run large test nets to load/stress test these parts of the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard to orchestrate and is a huge development time sink. diff --git a/polkadot/node/subsystem-bench/src/availability/configuration.rs b/polkadot/node/subsystem-bench/src/availability/configuration.rs deleted file mode 100644 index 1274862a8e4a..000000000000 --- a/polkadot/node/subsystem-bench/src/availability/configuration.rs +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see <http://www.gnu.org/licenses/>. 
- -use serde::{Deserialize, Serialize}; - -/// The test input parameters -#[derive(Clone, Default, Debug, Serialize, Deserialize)] -pub struct AvailabilityRecoveryConfiguration { - /// Prefer the fast path (try fetch from backers first) - pub use_fast_path: bool, -} diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 77888fa6058c..ca2e800d4c89 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -14,13 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . use itertools::Itertools; -use std::{ - collections::HashMap, - iter::Cycle, - ops::Sub, - sync::Arc, - time::{Duration, Instant}, -}; +use std::{collections::HashMap, iter::Cycle, ops::Sub, sync::Arc, time::Instant}; use crate::TestEnvironment; use polkadot_node_subsystem::{Overseer, OverseerConnector, SpawnGlue}; @@ -67,9 +61,7 @@ use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::SpawnTaskHandle; mod cli; -pub mod configuration; pub use cli::{DataAvailabilityReadOptions, NetworkEmulation}; -pub use configuration::AvailabilityRecoveryConfiguration; fn build_overseer( spawn_task_handle: SpawnTaskHandle, @@ -358,9 +350,6 @@ pub async fn benchmark_availability_read(env: &mut TestEnvironment, mut state: T availability_bytes += available_data.encoded_size() as u128; } - let block_time_delta = - Duration::from_secs(6).saturating_sub(Instant::now().sub(block_start_ts)); - let block_time = Instant::now().sub(block_start_ts).as_millis() as u64; env.metrics().set_block_time(block_time); gum::info!("All work for block completed in {}", format!("{:?}ms", block_time).cyan()); diff --git a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs index 1ff7d1728af9..88747affc8c0 100644 --- 
a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs @@ -129,7 +129,7 @@ impl MockAvailabilityStore { let _ = tx.send(Some(chunk_size)); }, _ => { - unimplemented!("Unexpected runtime-api message") + unimplemented!("Unexpected av-store message") }, }, } diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index 2bc8d22234b6..53f4fb9631f2 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -261,7 +261,7 @@ impl MockNetworkBridgeTx { } }, _ => { - unimplemented!("Unexpected runtime-api message") + unimplemented!("Unexpected network bridge message") }, }, } From af141eefcc198926f7da7734228e208e8411deac Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 1 Dec 2023 15:28:09 +0200 Subject: [PATCH 41/52] Emulated network improvements Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 8 +- .../subsystem-bench/src/core/configuration.rs | 33 ++++- .../src/core/mock/network_bridge.rs | 55 +++++++- .../node/subsystem-bench/src/core/network.rs | 128 ++++++++++++++---- 4 files changed, 184 insertions(+), 40 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index ca2e800d4c89..244119735966 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -111,13 +111,7 @@ fn prepare_test_inner( chunks: state.chunks.clone(), }; - let network = NetworkEmulator::new( - config.n_validators, - test_authorities.validator_authority_id, - config.peer_bandwidth, - dependencies.task_manager.spawn_handle(), - &dependencies.registry, - ); + let network = NetworkEmulator::new(&config, &dependencies, &test_authorities); let network_bridge_tx = 
network_bridge::MockNetworkBridgeTx::new( config.clone(), diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs index 340b5c03ab84..adb5ce80c0d4 100644 --- a/polkadot/node/subsystem-bench/src/core/configuration.rs +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -43,6 +43,21 @@ pub struct PeerLatency { pub max_latency: Duration, } +// Default PoV size in KiB. +fn default_pov_size() -> usize { + 5120 +} + +// Default bandwidth in bytes +fn default_bandwidth() -> usize { + 52428800 +} + +// Default connectivity percentage +fn default_connectivity() -> usize { + 100 +} + /// The test input parameters #[derive(Clone, Debug, Serialize, Deserialize)] pub struct TestConfiguration { @@ -53,22 +68,31 @@ pub struct TestConfiguration { /// Number of cores pub n_cores: usize, /// The min PoV size + #[serde(default = "default_pov_size")] pub min_pov_size: usize, /// The max PoV size, + #[serde(default = "default_pov_size")] pub max_pov_size: usize, /// Randomly sampled pov_sizes #[serde(skip)] pov_sizes: Vec, /// The amount of bandiwdth remote validators have. + #[serde(default = "default_bandwidth")] pub peer_bandwidth: usize, /// The amount of bandiwdth our node has. + #[serde(default = "default_bandwidth")] pub bandwidth: usize, /// Optional peer emulation latency + #[serde(default)] pub latency: Option, - /// Error probability + /// Error probability, applies to sending messages to the emulated network peers + #[serde(default)] pub error: usize, - /// Number of blocks - /// In one block `n_cores` candidates are recovered + /// Connectivity ratio, the percentage of peers we are not connected to, but ar part of + /// the topology. 
+ #[serde(default = "default_connectivity")] + pub connectivity: usize, + /// Number of blocks to run the test for pub num_blocks: usize, } @@ -166,6 +190,7 @@ impl TestConfiguration { num_blocks, min_pov_size, max_pov_size, + connectivity: 100, } } @@ -192,6 +217,7 @@ impl TestConfiguration { num_blocks, min_pov_size, max_pov_size, + connectivity: 95, } } @@ -218,6 +244,7 @@ impl TestConfiguration { num_blocks, min_pov_size, max_pov_size, + connectivity: 67, } } } diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index 53f4fb9631f2..fa4730209183 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -16,10 +16,10 @@ //! //! A generic av store subsystem mockup suitable to be used in benchmarks. +use futures::Future; use parity_scale_codec::Encode; use polkadot_node_subsystem_types::OverseerSignal; - -use std::collections::HashMap; +use std::{collections::HashMap, pin::Pin}; use futures::FutureExt; @@ -35,6 +35,7 @@ use polkadot_node_subsystem::{ use polkadot_node_network_protocol::request_response::{ self as req_res, v1::ChunkResponse, Requests, }; +use polkadot_primitives::AuthorityDiscoveryId; use crate::core::{ configuration::{random_error, random_latency, TestConfiguration}, @@ -71,7 +72,24 @@ impl MockNetworkBridgeTx { Self { config, availabilty, network } } - pub fn respond_to_send_request( + fn not_connected_response( + &self, + authority_discovery_id: &AuthorityDiscoveryId, + future: Pin + Send>>, + ) -> NetworkAction { + // The network action will send the error after a random delay expires. + return NetworkAction::new( + authority_discovery_id.clone(), + future, + 0, + // Generate a random latency based on configuration. + random_latency(self.config.latency.as_ref()), + ) + } + /// Returns an `NetworkAction` corresponding to the peer sending the response. 
If + /// the peer is connected, the error is sent with a randomized latency as defined in + /// configuration. + fn respond_to_send_request( &mut self, request: Requests, ingress_tx: &mut tokio::sync::mpsc::UnboundedSender, @@ -86,9 +104,23 @@ impl MockNetworkBridgeTx { }; // Account our sent request bytes. self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); + + // If peer is disconnected return an error to the caller + if !self.network.is_peer_connected(&authority_discovery_id) { + // We always send `NotConnected` error and we ignore `IfDisconnected` value in + // the caller. + let future = async move { + let _ = outgoing_request + .pending_response + .send(Err(RequestFailure::NotConnected)); + } + .boxed(); + return self.not_connected_response(&authority_discovery_id, future) + } + // Account for remote received request bytes. self.network - .peer_stats_by_id(authority_discovery_id.clone()) + .peer_stats_by_id(&authority_discovery_id) .inc_received(outgoing_request.payload.encoded_size()); let validator_index: usize = outgoing_request.payload.index.0 as usize; @@ -153,11 +185,24 @@ impl MockNetworkBridgeTx { req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, _ => unimplemented!("Peer recipient not supported yet"), }; + // Account our sent request bytes. self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); + + // If peer is disconnected return an error to the caller + if !self.network.is_peer_connected(&authority_discovery_id) { + let future = async move { + let _ = outgoing_request + .pending_response + .send(Err(RequestFailure::NotConnected)); + } + .boxed(); + return self.not_connected_response(&authority_discovery_id, future) + } + // Account for remote received request bytes. 
self.network - .peer_stats_by_id(authority_discovery_id.clone()) + .peer_stats_by_id(&authority_discovery_id) .inc_received(outgoing_request.payload.encoded_size()); let available_data = diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index 3d38a8f36b19..09943becb65c 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -13,10 +13,15 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use super::*; +use super::{ + configuration::{TestAuthorities, TestConfiguration}, + environment::TestEnvironmentDependencies, + *, +}; use colored::Colorize; use polkadot_primitives::AuthorityDiscoveryId; use prometheus_endpoint::U64; +use rand::{seq::SliceRandom, thread_rng}; use sc_service::SpawnTaskHandle; use std::{ collections::HashMap, @@ -268,44 +273,97 @@ impl NetworkAction { } } +/// The state of a peer on the emulated network. +#[derive(Clone)] +enum Peer { + Connected(PeerEmulator), + Disconnected(PeerEmulator), +} + +impl Peer { + pub fn disconnect(&mut self) { + let new_self = match self { + Peer::Connected(peer) => Peer::Disconnected(peer.clone()), + _ => return, + }; + *self = new_self; + } + + pub fn is_connected(&self) -> bool { + if let Peer::Connected(_) = self { + true + } else { + false + } + } + + pub fn emulator(&mut self) -> &mut PeerEmulator { + match self { + Peer::Connected(ref mut emulator) => emulator, + Peer::Disconnected(ref mut emulator) => emulator, + } + } +} + /// Mocks the network bridge and an arbitrary number of connected peer nodes. /// Implements network latency, bandwidth and connection errors. #[derive(Clone)] pub struct NetworkEmulator { // Per peer network emulation. - peers: Vec, + peers: Vec, /// Per peer stats. stats: Vec>, - /// Network throughput metrics - metrics: Metrics, /// Each emulated peer is a validator. 
validator_authority_ids: HashMap<AuthorityDiscoveryId, usize>, } impl NetworkEmulator { pub fn new( - n_peers: usize, - validator_authority_ids: Vec<AuthorityDiscoveryId>, - bandwidth: usize, - spawn_task_handle: SpawnTaskHandle, - registry: &Registry, + config: &TestConfiguration, + dependencies: &TestEnvironmentDependencies, + authorities: &TestAuthorities, ) -> Self { - gum::info!(target: LOG_TARGET, "{}",format!("Initializing network emulation for {} peers.", n_peers).bright_blue()); + let n_peers = config.n_validators; + gum::info!(target: LOG_TARGET, "{}",format!("Initializing emulation for a {} peer network.", n_peers).bright_blue()); + gum::info!(target: LOG_TARGET, "{}",format!("connectivity {}%, error {}%", config.connectivity, config.error).bright_black()); - let metrics = Metrics::new(&registry).expect("Metrics always register succesfully"); + let metrics = + Metrics::new(&dependencies.registry).expect("Metrics always register succesfully"); let mut validator_authority_id_mapping = HashMap::new(); // Create a `PeerEmulator` for each peer. 
- let (stats, peers) = (0..n_peers) - .zip(validator_authority_ids.into_iter()) + let (stats, mut peers): (_, Vec<_>) = (0..n_peers) + .zip(authorities.validator_authority_id.clone().into_iter()) .map(|(peer_index, authority_id)| { validator_authority_id_mapping.insert(authority_id, peer_index); let stats = Arc::new(PeerEmulatorStats::new(peer_index, metrics.clone())); - (stats.clone(), PeerEmulator::new(bandwidth, spawn_task_handle.clone(), stats)) + ( + stats.clone(), + Peer::Connected(PeerEmulator::new( + config.peer_bandwidth, + dependencies.task_manager.spawn_handle(), + stats, + )), + ) }) .unzip(); - Self { peers, stats, metrics, validator_authority_ids: validator_authority_id_mapping } + let connected_count = config.n_validators as f64 / (100.0 / config.connectivity as f64); + + let (_connected, to_disconnect) = + peers.partial_shuffle(&mut thread_rng(), connected_count as usize); + + for peer in to_disconnect { + peer.disconnect(); + } + + gum::info!(target: LOG_TARGET, "{}",format!("Network created, connected validator count {}", connected_count).bright_black()); + + Self { peers, stats, validator_authority_ids: validator_authority_id_mapping } + } + + pub fn is_peer_connected(&self, peer: &AuthorityDiscoveryId) -> bool { + self.peer(peer).is_connected() } pub fn submit_peer_action(&mut self, peer: AuthorityDiscoveryId, action: NetworkAction) { @@ -313,21 +371,41 @@ impl NetworkEmulator { .validator_authority_ids .get(&peer) .expect("all test authorities are valid; qed"); - self.peers[*index].send(action); + + let peer = self.peers.get_mut(*index).expect("We just retrieved the index above; qed"); + + // Only actions of size 0 are allowed on disconnected peers. + // Typically this are delayed error response sends. 
+ if action.size() > 0 && !peer.is_connected() { + gum::warn!(target: LOG_TARGET, peer_index = index, "Attempted to send data from a disconnected peer, operation ignored"); + return + } + + peer.emulator().send(action); } // Returns the sent/received stats for `peer_index`. - pub fn peer_stats(&mut self, peer_index: usize) -> Arc { + pub fn peer_stats(&self, peer_index: usize) -> Arc { self.stats[peer_index].clone() } - // Returns the sent/received stats for `peer`. - pub fn peer_stats_by_id(&mut self, peer: AuthorityDiscoveryId) -> Arc { - let peer_index = self + // Helper to get peer index by `AuthorityDiscoveryId` + fn peer_index(&self, peer: &AuthorityDiscoveryId) -> usize { + *self .validator_authority_ids - .get(&peer) - .expect("all test authorities are valid; qed"); - self.stats[*peer_index].clone() + .get(peer) + .expect("all test authorities are valid; qed") + } + + // Return the Peer entry for a given `AuthorityDiscoveryId`. + fn peer(&self, peer: &AuthorityDiscoveryId) -> &Peer { + &self.peers[self.peer_index(peer)] + } + // Returns the sent/received stats for `peer`. + pub fn peer_stats_by_id(&mut self, peer: &AuthorityDiscoveryId) -> Arc { + let peer_index = self.peer_index(peer); + + self.stats[peer_index].clone() } // Returns the sent/received stats for all peers. @@ -346,13 +424,13 @@ impl NetworkEmulator { // Increment bytes sent by our node (the node that contains the subsystem under test) pub fn inc_sent(&self, bytes: usize) { // Our node always is peer 0. - self.metrics.on_peer_sent(0, bytes); + self.peer_stats(0).inc_sent(bytes); } // Increment bytes received by our node (the node that contains the subsystem under test) pub fn inc_received(&self, bytes: usize) { // Our node always is peer 0. 
- self.metrics.on_peer_received(0, bytes); + self.peer_stats(0).inc_received(bytes); } } From 29d80fa638ea4315319d7e96ec391e80d3a2350c Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 1 Dec 2023 16:21:47 +0200 Subject: [PATCH 42/52] fix comment Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index fa4730209183..c8140843b3b9 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -105,7 +105,7 @@ impl MockNetworkBridgeTx { // Account our sent request bytes. self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); - // If peer is disconnected return an error to the caller + // If peer is disconnected return an error if !self.network.is_peer_connected(&authority_discovery_id) { // We always send `NotConnected` error and we ignore `IfDisconnected` value in // the caller. @@ -189,7 +189,7 @@ impl MockNetworkBridgeTx { // Account our sent request bytes. 
self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); - // If peer is disconnected return an error to the caller + // If peer is disconnected return an error if !self.network.is_peer_connected(&authority_discovery_id) { let future = async move { let _ = outgoing_request From 4d21e5bae1baf8f225a31b831dd47cc4ad4e646c Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 8 Dec 2023 13:35:43 +0200 Subject: [PATCH 43/52] cargo lock Signed-off-by: Andrei Sandu --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 74438749859e..f31b1091500e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13279,7 +13279,7 @@ version = "1.0.0" dependencies = [ "assert_matches", "async-trait", - "clap 4.4.6", + "clap 4.4.10", "clap-num", "color-eyre", "colored", From 3e25fdc1d5df30b4c61961e58eb9f694519a1d5b Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 8 Dec 2023 13:47:17 +0200 Subject: [PATCH 44/52] more review feedback Signed-off-by: Andrei Sandu --- Cargo.lock | 2 ++ .../availability-recovery/src/tests.rs | 28 +--------------- .../subsystem-bench/src/availability/mod.rs | 33 ++----------------- .../node/subsystem-test-helpers/Cargo.toml | 3 ++ .../node/subsystem-test-helpers/src/lib.rs | 31 +++++++++++++++++ .../node/subsystem-test-helpers/src/mock.rs | 9 ++--- 6 files changed, 41 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f31b1091500e..f51bcb279d3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12658,6 +12658,8 @@ dependencies = [ "async-trait", "futures", "parking_lot 0.12.1", + "polkadot-erasure-coding", + "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-node-subsystem-util", "polkadot-primitives", diff --git a/polkadot/node/network/availability-recovery/src/tests.rs b/polkadot/node/network/availability-recovery/src/tests.rs index 63ccf0e94f91..18d3d41d88d5 100644 --- a/polkadot/node/network/availability-recovery/src/tests.rs +++ 
b/polkadot/node/network/availability-recovery/src/tests.rs @@ -24,6 +24,7 @@ use parity_scale_codec::Encode; use polkadot_node_network_protocol::request_response::{ self as req_res, IncomingRequest, Recipient, ReqProtocolNames, Requests, }; +use polkadot_node_subsystem_test_helpers::derive_erasure_chunks_with_proofs_and_root; use super::*; @@ -456,33 +457,6 @@ fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec), -) -> (Vec, Hash) { - let mut chunks: Vec> = obtain_chunks(n_validators, available_data).unwrap(); - - for (i, chunk) in chunks.iter_mut().enumerate() { - alter_chunk(i, chunk) - } - - // create proofs for each erasure chunk - let branches = branches(chunks.as_ref()); - - let root = branches.root(); - let erasure_chunks = branches - .enumerate() - .map(|(index, (proof, chunk))| ErasureChunk { - chunk: chunk.to_vec(), - index: ValidatorIndex(index as _), - proof: Proof::try_from(proof).unwrap(), - }) - .collect::>(); - - (erasure_chunks, root) -} - impl Default for TestState { fn default() -> Self { let validators = vec![ diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 244119735966..40b8f4abceeb 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -18,8 +18,8 @@ use std::{collections::HashMap, iter::Cycle, ops::Sub, sync::Arc, time::Instant} use crate::TestEnvironment; use polkadot_node_subsystem::{Overseer, OverseerConnector, SpawnGlue}; +use polkadot_node_subsystem_test_helpers::derive_erasure_chunks_with_proofs_and_root; use polkadot_overseer::Handle as OverseerHandle; - use sc_network::request_responses::ProtocolConfig; use colored::Colorize; @@ -31,9 +31,8 @@ use polkadot_availability_recovery::AvailabilityRecoverySubsystem; use crate::GENESIS_HASH; use parity_scale_codec::Encode; -use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; use 
polkadot_node_network_protocol::request_response::{IncomingRequest, ReqProtocolNames}; -use polkadot_node_primitives::{BlockData, PoV, Proof}; +use polkadot_node_primitives::{BlockData, PoV}; use polkadot_node_subsystem::messages::{AllMessages, AvailabilityRecoveryMessage}; use crate::core::{ @@ -55,7 +54,6 @@ use super::{cli::TestObjective, core::mock::AlwaysSupportsParachains}; use polkadot_node_subsystem_test_helpers::mock::new_block_import_info; use polkadot_primitives::{ CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, PersistedValidationData, - ValidatorIndex, }; use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; use sc_service::SpawnTaskHandle; @@ -274,33 +272,6 @@ impl TestState { } } -fn derive_erasure_chunks_with_proofs_and_root( - n_validators: usize, - available_data: &AvailableData, - alter_chunk: impl Fn(usize, &mut Vec), -) -> (Vec, Hash) { - let mut chunks: Vec> = obtain_chunks(n_validators, available_data).unwrap(); - - for (i, chunk) in chunks.iter_mut().enumerate() { - alter_chunk(i, chunk) - } - - // create proofs for each erasure chunk - let branches = branches(chunks.as_ref()); - - let root = branches.root(); - let erasure_chunks = branches - .enumerate() - .map(|(index, (proof, chunk))| ErasureChunk { - chunk: chunk.to_vec(), - index: ValidatorIndex(index as _), - proof: Proof::try_from(proof).unwrap(), - }) - .collect::>(); - - (erasure_chunks, root) -} - pub async fn benchmark_availability_read(env: &mut TestEnvironment, mut state: TestState) { let config = env.config().clone(); diff --git a/polkadot/node/subsystem-test-helpers/Cargo.toml b/polkadot/node/subsystem-test-helpers/Cargo.toml index 9087ca11f5d2..ccbca1c3ea1d 100644 --- a/polkadot/node/subsystem-test-helpers/Cargo.toml +++ b/polkadot/node/subsystem-test-helpers/Cargo.toml @@ -12,8 +12,11 @@ async-trait = "0.1.57" futures = "0.3.21" parking_lot = "0.12.0" polkadot-node-subsystem = { path = "../subsystem" } +polkadot-erasure-coding = { path = 
"../../erasure-coding" } polkadot-node-subsystem-util = { path = "../subsystem-util" } polkadot-primitives = { path = "../../primitives" } +polkadot-node-primitives = { path = "../primitives" } + sc-client-api = { path = "../../../substrate/client/api" } sc-utils = { path = "../../../substrate/client/utils" } sp-core = { path = "../../../substrate/primitives/core" } diff --git a/polkadot/node/subsystem-test-helpers/src/lib.rs b/polkadot/node/subsystem-test-helpers/src/lib.rs index 3f92513498c4..daa8a10e6171 100644 --- a/polkadot/node/subsystem-test-helpers/src/lib.rs +++ b/polkadot/node/subsystem-test-helpers/src/lib.rs @@ -18,11 +18,14 @@ #![warn(missing_docs)] +use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; +use polkadot_node_primitives::{AvailableData, BlockData, ErasureChunk, PoV, Proof}; use polkadot_node_subsystem::{ messages::AllMessages, overseer, FromOrchestra, OverseerSignal, SpawnGlue, SpawnedSubsystem, SubsystemError, SubsystemResult, TrySendError, }; use polkadot_node_subsystem_util::TimeoutExt; +use polkadot_primitives::{Hash, ValidatorIndex}; use futures::{channel::mpsc, poll, prelude::*}; use parking_lot::Mutex; @@ -440,6 +443,34 @@ impl Future for Yield { } } +// Helper for chunking available data. 
+pub fn derive_erasure_chunks_with_proofs_and_root( + n_validators: usize, + available_data: &AvailableData, + alter_chunk: impl Fn(usize, &mut Vec), +) -> (Vec, Hash) { + let mut chunks: Vec> = obtain_chunks(n_validators, available_data).unwrap(); + + for (i, chunk) in chunks.iter_mut().enumerate() { + alter_chunk(i, chunk) + } + + // create proofs for each erasure chunk + let branches = branches(chunks.as_ref()); + + let root = branches.root(); + let erasure_chunks = branches + .enumerate() + .map(|(index, (proof, chunk))| ErasureChunk { + chunk: chunk.to_vec(), + index: ValidatorIndex(index as _), + proof: Proof::try_from(proof).unwrap(), + }) + .collect::>(); + + (erasure_chunks, root) +} + #[cfg(test)] mod tests { use super::*; diff --git a/polkadot/node/subsystem-test-helpers/src/mock.rs b/polkadot/node/subsystem-test-helpers/src/mock.rs index fc2dd6a4e34e..14026960ac13 100644 --- a/polkadot/node/subsystem-test-helpers/src/mock.rs +++ b/polkadot/node/subsystem-test-helpers/src/mock.rs @@ -16,7 +16,7 @@ use std::sync::Arc; -use polkadot_node_subsystem::{jaeger, ActivatedLeaf,BlockInfo}; +use polkadot_node_subsystem::{jaeger, ActivatedLeaf, BlockInfo}; use sc_client_api::UnpinHandle; use sc_keystore::LocalKeystore; use sc_utils::mpsc::tracing_unbounded; @@ -62,10 +62,5 @@ pub fn new_leaf(hash: Hash, number: BlockNumber) -> ActivatedLeaf { /// Create a new leaf with the given hash and number. 
pub fn new_block_import_info(hash: Hash, number: BlockNumber) -> BlockInfo { - BlockInfo { - hash, - parent_hash: Hash::default(), - number, - unpin_handle: dummy_unpin_handle(hash), - } + BlockInfo { hash, parent_hash: Hash::default(), number, unpin_handle: dummy_unpin_handle(hash) } } From 1458a73dbc2b96741de3cdadc22d381ac9dccb84 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 8 Dec 2023 13:51:35 +0200 Subject: [PATCH 45/52] change back to debug Signed-off-by: Andrei Sandu --- polkadot/node/network/availability-recovery/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/network/availability-recovery/src/lib.rs b/polkadot/node/network/availability-recovery/src/lib.rs index d029bce04173..fb8064878f4f 100644 --- a/polkadot/node/network/availability-recovery/src/lib.rs +++ b/polkadot/node/network/availability-recovery/src/lib.rs @@ -667,7 +667,7 @@ impl AvailabilityRecoverySubsystem { } }, None => { - gum::trace!( + gum::debug!( target: LOG_TARGET, "Erasure task channel closed", ); From baa124ecfcbaa5ac5b705b44f0983f1a06158e48 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 12 Dec 2023 14:11:58 +0200 Subject: [PATCH 46/52] fix test build Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-test-helpers/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polkadot/node/subsystem-test-helpers/src/lib.rs b/polkadot/node/subsystem-test-helpers/src/lib.rs index daa8a10e6171..dfa78e04b8c9 100644 --- a/polkadot/node/subsystem-test-helpers/src/lib.rs +++ b/polkadot/node/subsystem-test-helpers/src/lib.rs @@ -19,7 +19,7 @@ #![warn(missing_docs)] use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; -use polkadot_node_primitives::{AvailableData, BlockData, ErasureChunk, PoV, Proof}; +use polkadot_node_primitives::{AvailableData, ErasureChunk, Proof}; use polkadot_node_subsystem::{ messages::AllMessages, overseer, FromOrchestra, OverseerSignal, SpawnGlue, SpawnedSubsystem, 
SubsystemError, SubsystemResult, TrySendError, @@ -443,7 +443,7 @@ impl Future for Yield { } } -// Helper for chunking available data. +/// Helper for chunking available data. pub fn derive_erasure_chunks_with_proofs_and_root( n_validators: usize, available_data: &AvailableData, From fde982f349ebd1eef806a2d7955d891332fa1e1f Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 12 Dec 2023 14:15:46 +0200 Subject: [PATCH 47/52] fix markdown Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-bench/README.md | 36 ++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md index f4ea04662f9e..21844853334b 100644 --- a/polkadot/node/subsystem-bench/README.md +++ b/polkadot/node/subsystem-bench/README.md @@ -4,11 +4,11 @@ Run parachain consensus stress and performance tests on your development machine ## Motivation -The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is -responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and -performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or -`dispute-coordinator`. In the absence of such a tool, we would run large test nets to load/stress test these parts of -the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard +The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is +responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and +performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or +`dispute-coordinator`. In the absence of such a tool, we would run large test nets to load/stress test these parts of +the system. 
Setting up and making sense of the amount of data produced by such a large test is very expensive, hard to orchestrate and is a huge development time sink. This tool aims to solve the problem by making it easy to: @@ -27,12 +27,12 @@ The output binary will be placed in `target/testnet/subsystem-bench`. ### Test metrics Subsystem, CPU usage and network metrics are exposed via a prometheus endpoint during the test execution. -A small subset of these collected metrics are displayed in the CLI, but for an in depth analysys of the test results, +A small subset of these collected metrics are displayed in the CLI, but for an in depth analysys of the test results, a local Grafana/Prometheus stack is needed. ### Install Prometheus -Please follow the [official installation guide](https://prometheus.io/docs/prometheus/latest/installation/) for your +Please follow the [official installation guide](https://prometheus.io/docs/prometheus/latest/installation/) for your platform/OS. After succesfully installing and starting up Prometheus, we need to alter it's configuration such that it @@ -67,7 +67,7 @@ Once you have the installation up and running, configure the local Prometheus as #### Import dashboards -Follow [this guide](https://grafana.com/docs/grafana/latest/dashboards/manage-dashboards/#export-and-import-dashboards) +Follow [this guide](https://grafana.com/docs/grafana/latest/dashboards/manage-dashboards/#export-and-import-dashboards) to import the dashboards from the repository `grafana` folder. ## How to run a test @@ -141,9 +141,9 @@ usage: - for how many blocks the test should run (`num_blocks`) From the perspective of the subsystem under test, this means that it will receive an `ActiveLeavesUpdate` signal -followed by an arbitrary amount of messages. This process repeats itself for `num_blocks`. The messages are generally -test payloads pre-generated before the test run, or constructed on pre-genereated payloads. 
For example the -`AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before +followed by an arbitrary amount of messages. This process repeats itself for `num_blocks`. The messages are generally +test payloads pre-generated before the test run, or constructed on pre-generated payloads. For example the +`AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before the test is started. ### Example run @@ -177,12 +177,12 @@ node validator network. CPU usage per block 0.00s ``` -`Block time` in the context of `data-availability-read` has a different meaning. It measures the amount of time it +`Block time` in the context of `data-availability-read` has a different meaning. It measures the amount of time it took the subsystem to finish processing all of the messages sent in the context of the current test block. ### Test logs -You can select log target, subtarget and verbosity just like with Polkadot node CLI, simply setting +You can select log target, subtarget and verbosity just like with Polkadot node CLI, simply setting `RUST_LOOG="parachain=debug"` turns on debug logs for all parachain consensus subsystems in the test. ### View test metrics @@ -190,8 +190,8 @@ You can select log target, subtarget and verbosity just like with Polkadot node Assuming the Grafana/Prometheus stack installation steps completed succesfully, you should be able to view the test progress in real time by accessing [this link](http://localhost:3000/goto/SM5B8pNSR?orgId=1). -Now run -`target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml` +Now run +`target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml` and view the metrics in real time and spot differences between different `n_valiator` values.
## Create new test objectives @@ -200,17 +200,17 @@ This tool is intended to make it easy to write new test objectives that focus in or even multiple subsystems (for example `approval-distribution` and `approval-voting`). A special kind of test objectives are performance regression tests for the CI pipeline. These should be sequences -of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both +of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both happy and negative scenarios (low bandwidth, network errors and low connectivity). ### Reusable test components -To faster write a new test objective you need to use some higher level wrappers and logic: `TestEnvironment`, +To write a new test objective faster, you need to use some higher level wrappers and logic: `TestEnvironment`, `TestConfiguration`, `TestAuthorities`, `NetworkEmulator`. To create the `TestEnvironment` you will need to also build an `Overseer`, but that should be easy using the mockups for subsystems in`core::mock`. ### Mocking Ideally we want to have a single mock implementation for subsystems that can be minimally configured to -be used in different tests. A good example is `runtime-api` which currently only responds to session information +be used in different tests. A good example is `runtime-api` which currently only responds to session information requests based on static data. It can be easily extended to service other requests.
From 47c2643e30806715d11196782b5d0f8ce22814df Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 13 Dec 2023 10:47:59 +0200 Subject: [PATCH 48/52] fix test Signed-off-by: Andrei Sandu --- polkadot/node/network/availability-recovery/src/tests.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/polkadot/node/network/availability-recovery/src/tests.rs b/polkadot/node/network/availability-recovery/src/tests.rs index 18d3d41d88d5..1cb52757bac9 100644 --- a/polkadot/node/network/availability-recovery/src/tests.rs +++ b/polkadot/node/network/availability-recovery/src/tests.rs @@ -30,7 +30,6 @@ use super::*; use sc_network::{config::RequestResponseConfig, IfDisconnected, OutboundFailure, RequestFailure}; -use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; use polkadot_node_primitives::{BlockData, PoV, Proof}; use polkadot_node_subsystem::messages::{ AllMessages, NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest, From 8b49077d47f4e093922e6fec78de2ec431cbb369 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 13 Dec 2023 19:36:57 +0200 Subject: [PATCH 49/52] taplo fix Signed-off-by: Andrei Sandu --- Cargo.toml | 2 +- polkadot/node/subsystem-bench/Cargo.toml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 056dae789e91..d02bf766c8c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,6 @@ members = [ "polkadot/node/gum/proc-macro", "polkadot/node/jaeger", "polkadot/node/malus", - "polkadot/node/subsystem-bench", "polkadot/node/metrics", "polkadot/node/network/approval-distribution", "polkadot/node/network/availability-distribution", @@ -151,6 +150,7 @@ members = [ "polkadot/node/primitives", "polkadot/node/service", "polkadot/node/subsystem", + "polkadot/node/subsystem-bench", "polkadot/node/subsystem-test-helpers", "polkadot/node/subsystem-types", "polkadot/node/subsystem-util", diff --git a/polkadot/node/subsystem-bench/Cargo.toml 
b/polkadot/node/subsystem-bench/Cargo.toml index f775a1ff9efe..08d1a31adf55 100644 --- a/polkadot/node/subsystem-bench/Cargo.toml +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -22,9 +22,9 @@ polkadot-node-subsystem-types = { path = "../subsystem-types" } polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } polkadot-node-network-protocol = { path = "../network/protocol" } -polkadot-availability-recovery = { path = "../network/availability-recovery", features=["subsystem-benchmarks"]} +polkadot-availability-recovery = { path = "../network/availability-recovery", features = ["subsystem-benchmarks"] } color-eyre = { version = "0.6.1", default-features = false } -polkadot-overseer = { path = "../overseer" } +polkadot-overseer = { path = "../overseer" } colored = "2.0.4" assert_matches = "1.5" async-trait = "0.1.57" @@ -39,7 +39,7 @@ polkadot-erasure-coding = { package = "polkadot-erasure-coding", path = "../../e log = "0.4.17" env_logger = "0.9.0" rand = "0.8.5" -parity-scale-codec = { version = "3.6.1", features = ["std", "derive"] } +parity-scale-codec = { version = "3.6.1", features = ["derive", "std"] } tokio = "1.24.2" clap-num = "1.0.2" polkadot-node-subsystem-test-helpers = { path = "../subsystem-test-helpers" } @@ -55,7 +55,7 @@ prometheus = { version = "0.13.0", default-features = false } serde = "1.0.192" serde_yaml = "0.9" paste = "1.0.14" -orchestra = { version = "0.3.3", default-features = false, features=["futures_channel"] } +orchestra = { version = "0.3.3", default-features = false, features = ["futures_channel"] } [features] default = [] From 4c8669125ae53e4fb5d60f8160ee3e1ca16737a2 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 13 Dec 2023 19:37:57 +0200 Subject: [PATCH 50/52] cargo lock Signed-off-by: Andrei Sandu --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 211d8e3c91d5..40e2d9ebf3da 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -13295,7 +13295,7 @@ version = "1.0.0" dependencies = [ "assert_matches", "async-trait", - "clap 4.4.10", + "clap 4.4.11", "clap-num", "color-eyre", "colored", From bd128b332e7d79f11df3ef1f33b9ba85ba22f6a6 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Thu, 14 Dec 2023 11:00:43 +0200 Subject: [PATCH 51/52] clippy Signed-off-by: Andrei Sandu --- .../subsystem-bench/src/availability/mod.rs | 2 +- .../subsystem-bench/src/core/configuration.rs | 18 +++------ .../node/subsystem-bench/src/core/display.rs | 8 ++-- .../subsystem-bench/src/core/mock/av_store.rs | 7 ++-- .../src/core/mock/network_bridge.rs | 40 +++++++++---------- .../src/core/mock/runtime_api.rs | 2 +- .../subsystem-bench/src/subsystem-bench.rs | 2 +- 7 files changed, 35 insertions(+), 44 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 40b8f4abceeb..4451ec27c401 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -123,7 +123,7 @@ fn prepare_test_inner( }; let (collation_req_receiver, req_cfg) = - IncomingRequest::get_config_receiver(&ReqProtocolNames::new(&GENESIS_HASH, None)); + IncomingRequest::get_config_receiver(&ReqProtocolNames::new(GENESIS_HASH, None)); let subsystem = if use_fast_path { AvailabilityRecoverySubsystem::with_fast_path( diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs index adb5ce80c0d4..c5cf9acb0436 100644 --- a/polkadot/node/subsystem-bench/src/core/configuration.rs +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -121,7 +121,7 @@ impl TestSequence { impl TestSequence { pub fn new_from_file(path: &Path) -> std::io::Result { - let string = String::from_utf8(std::fs::read(&path)?).expect("File is valid UTF8"); + let string = String::from_utf8(std::fs::read(path)?).expect("File is valid UTF8"); 
Ok(serde_yaml::from_str(&string).expect("File is valid test sequence YA")) } } @@ -150,7 +150,7 @@ impl TestConfiguration { /// Generates the authority keys we need for the network emulation. pub fn generate_authorities(&self) -> TestAuthorities { let keyrings = (0..self.n_validators) - .map(|peer_index| Keyring::new(format!("Node{}", peer_index).into())) + .map(|peer_index| Keyring::new(format!("Node{}", peer_index))) .collect::>(); // Generate `AuthorityDiscoveryId`` for each peer @@ -162,8 +162,7 @@ impl TestConfiguration { let validator_authority_id: Vec = keyrings .iter() .map(|keyring| keyring.clone().public().into()) - .collect::>() - .into(); + .collect::>(); TestAuthorities { keyrings, validator_public, validator_authority_id } } @@ -251,14 +250,9 @@ impl TestConfiguration { /// Produce a randomized duration between `min` and `max`. pub fn random_latency(maybe_peer_latency: Option<&PeerLatency>) -> Option { - if let Some(peer_latency) = maybe_peer_latency { - Some( - Uniform::from(peer_latency.min_latency..=peer_latency.max_latency) - .sample(&mut thread_rng()), - ) - } else { - None - } + maybe_peer_latency.map(|peer_latency| { + Uniform::from(peer_latency.min_latency..=peer_latency.max_latency).sample(&mut thread_rng()) + }) } /// Generate a random error based on `probability`. 
diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index b9ff82d1c06a..b610c8e0b517 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -43,7 +43,7 @@ impl MetricCollection { /// Sums up all metrics with the given name in the collection pub fn sum_by(&self, name: &str) -> f64 { self.all() - .into_iter() + .iter() .filter(|metric| &metric.name == name) .map(|metric| metric.value) .sum() @@ -71,7 +71,7 @@ impl MetricCollection { impl Display for MetricCollection { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!(f, "")?; + writeln!(f)?; let metrics = self.all(); for metric in metrics { writeln!(f, "{}", metric)?; @@ -150,7 +150,7 @@ pub fn parse_metrics(registry: &Registry) -> MetricCollection { }, MetricType::HISTOGRAM => { let h = m.get_histogram(); - let h_name = name.clone() + "_sum".into(); + let h_name = name.clone() + "_sum"; test_metrics.push(TestMetric { name: h_name, label_names: label_names.clone(), @@ -158,7 +158,7 @@ pub fn parse_metrics(registry: &Registry) -> MetricCollection { value: h.get_sample_sum(), }); - let h_name = name.clone() + "_count".into(); + let h_name = name.clone() + "_count"; test_metrics.push(TestMetric { name: h_name, label_names, diff --git a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs index 88747affc8c0..1b8162e6ee43 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs @@ -68,7 +68,7 @@ impl MockAvailabilityStore { let v = self .state .chunks - .get(*candidate_index as usize) + .get(*candidate_index) .unwrap() .iter() .filter(|c| send_chunk(c.index.0 as usize)) @@ -123,9 +123,8 @@ impl MockAvailabilityStore { .expect("candidate was generated previously; qed"); gum::debug!(target: LOG_TARGET, ?candidate_hash, 
candidate_index, "Candidate mapped to index"); - let chunk_size = self.state.chunks.get(*candidate_index as usize).unwrap() - [0] - .encoded_size(); + let chunk_size = + self.state.chunks.get(*candidate_index).unwrap()[0].encoded_size(); let _ = tx.send(Some(chunk_size)); }, _ => { diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index c8140843b3b9..8e0d746468c2 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -133,11 +133,10 @@ impl MockNetworkBridgeTx { .expect("candidate was generated previously; qed"); gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); - let chunk: ChunkResponse = - self.availabilty.chunks.get(*candidate_index as usize).unwrap() - [validator_index] - .clone() - .into(); + let chunk: ChunkResponse = self.availabilty.chunks.get(*candidate_index).unwrap() + [validator_index] + .clone() + .into(); let mut size = chunk.encoded_size(); let response = if random_error(self.config.error) { @@ -206,7 +205,7 @@ impl MockNetworkBridgeTx { .inc_received(outgoing_request.payload.encoded_size()); let available_data = - self.availabilty.available_data.get(*candidate_index as usize).unwrap().clone(); + self.availabilty.available_data.get(*candidate_index).unwrap().clone(); let size = available_data.encoded_size(); @@ -267,22 +266,21 @@ impl MockNetworkBridgeTx { let our_network = self.network.clone(); // This task will handle node messages receipt from the simulated network. - let _ = ctx - .spawn_blocking( - "network-receive", - async move { - while let Some(action) = ingress_rx.recv().await { - let size = action.size(); - - // account for our node receiving the data. 
- our_network.inc_received(size); - rx_limiter.reap(size).await; - action.run().await; - } + ctx.spawn_blocking( + "network-receive", + async move { + while let Some(action) = ingress_rx.recv().await { + let size = action.size(); + + // account for our node receiving the data. + our_network.inc_received(size); + rx_limiter.reap(size).await; + action.run().await; } - .boxed(), - ) - .expect("We never fail to spawn tasks"); + } + .boxed(), + ) + .expect("We never fail to spawn tasks"); // Main subsystem loop. loop { diff --git a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs index 9cbe025ae806..2017d9f56bbc 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs @@ -48,7 +48,7 @@ impl MockRuntimeApi { .map(|i| ValidatorIndex(i as _)) .collect::>(); - let validator_groups = all_validators.chunks(5).map(|x| Vec::from(x)).collect::>(); + let validator_groups = all_validators.chunks(5).map(Vec::from).collect::>(); SessionInfo { validators: self.state.authorities.validator_public.clone().into(), diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 0f3ae0f41417..813a94b8e275 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -171,7 +171,7 @@ impl BenchCli { fn main() -> eyre::Result<()> { color_eyre::install()?; - let _ = env_logger::builder() + env_logger::builder() .filter(Some("hyper"), log::LevelFilter::Info) // Avoid `Terminating due to subsystem exit subsystem` warnings .filter(Some("polkadot_overseer"), log::LevelFilter::Error) From 1021efbe35e6403382f1891e7b3566913a6e1877 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Thu, 14 Dec 2023 11:46:35 +0200 Subject: [PATCH 52/52] more clippy Signed-off-by: Andrei Sandu --- 
polkadot/node/subsystem-bench/src/availability/mod.rs | 3 ++- .../node/subsystem-bench/src/core/configuration.rs | 2 +- polkadot/node/subsystem-bench/src/core/display.rs | 2 +- .../node/subsystem-bench/src/core/mock/av_store.rs | 8 ++++---- .../subsystem-bench/src/core/mock/network_bridge.rs | 8 ++++---- .../node/subsystem-bench/src/core/mock/runtime_api.rs | 8 ++++---- polkadot/node/subsystem-bench/src/core/network.rs | 10 +++------- polkadot/node/subsystem-bench/src/subsystem-bench.rs | 2 +- 8 files changed, 20 insertions(+), 23 deletions(-) diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs index 4451ec27c401..7c81b9313659 100644 --- a/polkadot/node/subsystem-bench/src/availability/mod.rs +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -253,7 +253,8 @@ impl TestState { candidate_receipt_templates.push(candidate_receipt); } - let pov_sizes = config.pov_sizes().to_vec().into_iter().cycle(); + let pov_sizes = config.pov_sizes().to_owned(); + let pov_sizes = pov_sizes.into_iter().cycle(); gum::info!(target: LOG_TARGET, "{}","Created test environment.".bright_blue()); let mut _self = Self { diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs index c5cf9acb0436..164addb51900 100644 --- a/polkadot/node/subsystem-bench/src/core/configuration.rs +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -107,7 +107,7 @@ pub struct TestSequence { } impl TestSequence { - pub fn to_vec(self) -> Vec { + pub fn into_vec(self) -> Vec { self.test_configurations .into_iter() .map(|mut config| { diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs index b610c8e0b517..d600cc484c14 100644 --- a/polkadot/node/subsystem-bench/src/core/display.rs +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -44,7 +44,7 @@ impl MetricCollection { pub fn 
sum_by(&self, name: &str) -> f64 { self.all() .iter() - .filter(|metric| &metric.name == name) + .filter(|metric| metric.name == name) .map(|metric| metric.value) .sum() } diff --git a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs index 1b8162e6ee43..a471230f1b3f 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs @@ -96,10 +96,10 @@ impl MockAvailabilityStore { let msg = ctx.recv().await.expect("Overseer never fails us"); match msg { - orchestra::FromOrchestra::Signal(signal) => match signal { - OverseerSignal::Conclude => return, - _ => {}, - }, + orchestra::FromOrchestra::Signal(signal) => + if signal == OverseerSignal::Conclude { + return + }, orchestra::FromOrchestra::Communication { msg } => match msg { AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx) => { gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_hash, "Responding to QueryAvailableData"); diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs index 8e0d746468c2..b106b832011a 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -287,10 +287,10 @@ impl MockNetworkBridgeTx { let msg = ctx.recv().await.expect("Overseer never fails us"); match msg { - orchestra::FromOrchestra::Signal(signal) => match signal { - OverseerSignal::Conclude => return, - _ => {}, - }, + orchestra::FromOrchestra::Signal(signal) => + if signal == OverseerSignal::Conclude { + return + }, orchestra::FromOrchestra::Communication { msg } => match msg { NetworkBridgeTxMessage::SendRequests(requests, _if_disconnected) => { for request in requests { diff --git a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs index 
2017d9f56bbc..d664ebead3cc 100644 --- a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs +++ b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs @@ -84,10 +84,10 @@ impl MockRuntimeApi { let msg = ctx.recv().await.expect("Overseer never fails us"); match msg { - orchestra::FromOrchestra::Signal(signal) => match signal { - OverseerSignal::Conclude => return, - _ => {}, - }, + orchestra::FromOrchestra::Signal(signal) => + if signal == OverseerSignal::Conclude { + return + }, orchestra::FromOrchestra::Communication { msg } => { gum::debug!(target: LOG_TARGET, msg=?msg, "recv message"); diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs index 09943becb65c..c4e20b421d34 100644 --- a/polkadot/node/subsystem-bench/src/core/network.rs +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -117,7 +117,7 @@ mod tests { let mut reap_amount = 0; while rate_limiter.total_ticks < tick_rate { reap_amount += 1; - reap_amount = reap_amount % 100; + reap_amount %= 100; rate_limiter.reap(reap_amount).await; total_sent += reap_amount; @@ -290,11 +290,7 @@ impl Peer { } pub fn is_connected(&self) -> bool { - if let Peer::Connected(_) = self { - true - } else { - false - } + matches!(self, Peer::Connected(_)) } pub fn emulator(&mut self) -> &mut PeerEmulator { @@ -333,7 +329,7 @@ impl NetworkEmulator { // Create a `PeerEmulator` for each peer. 
let (stats, mut peers): (_, Vec<_>) = (0..n_peers) - .zip(authorities.validator_authority_id.clone().into_iter()) + .zip(authorities.validator_authority_id.clone()) .map(|(peer_index, authority_id)| { validator_authority_id_mapping.insert(authority_id, peer_index); let stats = Arc::new(PeerEmulatorStats::new(peer_index, metrics.clone())); diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index 813a94b8e275..da7e5441f748 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -88,7 +88,7 @@ impl BenchCli { let test_sequence = core::configuration::TestSequence::new_from_file(Path::new(&options.path)) .expect("File exists") - .to_vec(); + .into_vec(); let num_steps = test_sequence.len(); gum::info!( "{}",