diff --git a/Cargo.lock b/Cargo.lock index 494c7f2d04a..050c55657b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6651,6 +6651,7 @@ dependencies = [ "ansi_term", "anyhow", "borsh 1.0.0", + "bytesize", "chrono", "clap 4.2.4", "cloud-storage", diff --git a/core/o11y/src/env_filter.rs b/core/o11y/src/env_filter.rs index 8a6582e1ce6..1787a33d972 100644 --- a/core/o11y/src/env_filter.rs +++ b/core/o11y/src/env_filter.rs @@ -12,6 +12,7 @@ const DEFAULT_RUST_LOG: &str = "tokio_reactor=info,\ db=info,\ delay_detector=info,\ near-performance-metrics=info,\ + state_viewer=info,\ warn"; #[non_exhaustive] diff --git a/core/store/src/flat/manager.rs b/core/store/src/flat/manager.rs index c9de6f8a2a3..13ab49cc062 100644 --- a/core/store/src/flat/manager.rs +++ b/core/store/src/flat/manager.rs @@ -64,7 +64,7 @@ impl FlatStorageManager { ); } - /// Creates flat storage instance for shard `shard_id`. The function also checks that + /// Creates flat storage instance for shard `shard_uid`. The function also checks that /// the shard's flat storage state hasn't been set before, otherwise it panics. /// TODO (#7327): this behavior may change when we implement support for state sync /// and resharding. diff --git a/tools/state-viewer/Cargo.toml b/tools/state-viewer/Cargo.toml index d376fb2da79..7c2e03b9e53 100644 --- a/tools/state-viewer/Cargo.toml +++ b/tools/state-viewer/Cargo.toml @@ -13,6 +13,7 @@ actix.workspace = true ansi_term.workspace = true anyhow.workspace = true borsh.workspace = true +bytesize.workspace = true chrono.workspace = true clap.workspace = true cloud-storage.workspace = true @@ -51,11 +52,7 @@ testlib.workspace = true insta.workspace = true [features] -sandbox = [ - "node-runtime/sandbox", - "near-chain/sandbox", - "near-client/sandbox", -] +sandbox = ["node-runtime/sandbox", "near-chain/sandbox", "near-client/sandbox"] nightly = [ "nightly_protocol", "near-chain-configs/nightly", diff --git a/tools/state-viewer/src/cli.rs b/tools/state-viewer/src/cli.rs index 754ac6cbb14..8457f1fa2dd 100644 --- a/tools/state-viewer/src/cli.rs +++ b/tools/state-viewer/src/cli.rs @@ -83,6 +83,9 @@ pub enum StateViewerSubCommand { StateChanges(StateChangesCmd), /// Dump or apply state parts. StateParts(StatePartsCmd), + /// Iterates over the Flat State and prints some statistics. + /// e.g. large accounts, total, average and median size, middle account + StateStats(StateStatsCmd), /// Benchmark how long does it take to iterate the trie. TrieIterationBenchmark(TrieIterationBenchmarkCmd), /// View head of the storage. @@ -144,6 +147,7 @@ impl StateViewerSubCommand { StateViewerSubCommand::State => state(home_dir, near_config, store), StateViewerSubCommand::StateChanges(cmd) => cmd.run(home_dir, near_config, store), StateViewerSubCommand::StateParts(cmd) => cmd.run(home_dir, near_config, store), + StateViewerSubCommand::StateStats(cmd) => cmd.run(home_dir, near_config, store), StateViewerSubCommand::ViewChain(cmd) => cmd.run(near_config, store), StateViewerSubCommand::ViewTrie(cmd) => cmd.run(store), StateViewerSubCommand::TrieIterationBenchmark(cmd) => cmd.run(near_config, store), @@ -617,6 +621,16 @@ impl StatePartsCmd { ); } } + +#[derive(clap::Parser)] +pub struct StateStatsCmd {} + +impl StateStatsCmd { + pub fn run(self, home_dir: &Path, near_config: NearConfig, store: Store) { + print_state_stats(home_dir, store, near_config); + } +} + #[derive(clap::Parser)] pub struct ViewChainCmd { #[clap(long)] diff --git a/tools/state-viewer/src/commands.rs b/tools/state-viewer/src/commands.rs index 606a067bff1..8aea507d72d 100644 --- a/tools/state-viewer/src/commands.rs +++ b/tools/state-viewer/src/commands.rs @@ -7,6 +7,7 @@ use crate::state_dump::state_dump_redis; use crate::tx_dump::dump_tx_from_block; use crate::{apply_chunk, epoch_info}; use ansi_term::Color::Red; +use bytesize::ByteSize; use near_chain::chain::collect_receipts_from_response; use near_chain::migrations::check_if_block_is_first_with_chunk_of_version; use near_chain::types::ApplyTransactionResult; @@ -23,15 +24,19 @@ use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::ShardLayout; use near_primitives::shard_layout::ShardUId; use near_primitives::sharding::ChunkHash; +use near_primitives::state::FlatStateValue; +use near_primitives::state_record::state_record_to_account_id; use near_primitives::state_record::StateRecord; use near_primitives::trie_key::TrieKey; use near_primitives::types::{chunk_extra::ChunkExtra, BlockHeight, ShardId, StateRoot}; use near_primitives_core::types::Gas; use near_store::test_utils::create_test_store; +use near_store::TrieStorage; use near_store::{DBCol, Store, Trie, TrieCache, TrieCachingStorage, TrieConfig, TrieDBStorage}; use nearcore::{NearConfig, NightshadeRuntime}; use node_runtime::adapter::ViewRuntimeAdapter; use serde_json::json; +use std::collections::BinaryHeap; use std::collections::HashMap; use std::fs::{self, File}; use std::io::Write; @@ -1057,6 +1062,148 @@ pub(crate) fn clear_cache(store: Store) { store_update.commit().unwrap(); } +#[derive(PartialEq, Eq)] +pub struct StateStatsStateRecord { + key: Vec, + value: Vec, +} + +impl StateStatsStateRecord { + pub fn size(&self) -> ByteSize { + ByteSize::b(self.key.len() as u64 + self.value.len() as u64) + } +} + +impl Ord for StateStatsStateRecord { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.size().cmp(&other.size()).reverse() + } +} + +impl PartialOrd for StateStatsStateRecord { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl std::fmt::Debug for StateStatsStateRecord { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let state_record = StateRecord::from_raw_key_value(self.key.clone(), self.value.clone()); + + let Some(state_record) = state_record else { return None::.fmt(f) }; + + f.debug_struct("StateStatsStateRecord") + .field("account_id", &state_record_to_account_id(&state_record).as_str()) + .field("type", &state_record.get_type_string()) + .field("size", &self.size()) + .finish() + } +} + +#[derive(Default)] +pub struct StateStats { + pub total_size: ByteSize, + pub total_count: usize, + + pub top_state_records: BinaryHeap, + + pub middle_state_record: Option, +} + +impl core::fmt::Debug for StateStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let average_size = self + .total_size + .as_u64() + .checked_div(self.total_count as u64) + .map(ByteSize::b) + .unwrap_or_default(); + f.debug_struct("StateStats") + .field("total_size", &self.total_size) + .field("total_count", &self.total_count) + .field("average_size", &average_size) + .field("middle_state_record", &self.middle_state_record.as_ref().unwrap()) + .field("top_state_records", &self.top_state_records) + .finish() + } +} + +impl StateStats { + pub fn new() -> Self { + Self::default() + } + + pub fn push(&mut self, key: Vec, value: Vec) -> () { + self.total_size += key.len() as u64 + value.len() as u64; + self.total_count += 1; + + self.top_state_records.push(StateStatsStateRecord { key, value }); + if self.top_state_records.len() > 5 { + self.top_state_records.pop(); + } + } +} + +fn read_flat_state_value( + trie_storage: &TrieDBStorage, + flat_state_value: FlatStateValue, +) -> Vec { + match flat_state_value { + FlatStateValue::Ref(val) => trie_storage.retrieve_raw_bytes(&val.hash).unwrap().to_vec(), + FlatStateValue::Inlined(val) => val, + } +} + +pub(crate) fn print_state_stats(home_dir: &Path, store: Store, near_config: NearConfig) { + let (epoch_manager, runtime, _, block_header) = + load_trie(store.clone(), home_dir, &near_config); + + let block_hash = *block_header.hash(); + let shard_layout = epoch_manager.get_shard_layout_from_prev_block(&block_hash).unwrap(); + + for shard_uid in shard_layout.get_shard_uids() { + let flat_storage_manager = runtime.get_flat_storage_manager(); + flat_storage_manager.create_flat_storage_for_shard(shard_uid).unwrap(); + let trie_storage = TrieDBStorage::new(store.clone(), shard_uid); + let chunk_view = flat_storage_manager.chunk_view(shard_uid, block_hash).unwrap(); + + let mut state_stats = StateStats::new(); + + let iter = chunk_view.iter_flat_state_entries(None, None); + for item in iter { + let Ok((key, value)) = item else { + continue; + }; + + let value = read_flat_state_value(&trie_storage, value); + + state_stats.push(key, value); + } + + let middle_size = state_stats.total_size.as_u64() / 2; + + let mut current_size = 0; + let iter = chunk_view.iter_flat_state_entries(None, None); + for item in iter { + let Ok((key, value)) = item else { + tracing::warn!(target: "state_viewer", ?item, "error in iter item"); + continue; + }; + + let value = read_flat_state_value(&trie_storage, value); + + current_size += key.len() + value.len(); + if middle_size <= current_size as u64 { + state_stats.middle_state_record = Some(StateStatsStateRecord { key, value }); + break; + } + } + + tracing::info!(target: "state_viewer", "{shard_uid:?}"); + tracing::info!(target: "state_viewer", "{state_stats:#?}"); + } +} + #[cfg(test)] mod tests { use near_chain::types::RuntimeAdapter;