Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(resharding): implemented state-stats state-viewer command #10094

Merged
merged 8 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions core/o11y/src/env_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const DEFAULT_RUST_LOG: &str = "tokio_reactor=info,\
db=info,\
delay_detector=info,\
near-performance-metrics=info,\
state_viewer=info,\
warn";

#[non_exhaustive]
Expand Down
2 changes: 1 addition & 1 deletion core/store/src/flat/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl FlatStorageManager {
);
}

/// Creates flat storage instance for shard `shard_id`. The function also checks that
/// Creates flat storage instance for shard `shard_uid`. The function also checks that
/// the shard's flat storage state hasn't been set before, otherwise it panics.
/// TODO (#7327): this behavior may change when we implement support for state sync
/// and resharding.
Expand Down
7 changes: 2 additions & 5 deletions tools/state-viewer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ actix.workspace = true
ansi_term.workspace = true
anyhow.workspace = true
borsh.workspace = true
bytesize.workspace = true
chrono.workspace = true
clap.workspace = true
cloud-storage.workspace = true
Expand Down Expand Up @@ -51,11 +52,7 @@ testlib.workspace = true
insta.workspace = true

[features]
sandbox = [
"node-runtime/sandbox",
"near-chain/sandbox",
"near-client/sandbox",
]
sandbox = ["node-runtime/sandbox", "near-chain/sandbox", "near-client/sandbox"]
nightly = [
"nightly_protocol",
"near-chain-configs/nightly",
Expand Down
14 changes: 14 additions & 0 deletions tools/state-viewer/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ pub enum StateViewerSubCommand {
StateChanges(StateChangesCmd),
/// Dump or apply state parts.
StateParts(StatePartsCmd),
/// Iterates over the Flat State and prints some statistics.
/// e.g. large accounts, total, average and median size, middle account
StateStats(StateStatsCmd),
/// Benchmark how long does it take to iterate the trie.
TrieIterationBenchmark(TrieIterationBenchmarkCmd),
/// View head of the storage.
Expand Down Expand Up @@ -144,6 +147,7 @@ impl StateViewerSubCommand {
StateViewerSubCommand::State => state(home_dir, near_config, store),
StateViewerSubCommand::StateChanges(cmd) => cmd.run(home_dir, near_config, store),
StateViewerSubCommand::StateParts(cmd) => cmd.run(home_dir, near_config, store),
StateViewerSubCommand::StateStats(cmd) => cmd.run(home_dir, near_config, store),
StateViewerSubCommand::ViewChain(cmd) => cmd.run(near_config, store),
StateViewerSubCommand::ViewTrie(cmd) => cmd.run(store),
StateViewerSubCommand::TrieIterationBenchmark(cmd) => cmd.run(near_config, store),
Expand Down Expand Up @@ -617,6 +621,16 @@ impl StatePartsCmd {
);
}
}

#[derive(clap::Parser)]
pub struct StateStatsCmd {}

impl StateStatsCmd {
pub fn run(self, home_dir: &Path, near_config: NearConfig, store: Store) {
print_state_stats(home_dir, store, near_config);
}
}

#[derive(clap::Parser)]
pub struct ViewChainCmd {
#[clap(long)]
Expand Down
147 changes: 147 additions & 0 deletions tools/state-viewer/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::state_dump::state_dump_redis;
use crate::tx_dump::dump_tx_from_block;
use crate::{apply_chunk, epoch_info};
use ansi_term::Color::Red;
use bytesize::ByteSize;
use near_chain::chain::collect_receipts_from_response;
use near_chain::migrations::check_if_block_is_first_with_chunk_of_version;
use near_chain::types::ApplyTransactionResult;
Expand All @@ -23,15 +24,19 @@ use near_primitives::hash::CryptoHash;
use near_primitives::shard_layout::ShardLayout;
use near_primitives::shard_layout::ShardUId;
use near_primitives::sharding::ChunkHash;
use near_primitives::state::FlatStateValue;
use near_primitives::state_record::state_record_to_account_id;
use near_primitives::state_record::StateRecord;
use near_primitives::trie_key::TrieKey;
use near_primitives::types::{chunk_extra::ChunkExtra, BlockHeight, ShardId, StateRoot};
use near_primitives_core::types::Gas;
use near_store::test_utils::create_test_store;
use near_store::TrieStorage;
use near_store::{DBCol, Store, Trie, TrieCache, TrieCachingStorage, TrieConfig, TrieDBStorage};
use nearcore::{NearConfig, NightshadeRuntime};
use node_runtime::adapter::ViewRuntimeAdapter;
use serde_json::json;
use std::collections::BinaryHeap;
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::Write;
Expand Down Expand Up @@ -1057,6 +1062,148 @@ pub(crate) fn clear_cache(store: Store) {
store_update.commit().unwrap();
}

#[derive(PartialEq, Eq)]
pub struct StateStatsStateRecord {
key: Vec<u8>,
value: Vec<u8>,
}

impl StateStatsStateRecord {
pub fn size(&self) -> ByteSize {
ByteSize::b(self.key.len() as u64 + self.value.len() as u64)
}
}

impl Ord for StateStatsStateRecord {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.size().cmp(&other.size()).reverse()
}
}

impl PartialOrd for StateStatsStateRecord {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}

impl std::fmt::Debug for StateStatsStateRecord {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let state_record = StateRecord::from_raw_key_value(self.key.clone(), self.value.clone());

let Some(state_record) = state_record else { return None::<StateRecord>.fmt(f) };

f.debug_struct("StateStatsStateRecord")
.field("account_id", &state_record_to_account_id(&state_record).as_str())
.field("type", &state_record.get_type_string())
.field("size", &self.size())
.finish()
}
}

#[derive(Default)]
pub struct StateStats {
pub total_size: ByteSize,
pub total_count: usize,

pub top_state_records: BinaryHeap<StateStatsStateRecord>,

pub middle_state_record: Option<StateStatsStateRecord>,
}

impl core::fmt::Debug for StateStats {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let average_size = self
.total_size
.as_u64()
.checked_div(self.total_count as u64)
.map(ByteSize::b)
.unwrap_or_default();
f.debug_struct("StateStats")
.field("total_size", &self.total_size)
.field("total_count", &self.total_count)
.field("average_size", &average_size)
.field("middle_state_record", &self.middle_state_record.as_ref().unwrap())
.field("top_state_records", &self.top_state_records)
.finish()
}
}

impl StateStats {
pub fn new() -> Self {
Self::default()
}

pub fn push(&mut self, key: Vec<u8>, value: Vec<u8>) -> () {
self.total_size += key.len() as u64 + value.len() as u64;
self.total_count += 1;

self.top_state_records.push(StateStatsStateRecord { key, value });
if self.top_state_records.len() > 5 {
self.top_state_records.pop();
}
}
}

fn read_flat_state_value(
Copy link
Contributor

@shreyan-gupta shreyan-gupta Nov 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the value itself is used anywhere... Quick optimization, we can perhaps just work with the size of the value instead of the vector value itself? But can leave as is

trie_storage: &TrieDBStorage,
flat_state_value: FlatStateValue,
) -> Vec<u8> {
match flat_state_value {
FlatStateValue::Ref(val) => trie_storage.retrieve_raw_bytes(&val.hash).unwrap().to_vec(),
FlatStateValue::Inlined(val) => val,
}
}

pub(crate) fn print_state_stats(home_dir: &Path, store: Store, near_config: NearConfig) {
let (epoch_manager, runtime, _, block_header) =
load_trie(store.clone(), home_dir, &near_config);

let block_hash = *block_header.hash();
let shard_layout = epoch_manager.get_shard_layout_from_prev_block(&block_hash).unwrap();

for shard_uid in shard_layout.get_shard_uids() {
let flat_storage_manager = runtime.get_flat_storage_manager();
flat_storage_manager.create_flat_storage_for_shard(shard_uid).unwrap();
let trie_storage = TrieDBStorage::new(store.clone(), shard_uid);
let chunk_view = flat_storage_manager.chunk_view(shard_uid, block_hash).unwrap();

let mut state_stats = StateStats::new();

let iter = chunk_view.iter_flat_state_entries(None, None);
for item in iter {
let Ok((key, value)) = item else {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this syntax! Didn't know it existed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Looking at the implementation of iter_flat_state_entries, it might be fine to just unwrap key value here but can leave as is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sytax is cool yeah, just learned it recently too. :)
Actually I wanted to print a warning in there, let me do that.

continue;
};

let value = read_flat_state_value(&trie_storage, value);

state_stats.push(key, value);
}

let middle_size = state_stats.total_size.as_u64() / 2;

let mut current_size = 0;
let iter = chunk_view.iter_flat_state_entries(None, None);
for item in iter {
let Ok((key, value)) = item else {
tracing::warn!(target: "state_viewer", ?item, "error in iter item");
continue;
};

let value = read_flat_state_value(&trie_storage, value);

current_size += key.len() + value.len();
if middle_size <= current_size as u64 {
state_stats.middle_state_record = Some(StateStatsStateRecord { key, value });
break;
}
}

tracing::info!(target: "state_viewer", "{shard_uid:?}");
tracing::info!(target: "state_viewer", "{state_stats:#?}");
}
}

#[cfg(test)]
mod tests {
use near_chain::types::RuntimeAdapter;
Expand Down
Loading