feat: Script to analyse RocksDB data distribution (near#9171)
A tool for analysing data distribution in a local RocksDB. For more information, take a look at `tools/database/README.md`
Jure Bajic authored Jun 28, 2023
1 parent 5e23504 commit 6719d37
Showing 9 changed files with 320 additions and 0 deletions.
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -172,6 +172,7 @@ near-adjust-db-tool = { path = "tools/adjust-db", package = "adjust-db-tool" }
near-account-id = { path = "core/account-id", features = ["internal_unstable"] }
near-actix-test-utils = { path = "test-utils/actix-test-utils" }
near-amend-genesis = { path = "tools/amend-genesis" }
near-database-tool = { path = "tools/database" }
near-async = { path = "core/async" }
near-cache = { path = "utils/near-cache" }
near-chain = { path = "chain/chain" }
1 change: 1 addition & 0 deletions neard/Cargo.toml
@@ -39,6 +39,7 @@ near-client.workspace = true
near-cold-store-tool.workspace = true
near-config-utils.workspace = true
near-crypto.workspace = true
near-database-tool.workspace = true
near-dyn-configs.workspace = true
near-flat-storage.workspace = true
near-jsonrpc-primitives.workspace = true
7 changes: 7 additions & 0 deletions neard/src/cli.rs
@@ -5,6 +5,7 @@ use near_amend_genesis::AmendGenesisCommand;
use near_chain_configs::GenesisValidationMode;
use near_client::ConfigUpdater;
use near_cold_store_tool::ColdStoreCommand;
use near_database_tool::commands::DatabaseCommand;
use near_dyn_configs::{UpdateableConfigLoader, UpdateableConfigLoaderError, UpdateableConfigs};
use near_flat_storage::commands::FlatStorageCommand;
use near_jsonrpc_primitives::types::light_client::RpcLightClientExecutionProofResponse;
@@ -131,6 +132,9 @@ impl NeardCmd {
NeardSubCommand::AdjustDb(cmd) => {
cmd.run(&home_dir)?;
}
NeardSubCommand::Database(cmd) => {
cmd.run(&home_dir)?;
}
};
Ok(())
}
@@ -253,6 +257,9 @@ pub(super) enum NeardSubCommand {

/// Adjust DB for testing purposes.
AdjustDb(AdjustDbCommand),

/// Set of commands to run on the database
Database(DatabaseCommand),
}

#[derive(clap::Parser)]
19 changes: 19 additions & 0 deletions tools/database/Cargo.toml
@@ -0,0 +1,19 @@
[package]
name = "near-database-tool"
version.workspace = true
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
repository.workspace = true
license.workspace = true
publish = false

[dependencies]
anyhow.workspace = true
clap.workspace = true
rayon.workspace = true
strum.workspace = true

near-store.workspace = true
nearcore.workspace = true

39 changes: 39 additions & 0 deletions tools/database/README.md
@@ -0,0 +1,39 @@
# Database

A set of tools useful when working with the underlying database.

## Analyse Database

The `analyse-data-size-distribution` command provides an efficient way to assess
the size distribution of keys and values within RocksDB.

### Usage

To run the script, use the following example:
```bash
cargo run --bin neard -- --home /home/ubuntu/.near database analyse-data-size-distribution --column State --top-k 50
```
The arguments are as follows:

- `--home`: The path to the `neard` home directory containing the database.
- `--column`: The specific column to inspect.
- `--top-k`: The maximum number of sizes to display, ordered by count (default is 100).

The resulting output will show the following:

- Total number of key-value pairs per column family
- Sizes of each column family
- Key and value size distribution
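
For a single column, the output has roughly the following shape. The numbers
below are made up for illustration; only the line format matches the tool's
actual output:

```
Column family State has 2000000 pairs totalling 850000000 bytes
Total number of pairs read: 2000000

Key size distribution:
Minimum Key size: 8
Maximum Key size: 72
Most occurring Key size (size, count): (40, 1200000)
Least occurring Key size (size, count): (65, 3)
Average Key size: 41.25
Median Key size: 40
Size: 40, Count: 1200000
Size: 72, Count: 500000
...
```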

### Tips for Handling Large Column Families
Since the tool opens all requested column families at startup, the process can
exceed the operating system's limit on open file descriptors, so you may need
to raise that limit.

On Ubuntu 18.04, you can check the current hard limit with `ulimit -Hn`.
To raise the limit, add the following entries to `/etc/security/limits.conf`
(adjust the values to suit your needs):
```
* soft nofile 100000
* hard nofile 100000
```
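
Changes to `/etc/security/limits.conf` apply to new login sessions, so log out
and back in (or start a fresh session) before rerunning the tool.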

214 changes: 214 additions & 0 deletions tools/database/src/analyse_data_size_distribution.rs
@@ -0,0 +1,214 @@
use clap::Parser;
use near_store::db::{Database, RocksDB};
use near_store::{DBCol, StoreConfig};
use rayon::prelude::*;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use strum::IntoEnumIterator;

#[derive(Parser)]
pub struct AnalyseDataSizeDistributionCommand {
#[arg(short, long)]
/// If specified, only this column will be analysed
column: Option<String>,

#[arg(short, long, default_value_t = 100)]
/// Number of most frequent sizes to output
top_k: usize,
}

fn resolve_db_col(col: &str) -> Option<DBCol> {
    DBCol::iter().find(|db_col| <&str>::from(db_col) == col)
}

#[derive(Clone)]
struct ColumnFamilyCountAndSize {
number_of_pairs: usize,
size: usize,
}

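/// Aggregated statistics for the analysed column families: key and value size
/// histograms stored as (size_in_bytes, count) pairs, sorted by count in
/// descending order, plus per-column pair counts and byte totals.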
struct DataSizeDistribution {
key_sizes: Vec<(usize, usize)>,
value_sizes: Vec<(usize, usize)>,
total_num_of_pairs: usize,
column_families_data: Vec<(String, ColumnFamilyCountAndSize)>,
}

impl DataSizeDistribution {
fn new(
mut key_sizes: Vec<(usize, usize)>,
mut value_sizes: Vec<(usize, usize)>,
col_families_data: Vec<(String, ColumnFamilyCountAndSize)>,
) -> Self {
        // Sort by count in descending order so that the most frequently
        // occurring sizes come first, giving the output a histogram-like order.
key_sizes.sort_by(|a, b| b.1.cmp(&a.1));
value_sizes.sort_by(|a, b| b.1.cmp(&a.1));
let total_num_of_pairs = key_sizes.iter().map(|(_, count)| count).sum::<usize>();

        Self {
            key_sizes,
            value_sizes,
            total_num_of_pairs,
            column_families_data: col_families_data,
        }
}

fn print_results(&self, top_k: usize) {
self.print_column_family_data();
self.print_sizes_count(&self.key_sizes, "Key", top_k);
self.print_sizes_count(&self.value_sizes, "Value", top_k);
}

fn print_column_family_data(&self) {
for (column_family_name, column_family_data) in self.column_families_data.iter() {
            println!(
                "Column family {} has {} pairs totalling {} bytes",
                column_family_name, column_family_data.number_of_pairs, column_family_data.size
            );
}
}

    fn print_sizes_count(
        &self,
        sizes_count: &[(usize, usize)],
        size_count_type: &str,
        top_k: usize,
    ) {
        println!(
            "Total number of pairs read: {}\n",
            sizes_count.iter().map(|(_, count)| count).sum::<usize>()
        );

        // Print out distributions
        println!("{} size distribution:", size_count_type);
        println!(
            "Minimum {} size: {}",
            size_count_type,
            sizes_count.iter().map(|(size, _)| size).min().unwrap()
        );
        println!(
            "Maximum {} size: {}",
            size_count_type,
            sizes_count.iter().map(|(size, _)| size).max().unwrap()
        );
        println!(
            "Most occurring {} size (size, count): {:?}",
            size_count_type,
            sizes_count.first().unwrap()
        );
        println!(
            "Least occurring {} size (size, count): {:?}",
            size_count_type,
            sizes_count.last().unwrap()
        );

        let total_sizes_bytes_sum = sizes_count.iter().map(|(size, count)| size * count).sum::<usize>();
        println!(
            "Average {} size: {:.2}",
            size_count_type,
            total_sizes_bytes_sum as f64 / self.total_num_of_pairs as f64
        );

        // Find the median: walk the sizes in ascending order and stop at the
        // size where the cumulative count crosses half of the total number of
        // pairs. Note that `sizes_count` itself is sorted by count, not size.
        let mut sizes_ascending = sizes_count.to_vec();
        sizes_ascending.sort_by_key(|(size, _)| *size);
        let mut size_bytes_median = 0;
        let mut median_index = self.total_num_of_pairs / 2;
        for (size, count) in sizes_ascending {
            if median_index < count {
                size_bytes_median = size;
                break;
            }
            median_index -= count;
        }
        println!("Median {} size: {}", size_count_type, size_bytes_median);

        for (size, count) in sizes_count.iter().take(top_k) {
            println!("Size: {}, Count: {}", size, count);
        }
        println!();
    }
}

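/// Reads every key-value pair in the given column families, one rayon task per
/// column. Each task accumulates thread-local size histograms and then merges
/// them into the shared, mutex-guarded maps.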
fn read_all_pairs(db: &RocksDB, col_families: &[DBCol]) -> DataSizeDistribution {
// Initialize counters
let key_sizes: Arc<Mutex<HashMap<usize, usize>>> = Arc::new(Mutex::new(HashMap::new()));
let value_sizes: Arc<Mutex<HashMap<usize, usize>>> = Arc::new(Mutex::new(HashMap::new()));
let column_families_data: Arc<Mutex<HashMap<String, ColumnFamilyCountAndSize>>> =
Arc::new(Mutex::new(HashMap::new()));

    // Merge a thread-local histogram into one of the shared global maps.
let update_map = |global_map: &Arc<Mutex<HashMap<usize, usize>>>,
local_map: &HashMap<usize, usize>| {
let mut global_sizes_guard = global_map.lock().unwrap();
for (key, value) in local_map {
*global_sizes_guard.entry(*key).or_insert(0) += *value;
}
};
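    // Iterate over the key-value pairs of each column family in parallel.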
col_families.par_iter().for_each(|col_family| {
let mut local_key_sizes: HashMap<usize, usize> = HashMap::new();
let mut local_value_sizes: HashMap<usize, usize> = HashMap::new();

        for res in db.iter_raw_bytes(*col_family) {
            match res {
                Ok((key, value)) => {
                    // Count key sizes
                    *local_key_sizes.entry(key.len()).or_insert(0) += 1;

                    // Count value sizes
                    *local_value_sizes.entry(value.len()).or_insert(0) += 1;
                }
                Err(err) => {
                    panic!("Error occurred during iteration of {}: {}", col_family, err);
                }
            }
        }

{
let mut guard = column_families_data.lock().unwrap();
let column_number_of_pairs = local_key_sizes.values().sum::<usize>();
            // Count both key and value bytes towards the column's total size.
            let column_size = local_key_sizes
                .iter()
                .chain(local_value_sizes.iter())
                .map(|(&size, &count)| size * count)
                .sum::<usize>();
let column_family = ColumnFamilyCountAndSize {
number_of_pairs: column_number_of_pairs,
size: column_size,
};
guard.insert(col_family.to_string(), column_family);
}

update_map(&key_sizes, &local_key_sizes);
update_map(&value_sizes, &local_value_sizes);
});

let key_sizes: Vec<(usize, usize)> = key_sizes.lock().unwrap().clone().into_iter().collect();
let value_sizes: Vec<(usize, usize)> =
value_sizes.lock().unwrap().clone().into_iter().collect();
let column_families: Vec<(String, ColumnFamilyCountAndSize)> =
column_families_data.lock().unwrap().clone().into_iter().collect();

DataSizeDistribution::new(key_sizes, value_sizes, column_families)
}

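/// Returns only the requested column if one was specified, otherwise all columns.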
fn get_column_families(input_col: &Option<String>) -> Vec<DBCol> {
match input_col {
        Some(column_name) => {
            vec![resolve_db_col(column_name)
                .unwrap_or_else(|| panic!("Unknown column: {}", column_name))]
        }
None => DBCol::iter().collect(),
}
}

impl AnalyseDataSizeDistributionCommand {
pub fn run(&self, home: &PathBuf) -> anyhow::Result<()> {
        // Open the database read-only, using the default store configuration.
        let store_config = StoreConfig::default();
        let db = RocksDB::open(
            home,
            &store_config,
            near_store::Mode::ReadOnly,
            near_store::Temperature::Hot,
        )?;

let column_families = get_column_families(&self.column);
let results = read_all_pairs(&db, &column_families);
results.print_results(self.top_k);

Ok(())
}
}
24 changes: 24 additions & 0 deletions tools/database/src/commands.rs
@@ -0,0 +1,24 @@
use crate::analyse_data_size_distribution::AnalyseDataSizeDistributionCommand;
use clap::Parser;
use std::path::PathBuf;

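/// Set of tools that operate on the node's local database.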
#[derive(Parser)]
pub struct DatabaseCommand {
#[clap(subcommand)]
subcmd: SubCommand,
}

#[derive(Parser)]
#[clap(subcommand_required = true, arg_required_else_help = true)]
enum SubCommand {
/// Analyse data size distribution in RocksDB
AnalyseDataSizeDistribution(AnalyseDataSizeDistributionCommand),
}

impl DatabaseCommand {
pub fn run(&self, home: &PathBuf) -> anyhow::Result<()> {
match &self.subcmd {
SubCommand::AnalyseDataSizeDistribution(cmd) => cmd.run(home),
}
}
}
2 changes: 2 additions & 0 deletions tools/database/src/lib.rs
@@ -0,0 +1,2 @@
mod analyse_data_size_distribution;
pub mod commands;
