feat: Script to analyse RocksDB data distribution (near#9171)
A tool for analysing data distribution in a local RocksDB. For more information, take a look at `tools/database/README.md`
Jure Bajic authored Jun 28, 2023
1 parent 5e23504 commit 6719d37
Showing 9 changed files with 320 additions and 0 deletions.
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -172,6 +172,7 @@ near-adjust-db-tool = { path = "tools/adjust-db", package = "adjust-db-tool" }
near-account-id = { path = "core/account-id", features = ["internal_unstable"] }
near-actix-test-utils = { path = "test-utils/actix-test-utils" }
near-amend-genesis = { path = "tools/amend-genesis" }
near-database-tool = { path = "tools/database" }
near-async = { path = "core/async" }
near-cache = { path = "utils/near-cache" }
near-chain = { path = "chain/chain" }
1 change: 1 addition & 0 deletions neard/Cargo.toml
@@ -39,6 +39,7 @@ near-client.workspace = true
near-cold-store-tool.workspace = true
near-config-utils.workspace = true
near-crypto.workspace = true
near-database-tool.workspace = true
near-dyn-configs.workspace = true
near-flat-storage.workspace = true
near-jsonrpc-primitives.workspace = true
7 changes: 7 additions & 0 deletions neard/src/cli.rs
@@ -5,6 +5,7 @@ use near_amend_genesis::AmendGenesisCommand;
use near_chain_configs::GenesisValidationMode;
use near_client::ConfigUpdater;
use near_cold_store_tool::ColdStoreCommand;
use near_database_tool::commands::DatabaseCommand;
use near_dyn_configs::{UpdateableConfigLoader, UpdateableConfigLoaderError, UpdateableConfigs};
use near_flat_storage::commands::FlatStorageCommand;
use near_jsonrpc_primitives::types::light_client::RpcLightClientExecutionProofResponse;
@@ -131,6 +132,9 @@ impl NeardCmd {
NeardSubCommand::AdjustDb(cmd) => {
cmd.run(&home_dir)?;
}
NeardSubCommand::Database(cmd) => {
cmd.run(&home_dir)?;
}
};
Ok(())
}
@@ -253,6 +257,9 @@ pub(super) enum NeardSubCommand {

/// Adjust DB for testing purposes.
AdjustDb(AdjustDbCommand),

/// Set of commands to run on the database
Database(DatabaseCommand),
}

#[derive(clap::Parser)]
19 changes: 19 additions & 0 deletions tools/database/Cargo.toml
@@ -0,0 +1,19 @@
[package]
name = "near-database-tool"
version.workspace = true
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
repository.workspace = true
license.workspace = true
publish = false

[dependencies]
anyhow.workspace = true
clap.workspace = true
rayon.workspace = true
strum.workspace = true

near-store.workspace = true
nearcore.workspace = true

39 changes: 39 additions & 0 deletions tools/database/README.md
@@ -0,0 +1,39 @@
# Database

A set of tools useful when working with the underlying database.

## Analyse Database

The `analyse-data-size-distribution` command provides an efficient way to assess
the size distribution of keys and values within RocksDB.

### Usage

To run the script, use the following example:
```bash
cargo run --bin neard -- --home /home/ubuntu/.near database analyse-data-size-distribution --column State --top-k 50
```
The arguments are as follows:

- `--home`: The path to the `neard` home directory containing the database.
- `--column`: The specific column to inspect.
- `--top-k`: The maximum number of sizes to display, ordered by count (default is 100).

The resulting output will show the following:

- Total number of key-value pairs per column family
- Sizes of each column family
- Key and value size distribution
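
For a single column, the output has roughly the following shape. The numbers
below are made up for illustration; only the line format matches the tool's
actual output:

```
Column family State has 2000000 pairs totalling 850000000 bytes
Total number of pairs read: 2000000

Key size distribution:
Minimum Key size: 8
Maximum Key size: 72
Most occurring Key size (size, count): (40, 1200000)
Least occurring Key size (size, count): (65, 3)
Average Key size: 41.25
Median Key size: 40
Size: 40, Count: 1200000
Size: 72, Count: 500000
...
```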

### Tips for Handling Large Column Families
Since the tool opens all requested column families at startup, the process can
exceed the operating system's limit on open file descriptors, so you may need
to raise that limit.

On Ubuntu 18.04, you can check the current hard limit with `ulimit -Hn`.
To raise the limit, add the following entries to `/etc/security/limits.conf`
(adjust the values to suit your needs):
```
* soft nofile 100000
* hard nofile 100000
```
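
Changes to `/etc/security/limits.conf` apply to new login sessions, so log out
and back in (or start a fresh session) before rerunning the tool.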

214 changes: 214 additions & 0 deletions tools/database/src/analyse_data_size_distribution.rs
@@ -0,0 +1,214 @@
use clap::Parser;
use near_store::db::{Database, RocksDB};
use near_store::{DBCol, StoreConfig};
use rayon::prelude::*;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use strum::IntoEnumIterator;

#[derive(Parser)]
pub struct AnalyseDataSizeDistributionCommand {
#[arg(short, long)]
/// If specified, only this column will be analysed
column: Option<String>,

#[arg(short, long, default_value_t = 100)]
/// Number of most frequent sizes to output
top_k: usize,
}

fn resolve_db_col(col: &str) -> Option<DBCol> {
    DBCol::iter().find(|db_col| <&str>::from(db_col) == col)
}

#[derive(Clone)]
struct ColumnFamilyCountAndSize {
number_of_pairs: usize,
size: usize,
}

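/// Aggregated statistics for the analysed column families: key and value size
/// histograms stored as (size_in_bytes, count) pairs, sorted by count in
/// descending order, plus per-column pair counts and byte totals.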
struct DataSizeDistribution {
key_sizes: Vec<(usize, usize)>,
value_sizes: Vec<(usize, usize)>,
total_num_of_pairs: usize,
column_families_data: Vec<(String, ColumnFamilyCountAndSize)>,
}

impl DataSizeDistribution {
fn new(
mut key_sizes: Vec<(usize, usize)>,
mut value_sizes: Vec<(usize, usize)>,
col_families_data: Vec<(String, ColumnFamilyCountAndSize)>,
) -> Self {
        // Sort by count in descending order so that the most frequently
        // occurring sizes come first, giving the output a histogram-like order.
key_sizes.sort_by(|a, b| b.1.cmp(&a.1));
value_sizes.sort_by(|a, b| b.1.cmp(&a.1));
let total_num_of_pairs = key_sizes.iter().map(|(_, count)| count).sum::<usize>();

        Self {
            key_sizes,
            value_sizes,
            total_num_of_pairs,
            column_families_data: col_families_data,
        }
}

fn print_results(&self, top_k: usize) {
self.print_column_family_data();
self.print_sizes_count(&self.key_sizes, "Key", top_k);
self.print_sizes_count(&self.value_sizes, "Value", top_k);
}

fn print_column_family_data(&self) {
for (column_family_name, column_family_data) in self.column_families_data.iter() {
            println!(
                "Column family {} has {} pairs totalling {} bytes",
                column_family_name, column_family_data.number_of_pairs, column_family_data.size
            );
}
}

    fn print_sizes_count(
        &self,
        sizes_count: &[(usize, usize)],
        size_count_type: &str,
        top_k: usize,
    ) {
        println!(
            "Total number of pairs read: {}\n",
            sizes_count.iter().map(|(_, count)| count).sum::<usize>()
        );

        // Print out distributions
        println!("{} size distribution:", size_count_type);
        println!(
            "Minimum {} size: {}",
            size_count_type,
            sizes_count.iter().map(|(size, _)| size).min().unwrap()
        );
        println!(
            "Maximum {} size: {}",
            size_count_type,
            sizes_count.iter().map(|(size, _)| size).max().unwrap()
        );
        println!(
            "Most occurring {} size (size, count): {:?}",
            size_count_type,
            sizes_count.first().unwrap()
        );
        println!(
            "Least occurring {} size (size, count): {:?}",
            size_count_type,
            sizes_count.last().unwrap()
        );

        let total_sizes_bytes_sum = sizes_count.iter().map(|(size, count)| size * count).sum::<usize>();
        println!(
            "Average {} size: {:.2}",
            size_count_type,
            total_sizes_bytes_sum as f64 / self.total_num_of_pairs as f64
        );

        // Find the median: walk the sizes in ascending order and stop at the
        // size where the cumulative count crosses half of the total number of
        // pairs. Note that `sizes_count` itself is sorted by count, not size.
        let mut sizes_ascending = sizes_count.to_vec();
        sizes_ascending.sort_by_key(|(size, _)| *size);
        let mut size_bytes_median = 0;
        let mut median_index = self.total_num_of_pairs / 2;
        for (size, count) in sizes_ascending {
            if median_index < count {
                size_bytes_median = size;
                break;
            }
            median_index -= count;
        }
        println!("Median {} size: {}", size_count_type, size_bytes_median);

        for (size, count) in sizes_count.iter().take(top_k) {
            println!("Size: {}, Count: {}", size, count);
        }
        println!();
    }
}

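/// Reads every key-value pair in the given column families, one rayon task per
/// column. Each task accumulates thread-local size histograms and then merges
/// them into the shared, mutex-guarded maps.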
fn read_all_pairs(db: &RocksDB, col_families: &[DBCol]) -> DataSizeDistribution {
// Initialize counters
let key_sizes: Arc<Mutex<HashMap<usize, usize>>> = Arc::new(Mutex::new(HashMap::new()));
let value_sizes: Arc<Mutex<HashMap<usize, usize>>> = Arc::new(Mutex::new(HashMap::new()));
let column_families_data: Arc<Mutex<HashMap<String, ColumnFamilyCountAndSize>>> =
Arc::new(Mutex::new(HashMap::new()));

    // Merge a thread-local histogram into one of the shared global maps.
let update_map = |global_map: &Arc<Mutex<HashMap<usize, usize>>>,
local_map: &HashMap<usize, usize>| {
let mut global_sizes_guard = global_map.lock().unwrap();
for (key, value) in local_map {
*global_sizes_guard.entry(*key).or_insert(0) += *value;
}
};
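    // Iterate over the key-value pairs of each column family in parallel.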
col_families.par_iter().for_each(|col_family| {
let mut local_key_sizes: HashMap<usize, usize> = HashMap::new();
let mut local_value_sizes: HashMap<usize, usize> = HashMap::new();

        for res in db.iter_raw_bytes(*col_family) {
            match res {
                Ok((key, value)) => {
                    // Count key sizes
                    *local_key_sizes.entry(key.len()).or_insert(0) += 1;

                    // Count value sizes
                    *local_value_sizes.entry(value.len()).or_insert(0) += 1;
                }
                Err(err) => {
                    panic!("Error occurred during iteration of {}: {}", col_family, err);
                }
            }
        }

{
let mut guard = column_families_data.lock().unwrap();
let column_number_of_pairs = local_key_sizes.values().sum::<usize>();
            // Count both key and value bytes towards the column's total size.
            let column_size = local_key_sizes
                .iter()
                .chain(local_value_sizes.iter())
                .map(|(&size, &count)| size * count)
                .sum::<usize>();
let column_family = ColumnFamilyCountAndSize {
number_of_pairs: column_number_of_pairs,
size: column_size,
};
guard.insert(col_family.to_string(), column_family);
}

update_map(&key_sizes, &local_key_sizes);
update_map(&value_sizes, &local_value_sizes);
});

let key_sizes: Vec<(usize, usize)> = key_sizes.lock().unwrap().clone().into_iter().collect();
let value_sizes: Vec<(usize, usize)> =
value_sizes.lock().unwrap().clone().into_iter().collect();
let column_families: Vec<(String, ColumnFamilyCountAndSize)> =
column_families_data.lock().unwrap().clone().into_iter().collect();

DataSizeDistribution::new(key_sizes, value_sizes, column_families)
}

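/// Returns only the requested column if one was specified, otherwise all columns.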
fn get_column_families(input_col: &Option<String>) -> Vec<DBCol> {
match input_col {
        Some(column_name) => {
            vec![resolve_db_col(column_name)
                .unwrap_or_else(|| panic!("Unknown column: {}", column_name))]
        }
None => DBCol::iter().collect(),
}
}

impl AnalyseDataSizeDistributionCommand {
pub fn run(&self, home: &PathBuf) -> anyhow::Result<()> {
        // Open the database read-only, using the default store configuration.
        let store_config = StoreConfig::default();
        let db = RocksDB::open(
            home,
            &store_config,
            near_store::Mode::ReadOnly,
            near_store::Temperature::Hot,
        )?;

let column_families = get_column_families(&self.column);
let results = read_all_pairs(&db, &column_families);
results.print_results(self.top_k);

Ok(())
}
}
24 changes: 24 additions & 0 deletions tools/database/src/commands.rs
@@ -0,0 +1,24 @@
use crate::analyse_data_size_distribution::AnalyseDataSizeDistributionCommand;
use clap::Parser;
use std::path::PathBuf;

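/// Set of tools that operate on the node's local database.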
#[derive(Parser)]
pub struct DatabaseCommand {
#[clap(subcommand)]
subcmd: SubCommand,
}

#[derive(Parser)]
#[clap(subcommand_required = true, arg_required_else_help = true)]
enum SubCommand {
/// Analyse data size distribution in RocksDB
AnalyseDataSizeDistribution(AnalyseDataSizeDistributionCommand),
}

impl DatabaseCommand {
pub fn run(&self, home: &PathBuf) -> anyhow::Result<()> {
match &self.subcmd {
SubCommand::AnalyseDataSizeDistribution(cmd) => cmd.run(home),
}
}
}
2 changes: 2 additions & 0 deletions tools/database/src/lib.rs
@@ -0,0 +1,2 @@
mod analyse_data_size_distribution;
pub mod commands;
