From 31b77eb24ceffa33e1c5badb865062018feeb32c Mon Sep 17 00:00:00 2001 From: max143672 Date: Mon, 31 Jul 2023 15:06:30 +0400 Subject: [PATCH 1/2] Add page size and memory usage in bytes to perf_monitor This commit adds `page_size` as a parameter in several function definitions in `builder.rs`. It also adds counters to track `resident_set_size_bytes` and `virtual_memory_size_bytes` in `counters.rs`. These changes are designed to enable accurate memory monitoring and provide additional details about memory usage. The new `page_size` parameter allows us to associate memory usage with the page size, which will be helpful for monitoring and troubleshooting. The `page_size` crate has also been added to `Cargo.toml` to manage the retrieval of system's memory page size. This addition is part of the effort to accurately track and report memory usage in bytes. --- metrics/perf_monitor/Cargo.toml | 2 +- metrics/perf_monitor/src/builder.rs | 23 +++++++++++++++++++---- metrics/perf_monitor/src/counters.rs | 10 ++++++++++ metrics/perf_monitor/src/lib.rs | 3 +++ 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/metrics/perf_monitor/Cargo.toml b/metrics/perf_monitor/Cargo.toml index cba5fe625..9e9f51a5a 100644 --- a/metrics/perf_monitor/Cargo.toml +++ b/metrics/perf_monitor/Cargo.toml @@ -14,6 +14,6 @@ log.workspace = true thiserror.workspace = true perf_monitor = {version = "0.2.0", git = "https://github.com/larksuite/perf-monitor-rs", rev = "3b2d52768f229c481186b991ae56559ab4cd087a"} - tokio = { workspace = true, features = ["rt", "macros", "time"] } portable-atomic = {version = "1.4.1", features = ["float"]} +page_size = "0.5.0" \ No newline at end of file diff --git a/metrics/perf_monitor/src/builder.rs b/metrics/perf_monitor/src/builder.rs index 046c1b07c..7ec234b21 100644 --- a/metrics/perf_monitor/src/builder.rs +++ b/metrics/perf_monitor/src/builder.rs @@ -8,6 +8,7 @@ pub struct Builder { tick_service: TS, fetch_interval: D, fetch_callback: CB, + page_size: usize, } impl Builder { @@ -18,19 +19,24 @@ impl Builder { impl Default for Builder { fn default() -> Self { - Self { tick_service: Unspecified {}, fetch_interval: Unspecified {}, fetch_callback: Unspecified {} } + Self { + tick_service: Unspecified {}, + fetch_interval: Unspecified {}, + fetch_callback: Unspecified {}, + page_size: page_size::get(), + } } } impl Builder { pub fn with_tick_service>(self, tick_service: TS) -> Builder { - Builder { tick_service, fetch_interval: self.fetch_interval, fetch_callback: self.fetch_callback } + Builder { tick_service, fetch_interval: self.fetch_interval, fetch_callback: self.fetch_callback, page_size: self.page_size } } } impl Builder { pub fn with_fetch_interval(self, fetch_interval: Duration) -> Builder { - Builder { tick_service: self.tick_service, fetch_interval, fetch_callback: self.fetch_callback } + Builder { tick_service: self.tick_service, fetch_interval, fetch_callback: self.fetch_callback, page_size: self.page_size } } } @@ -39,7 +45,12 @@ impl Builder { self, fetch_callback: CB, ) -> Builder> { - Builder { tick_service: self.tick_service, fetch_interval: self.fetch_interval, fetch_callback: Box::new(fetch_callback) as _ } + Builder { + tick_service: self.tick_service, + fetch_interval: self.fetch_interval, + fetch_callback: Box::new(fetch_callback) as _, + page_size: self.page_size, + } } } @@ -50,6 +61,7 @@ impl> Builder { fetch_interval: Duration::from_secs(1), counters: Default::default(), fetch_callback: None, + page_size: self.page_size as u64, } } } @@ -61,6 +73,7 @@ impl> Builder { fetch_interval: self.fetch_interval, counters: Default::default(), fetch_callback: None, + page_size: self.page_size as u64, } } } @@ -72,6 +85,7 @@ impl> Builder> Builder> { fetch_interval: Duration, counters: Counters, fetch_callback: Option>, + page_size: u64, } impl> Monitor { @@ -59,6 +60,8 @@ impl> Monitor { let counters_snapshot = CountersSnapshot { resident_set_size, virtual_memory_size, + resident_set_size_bytes: resident_set_size * self.page_size, + virtual_memory_size_bytes: virtual_memory_size * self.page_size, core_num, cpu_usage, fd_num, From b3faea10bf8046b14102c010700e926d5d826432 Mon Sep 17 00:00:00 2001 From: max143672 Date: Mon, 31 Jul 2023 18:14:01 +0400 Subject: [PATCH 2/2] Add performance monitoring to RPC In this commit, we have integrated the performance monitoring module into the RPC layer of our application. The PerfMonitor module is now added as a dependency in various Cargo.toml files, and methods related to performance monitoring have been added/updated in several .rs files. This allows us to collect various metrics related to our application's performance, including memory usage, CPU usage, disc I/O, etc. which will help in evaluating our software's efficiency. We chose to collect these specific metrics as they give a comprehensive overview of the software's performance and can help us detect potential bottlenecks or issues. We have also updated the ProcessMetrics structure to include these new metrics. --- Cargo.lock | 13 +++++++++++++ kaspad/src/main.rs | 22 ++++++++++------------ rpc/core/Cargo.toml | 1 + rpc/core/src/model/message.rs | 12 ++++++++++-- rpc/service/Cargo.toml | 1 + rpc/service/src/service.rs | 33 ++++++++++++++++++++++++++++++++- 6 files changed, 67 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c33041877..d51065d04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2564,6 +2564,7 @@ version = "0.1.6" dependencies = [ "kaspa-core", "log", + "page_size", "perf_monitor", "portable-atomic", "thiserror", @@ -2602,6 +2603,7 @@ dependencies = [ "kaspa-math", "kaspa-mining-errors", "kaspa-notify", + "kaspa-perf-monitor", "kaspa-txscript", "kaspa-utils", "log", @@ -2646,6 +2648,7 @@ dependencies = [ "kaspa-notify", "kaspa-p2p-flows", "kaspa-p2p-lib", + "kaspa-perf-monitor", "kaspa-rpc-core", "kaspa-txscript", "kaspa-utils", @@ -3656,6 +3659,16 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "page_size" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking" version = "2.1.0" diff --git a/kaspad/src/main.rs b/kaspad/src/main.rs index 1342264c8..080fdb4cb 100644 --- a/kaspad/src/main.rs +++ b/kaspad/src/main.rs @@ -232,18 +232,17 @@ do you confirm? (answer y/n or pass --yes to the Kaspad command line to confirm let consensus_manager = Arc::new(ConsensusManager::new(consensus_factory)); let consensus_monitor = Arc::new(ConsensusMonitor::new(processing_counters.clone(), tick_service.clone())); - let perf_monitor = args.perf_metrics.then(|| { + let perf_monitor_builder = PerfMonitorBuilder::new() + .with_fetch_interval(Duration::from_secs(args.perf_metrics_interval_sec)) + .with_tick_service(tick_service.clone()); + let perf_monitor = if args.perf_metrics { let cb = move |counters| { trace!("[{}] metrics: {:?}", kaspa_perf_monitor::SERVICE_NAME, counters); }; - Arc::new( - PerfMonitorBuilder::new() - .with_fetch_interval(Duration::from_secs(args.perf_metrics_interval_sec)) - .with_fetch_cb(cb) - .with_tick_service(tick_service.clone()) - .build(), - ) - }); + Arc::new(perf_monitor_builder.with_fetch_cb(cb).build()) + } else { + Arc::new(perf_monitor_builder.build()) + }; let notify_service = Arc::new(NotifyService::new(notification_root.clone(), notification_recv)); let index_service: Option> = if args.utxoindex { @@ -291,6 +290,7 @@ do you confirm? (answer y/n or pass --yes to the Kaspad command line to confirm processing_counters, wrpc_borsh_counters.clone(), wrpc_json_counters.clone(), + perf_monitor.clone(), )); let grpc_service = Arc::new(GrpcService::new(grpc_server_addr, rpc_core_service.clone(), args.rpc_max_clients)); @@ -305,9 +305,7 @@ do you confirm? (answer y/n or pass --yes to the Kaspad command line to confirm async_runtime.register(grpc_service); async_runtime.register(p2p_service); async_runtime.register(consensus_monitor); - if let Some(perf_monitor) = perf_monitor { - async_runtime.register(perf_monitor); - } + async_runtime.register(perf_monitor); let wrpc_service_tasks: usize = 2; // num_cpus::get() / 2; // Register wRPC servers based on command line arguments [ diff --git a/rpc/core/Cargo.toml b/rpc/core/Cargo.toml index 41d48b6b2..9951925de 100644 --- a/rpc/core/Cargo.toml +++ b/rpc/core/Cargo.toml @@ -18,6 +18,7 @@ kaspa-index-core.workspace = true kaspa-txscript.workspace = true kaspa-math.workspace = true kaspa-mining-errors.workspace = true +kaspa-perf-monitor.workspace = true faster-hex.workspace = true serde.workspace = true diff --git a/rpc/core/src/model/message.rs b/rpc/core/src/model/message.rs index 801c69374..ce052dfdd 100644 --- a/rpc/core/src/model/message.rs +++ b/rpc/core/src/model/message.rs @@ -689,10 +689,18 @@ pub struct GetMetricsRequest { #[serde(rename_all = "camelCase")] pub struct ProcessMetrics { // pub uptime: u64, - // pub memory_used: u64, - // pub storage_used: u64, // pub grpc_connections: u32, // pub wrpc_connections: u32, + pub resident_set_size_bytes: u64, + pub virtual_memory_size_bytes: u64, + pub core_num: u64, + pub cpu_usage: f64, + pub fd_num: u64, + pub disk_io_read_bytes: u64, + pub disk_io_write_bytes: u64, + pub disk_io_read_per_sec: f64, + pub disk_io_write_per_sec: f64, + pub borsh_live_connections: u64, pub borsh_connection_attempts: u64, pub borsh_handshake_failures: u64, diff --git a/rpc/service/Cargo.toml b/rpc/service/Cargo.toml index cd8c2dce3..3a66d6fb3 100644 --- a/rpc/service/Cargo.toml +++ b/rpc/service/Cargo.toml @@ -25,6 +25,7 @@ kaspa-p2p-flows.workspace = true kaspa-math.workspace = true kaspa-utxoindex.workspace = true kaspa-wrpc-core.workspace = true +kaspa-perf-monitor.workspace = true log.workspace = true async-trait.workspace = true diff --git a/rpc/service/src/service.rs b/rpc/service/src/service.rs index e2e322698..83a201697 100644 --- a/rpc/service/src/service.rs +++ b/rpc/service/src/service.rs @@ -23,6 +23,7 @@ use kaspa_core::{ kaspad_env::version, signals::Shutdown, task::service::{AsyncService, AsyncServiceError, AsyncServiceFuture}, + task::tick::TickService, trace, warn, }; use kaspa_index_core::{ @@ -40,6 +41,7 @@ use kaspa_notify::{ subscriber::{Subscriber, SubscriptionManager}, }; use kaspa_p2p_flows::flow_context::FlowContext; +use kaspa_perf_monitor::{counters::CountersSnapshot, Monitor as PerfMonitor}; use kaspa_rpc_core::{ api::rpc::{RpcApi, MAX_SAFE_WINDOW_SIZE}, model::*, @@ -89,6 +91,7 @@ pub struct RpcCoreService { wrpc_borsh_counters: Arc, wrpc_json_counters: Arc, shutdown: SingleTrigger, + perf_monitor: Arc>>, } const RPC_CORE: &str = "rpc-core"; @@ -107,6 +110,7 @@ impl RpcCoreService { processing_counters: Arc, wrpc_borsh_counters: Arc, wrpc_json_counters: Arc, + perf_monitor: Arc>>, ) -> Self { // Prepare consensus-notify objects let consensus_notify_channel = Channel::::default(); @@ -168,6 +172,7 @@ impl RpcCoreService { wrpc_borsh_counters, wrpc_json_counters, shutdown: SingleTrigger::default(), + perf_monitor, } } @@ -352,7 +357,12 @@ impl RpcApi for RpcCoreService { } async fn get_mempool_entry_call(&self, request: GetMempoolEntryRequest) -> RpcResult { - let Some(transaction) = self.mining_manager.clone().get_transaction(request.transaction_id, !request.filter_transaction_pool, request.include_orphan_pool).await else { + let Some(transaction) = self + .mining_manager + .clone() + .get_transaction(request.transaction_id, !request.filter_transaction_pool, request.include_orphan_pool) + .await + else { return Err(RpcError::TransactionNotFound(request.transaction_id)); }; let session = self.consensus_manager.consensus().session().await; @@ -630,7 +640,28 @@ impl RpcApi for RpcCoreService { // UNIMPLEMENTED METHODS async fn get_metrics_call(&self, req: GetMetricsRequest) -> RpcResult { + let CountersSnapshot { + resident_set_size_bytes, + virtual_memory_size_bytes, + core_num, + cpu_usage, + fd_num, + disk_io_read_bytes, + disk_io_write_bytes, + disk_io_read_per_sec, + disk_io_write_per_sec, + .. + } = self.perf_monitor.snapshot(); let process_metrics = req.process_metrics.then_some(ProcessMetrics { + resident_set_size_bytes, + virtual_memory_size_bytes, + core_num: core_num as u64, + cpu_usage, + fd_num: fd_num as u64, + disk_io_read_bytes, + disk_io_write_bytes, + disk_io_read_per_sec, + disk_io_write_per_sec, borsh_live_connections: self.wrpc_borsh_counters.live_connections.load(Ordering::Relaxed), borsh_connection_attempts: self.wrpc_borsh_counters.connection_attempts.load(Ordering::Relaxed), borsh_handshake_failures: self.wrpc_borsh_counters.handshake_failures.load(Ordering::Relaxed),