Skip to content

Commit

Permalink
Count in-flight GRPC requests (#4489)
Browse files Browse the repository at this point in the history
Looking at PR #4488, it seems very suspicious that we need to raise the GRPC limit. Counting how many in-flight requests we have per request path can help shed some light and potentially reveal the problem.

Cardinality of created counters is very low - O(num of grpc methods).

Even though we are potentially going to migrate to a new networking layer, I think having these counters will help in the medium term.
  • Loading branch information
andll authored Sep 6, 2022
1 parent 9714d06 commit 3a50bae
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 2 deletions.
5 changes: 4 additions & 1 deletion crates/sui-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use anyhow::anyhow;
use anyhow::bail;
use anyhow::Result;
use futures::TryFutureExt;
use mysten_network::server::ServerBuilder;
use parking_lot::Mutex;
use prometheus::Registry;
use std::option::Option::None;
Expand Down Expand Up @@ -39,6 +40,7 @@ use sui_types::messages::{CertifiedTransaction, CertifiedTransactionEffects};
use tokio::sync::mpsc::channel;
use tracing::{error, info};

use crate::metrics::GrpcMetrics;
use sui_core::authority_client::NetworkAuthorityClientMetrics;
use sui_core::epoch::epoch_store::EpochStore;
use sui_json_rpc::event_api::EventReadApiImpl;
Expand Down Expand Up @@ -278,7 +280,8 @@ impl SuiNode {
let mut server_conf = mysten_network::config::Config::new();
server_conf.global_concurrency_limit = config.grpc_concurrency_limit;
server_conf.load_shed = config.grpc_load_shed;
let mut server_builder = server_conf.server_builder();
let mut server_builder =
ServerBuilder::from_config(&server_conf, GrpcMetrics::new(&prometheus_registry));

if let Some(validator_service) = validator_service {
server_builder =
Expand Down
34 changes: 33 additions & 1 deletion crates/sui-node/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
// SPDX-License-Identifier: Apache-2.0

use axum::{extract::Extension, http::StatusCode, routing::get, Router};
use prometheus::{Registry, TextEncoder};
use mysten_network::metrics::MetricsCallbackProvider;
use prometheus::{register_int_gauge_vec_with_registry, IntGaugeVec, Registry, TextEncoder};
use std::net::SocketAddr;
use std::time::Duration;
use sui_network::tonic::Code;

const METRICS_ROUTE: &str = "/metrics";

Expand Down Expand Up @@ -34,3 +37,32 @@ async fn metrics(Extension(registry): Extension<Registry>) -> (StatusCode, Strin
),
}
}

/// Prometheus metrics describing gRPC traffic handled by the node.
///
/// The struct is `Clone` so it can be handed to the server builder by value.
#[derive(Clone)]
pub struct GrpcMetrics {
// Gauge vector registered as `inflight_grpc` with a single `path` label;
// incremented in `on_request` and decremented in `on_response`.
inflight_grpc: IntGaugeVec,
}

impl GrpcMetrics {
    /// Creates the gRPC metrics and registers them with `registry`.
    ///
    /// A single gauge vector named `inflight_grpc`, labelled by `path`, is
    /// registered.
    ///
    /// # Panics
    ///
    /// Panics if the `inflight_grpc` gauge cannot be registered, for example
    /// because a metric with the same name already exists in `registry`.
    pub fn new(registry: &Registry) -> Self {
        // Registration failure is a setup error, so fail loudly and name the
        // metric instead of panicking through a bare `unwrap`.
        let inflight_grpc = register_int_gauge_vec_with_registry!(
            "inflight_grpc",
            "Total in-flight GRPC per route",
            &["path"],
            registry,
        )
        .expect("failed to register `inflight_grpc` gauge");

        Self { inflight_grpc }
    }
}

impl MetricsCallbackProvider for GrpcMetrics {
    /// Records the start of a request by incrementing the in-flight gauge
    /// labelled with `path`.
    fn on_request(&self, path: String) {
        let in_flight = self.inflight_grpc.with_label_values(&[path.as_str()]);
        in_flight.inc();
    }

    /// Records the completion of a request by decrementing the in-flight gauge
    /// labelled with `path`.
    ///
    /// Latency and status information are not used by this implementation.
    fn on_response(&self, path: String, _latency: Duration, _status: u16, _grpc_status_code: Code) {
        let in_flight = self.inflight_grpc.with_label_values(&[path.as_str()]);
        in_flight.dec();
    }
}

0 comments on commit 3a50bae

Please sign in to comment.