Skip to content

Commit

Permalink
Add node authority status metric (paritytech#4699)
Browse files Browse the repository at this point in the history
* Check authority status on active leaves update

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* cargo changes

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* Fix tests

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* Add metric for authority status

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* Revert "Fix tests"

This reverts commit 5bd56bb.

* Revert "cargo changes"

This reverts commit ffea18f.

* Revert "Check authority status on active leaves update"

This reverts commit 55a30ac.

* Test fixups

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* fix

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* update

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* undo damage

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* dont update status on runtime errors

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* Fix tests

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* fix inconsistency

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* Review feedback

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* Dont derive primitive Default

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* add dummy_session_info helper

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* unset parachain validator status if no longer authority

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* update

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* damn

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>

* 🤦

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>
  • Loading branch information
sandreim authored and Wizdave97 committed Feb 3, 2022
1 parent fb65e97 commit 8eb3d82
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 3 deletions.
66 changes: 64 additions & 2 deletions node/network/gossip-support/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,18 @@ use polkadot_node_subsystem::{
RuntimeApiRequest,
},
overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext,
SubsystemError,
SubsystemError, SubsystemSender,
};
use polkadot_node_subsystem_util as util;
use polkadot_primitives::v1::{AuthorityDiscoveryId, Hash, SessionIndex};

#[cfg(test)]
mod tests;

mod metrics;

use metrics::Metrics;

const LOG_TARGET: &str = "parachain::gossip-support";
// How much time should we wait to reissue a connection request
// since the last authority discovery resolution failure.
Expand Down Expand Up @@ -104,14 +108,21 @@ pub struct GossipSupport<AD> {
connected_authorities_by_peer_id: HashMap<PeerId, HashSet<AuthorityDiscoveryId>>,
/// Authority discovery service.
authority_discovery: AD,

/// Subsystem metrics.
metrics: Metrics,
}

impl<AD> GossipSupport<AD>
where
AD: AuthorityDiscovery,
{
/// Create a new instance of the [`GossipSupport`] subsystem.
pub fn new(keystore: SyncCryptoStorePtr, authority_discovery: AD) -> Self {
pub fn new(keystore: SyncCryptoStorePtr, authority_discovery: AD, metrics: Metrics) -> Self {
// Initialize metrics to `0`.
metrics.on_is_not_authority();
metrics.on_is_not_parachain_validator();

Self {
keystore,
last_session_index: None,
Expand All @@ -121,6 +132,7 @@ where
connected_authorities: HashMap::new(),
connected_authorities_by_peer_id: HashMap::new(),
authority_discovery,
metrics,
}
}

Expand Down Expand Up @@ -223,10 +235,60 @@ where

if is_new_session {
update_gossip_topology(ctx, our_index, all_authorities, relay_parent).await?;
self.update_authority_status_metrics(leaf, ctx.sender()).await?;
}
}
}
Ok(())
}

/// Refresh the authority and parachain validator gauges for the current session.
///
/// Requests the session info for `self.last_session_index` at `leaf` and checks whether
/// one of the keys in our keystore is among its discovery keys. If no session info is
/// returned, the gauges are left untouched.
///
/// # Errors
///
/// Propagates any error produced while requesting the session info.
///
/// # Panics
///
/// Panics if `self.last_session_index` is `None`.
async fn update_authority_status_metrics(
    &mut self,
    leaf: Hash,
    sender: &mut impl SubsystemSender,
) -> Result<(), util::Error> {
    let session_index = self
        .last_session_index
        .expect("Last session index is always set on every session index change");

    let session_info =
        match util::request_session_info(leaf, session_index, sender).await.await?? {
            Some(info) => info,
            // Nothing to compare our keys against, keep the current gauge values.
            None => return Ok(()),
        };

    match ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await {
        Ok(validator_index) => {
            self.metrics.on_is_authority();

            // `session_info.validators` is the subset of authorities participating in
            // parachain consensus. The first `maxValidators` entries are the parachain
            // validators, so comparing our index against the length of that set avoids
            // searching for the keys.
            // https://github.com/paritytech/polkadot/blob/a52dca2be7840b23c19c153cf7e110b1e3e475f8/runtime/parachains/src/configuration.rs#L148
            if validator_index < session_info.validators.len() {
                self.metrics.on_is_parachain_validator();
            } else {
                self.metrics.on_is_not_parachain_validator();
            }
        },
        Err(util::Error::NotAValidator) => {
            // No longer an authority, which also means not a parachain validator.
            self.metrics.on_is_not_authority();
            self.metrics.on_is_not_parachain_validator();
        },
        // Don't update on any other error.
        Err(_) => {},
    }

    Ok(())
}

Expand Down
90 changes: 90 additions & 0 deletions node/network/gossip-support/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

use polkadot_node_subsystem_util::{
metrics,
metrics::{
prometheus,
prometheus::{Gauge, PrometheusError, Registry, U64},
},
};

/// Gossip support metrics.
///
/// Wraps an optional set of gauges; when the inner value is `None` every `on_*`
/// method is a no-op.
#[derive(Clone, Default)]
pub struct Metrics(Option<MetricsInner>);

/// The gauges held by [`Metrics`] once it has been registered.
#[derive(Clone)]
struct MetricsInner {
/// Tracks authority status for producing relay chain blocks.
/// Registered as `polkadot_node_is_authority`; set to `1` or `0`.
is_authority: Gauge<U64>,
/// Tracks authority status for parachain approval checking.
/// Registered as `polkadot_node_is_parachain_validator`; set to `1` or `0`.
is_parachain_validator: Gauge<U64>,
}

impl Metrics {
/// Dummy constructor for testing.
#[cfg(test)]
pub fn new_dummy() -> Self {
Self(None)
}

/// Set the `relaychain validator` metric.
pub fn on_is_authority(&self) {
if let Some(metrics) = &self.0 {
metrics.is_authority.set(1);
}
}

/// Unset the `relaychain validator` metric.
pub fn on_is_not_authority(&self) {
if let Some(metrics) = &self.0 {
metrics.is_authority.set(0);
}
}

/// Set the `parachain validator` metric.
pub fn on_is_parachain_validator(&self) {
if let Some(metrics) = &self.0 {
metrics.is_parachain_validator.set(1);
}
}

/// Unset the `parachain validator` metric.
pub fn on_is_not_parachain_validator(&self) {
if let Some(metrics) = &self.0 {
metrics.is_parachain_validator.set(0);
}
}
}

impl metrics::Metrics for Metrics {
    /// Create both status gauges and register them with `registry`.
    ///
    /// Returns an error if creating or registering either gauge fails.
    fn try_register(registry: &Registry) -> Result<Self, PrometheusError> {
        // Created and registered first, same order as the struct fields.
        let is_authority = prometheus::register(
            Gauge::new(
                "polkadot_node_is_authority",
                "Tracks the node authority status across sessions. \
                An authority is any node that is a potential block producer in a session.",
            )?,
            registry,
        )?;

        let is_parachain_validator = prometheus::register(
            Gauge::new(
                "polkadot_node_is_parachain_validator",
                "Tracks the node parachain validator status across sessions. Parachain validators are a \
                subset of authorities that perform approval checking of all parachain candidates in a session.",
            )?,
            registry,
        )?;

        Ok(Self(Some(MetricsInner { is_authority, is_parachain_validator })))
    }
}
42 changes: 41 additions & 1 deletion node/network/gossip-support/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ async fn get_other_authorities_addrs_map() -> HashMap<AuthorityDiscoveryId, Hash
}

fn make_subsystem() -> GossipSupport<MockAuthorityDiscovery> {
GossipSupport::new(make_ferdie_keystore(), MOCK_AUTHORITY_DISCOVERY.clone())
GossipSupport::new(
make_ferdie_keystore(),
MOCK_AUTHORITY_DISCOVERY.clone(),
Metrics::new_dummy(),
)
}

fn test_harness<T: Future<Output = VirtualOverseer>, AD: AuthorityDiscovery>(
Expand Down Expand Up @@ -230,6 +234,7 @@ fn issues_a_connection_request_on_new_session() {
tx.send(Ok(1)).unwrap();
}
);

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
Expand All @@ -254,6 +259,17 @@ fn issues_a_connection_request_on_new_session() {

test_neighbors(overseer).await;

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::SessionInfo(1, sender),
)) => {
assert_eq!(relay_parent, hash);
sender.send(Ok(None)).unwrap();
}
);

virtual_overseer
});

Expand Down Expand Up @@ -296,6 +312,7 @@ fn issues_a_connection_request_on_new_session() {
tx.send(Ok(2)).unwrap();
}
);

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
Expand All @@ -320,6 +337,17 @@ fn issues_a_connection_request_on_new_session() {

test_neighbors(overseer).await;

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::SessionInfo(2, sender),
)) => {
assert_eq!(relay_parent, hash);
sender.send(Ok(None)).unwrap();
}
);

virtual_overseer
});
assert_eq!(state.last_session_index, Some(2));
Expand Down Expand Up @@ -378,6 +406,7 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {
tx.send(Ok(1)).unwrap();
}
);

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
Expand Down Expand Up @@ -406,6 +435,17 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {

test_neighbors(overseer).await;

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::SessionInfo(1, sender),
)) => {
assert_eq!(relay_parent, hash);
sender.send(Ok(None)).unwrap();
}
);

virtual_overseer
})
};
Expand Down
1 change: 1 addition & 0 deletions node/service/src/overseer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ where
.gossip_support(GossipSupportSubsystem::new(
keystore.clone(),
authority_discovery_service.clone(),
Metrics::register(registry)?,
))
.dispute_coordinator(if disputes_enabled {
DisputeCoordinatorSubsystem::new(
Expand Down

0 comments on commit 8eb3d82

Please sign in to comment.