Skip to content

Commit

Permalink
Add health checkup mechanism (#1145)
Browse files Browse the repository at this point in the history
* Added health checkup mechanism for CosmosSDK

* Added SDK compat diagnostic check

* Added fix for leading v in semver

* Lowered log to warn. Added early exit

* Reverted the one-chain patch to enable indexing

* changelog

* Improve health_checkup async code flow

* Tentative fix for tx_search parsing error bug which Soares found.

* Update compatibility version bound to v0.42.6 to work with gaia v5.0.0

* Aesthetic & comments improvements.

* Removed stale tracing import (became stale after rebasing onto master)

Co-authored-by: Soares Chen <soares.chen@maybevoid.com>
  • Loading branch information
adizere and soareschen authored Jul 6, 2021
1 parent de62a4b commit 3da7dc1
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 6 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,26 @@

## Unreleased

Many thanks to Fraccaroli Gianmarco (@Fraccaman) for helping us improve the
reliability of Hermes ([#697]).

### FEATURES

- [ibc-relayer-cli]
- Added `config validate` CLI to Hermes ([#600])
- Added basic channel filter ([#1140])
- Added `query channel ends` CLI command ([#1062])
- Added a health checkup mechanism for Hermes ([#697], [#1057])

### IMPROVEMENTS

- Update to `tendermint-rs` v0.20.0 ([#1125])
- Add inline documentation to config.toml ([#1127])

[#600]: https://github.com/informalsystems/ibc-rs/issues/600
[#697]: https://github.com/informalsystems/ibc-rs/issues/697
[#1062]: https://github.com/informalsystems/ibc-rs/issues/1062
[#1057]: https://github.com/informalsystems/ibc-rs/issues/1057
[#1125]: https://github.com/informalsystems/ibc-rs/issues/1125
[#1127]: https://github.com/informalsystems/ibc-rs/issues/1127
[#1140]: https://github.com/informalsystems/ibc-rs/issues/1140
Expand Down
11 changes: 9 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions proto/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ pub mod cosmos {
/// Pulls in the contents of `prost/cosmos.base.v1beta1.rs`
/// (presumably prost-generated protobuf bindings — confirm against the build setup).
pub mod v1beta1 {
include!("prost/cosmos.base.v1beta1.rs");
}
/// Pulls in the contents of `prost/cosmos.base.tendermint.v1beta1.rs`.
///
/// The relayer imports `service_client::ServiceClient`, `GetNodeInfoRequest`
/// and `VersionInfo` from `tendermint::v1beta1` for its health checkup.
pub mod tendermint {
pub mod v1beta1 {
include!("prost/cosmos.base.tendermint.v1beta1.rs");
}
}
}
pub mod crypto {
pub mod multisig {
Expand Down
1 change: 1 addition & 0 deletions relayer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ dyn-clone = "1.0.3"
retry = { version = "1.2.1", default-features = false }
async-stream = "0.3.2"
fraction = {version = "0.8.0", default-features = false }
semver = "1.0"

[dependencies.tendermint]
version = "=0.20.0"
Expand Down
119 changes: 115 additions & 4 deletions relayer/src/chain/cosmos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ use tendermint::consensus::Params;
use tendermint_light_client::types::LightBlock as TMLightBlock;
use tendermint_proto::Protobuf;
use tendermint_rpc::endpoint::tx::Response as ResultTx;
use tendermint_rpc::query::Query;
use tendermint_rpc::query::{EventType, Query};
use tendermint_rpc::{endpoint::broadcast::tx_sync::Response, Client, HttpClient, Order};
use tokio::runtime::Runtime as TokioRuntime;
use tonic::codegen::http::Uri;
use tracing::{debug, trace};
use tracing::{debug, trace, warn};

use ibc::downcast;
use ibc::events::{from_tx_response_event, IbcEvent};
Expand All @@ -50,6 +50,8 @@ use ibc::query::{QueryTxHash, QueryTxRequest};
use ibc::signer::Signer;
use ibc::Height as ICSHeight;
use ibc_proto::cosmos::auth::v1beta1::{BaseAccount, QueryAccountRequest};
use ibc_proto::cosmos::base::tendermint::v1beta1::service_client::ServiceClient;
use ibc_proto::cosmos::base::tendermint::v1beta1::GetNodeInfoRequest;
use ibc_proto::cosmos::base::v1beta1::Coin;
use ibc_proto::cosmos::tx::v1beta1::mode_info::{Single, Sum};
use ibc_proto::cosmos::tx::v1beta1::{
Expand Down Expand Up @@ -82,6 +84,8 @@ use crate::light_client::Verified;

use super::Chain;

mod compatibility;

const DEFAULT_MAX_GAS: u64 = 300_000;
const DEFAULT_GAS_PRICE_ADJUSTMENT: f64 = 0.1;

Expand Down Expand Up @@ -109,6 +113,109 @@ pub struct CosmosSdkChain {
}

impl CosmosSdkChain {
/// Probes the full node behind this chain handle and logs a warning
/// if anything looks wrong.
///
/// The probes run in this order, stopping at the first failure:
/// 1. the `/health` RPC endpoint must answer successfully;
/// 2. a `/tx_search` RPC query must succeed (i.e., transaction
///    indexing is usable);
/// 3. the node info fetched over gRPC must report an application
///    version whose SDK module passes the compatibility diagnostic.
///
/// This method never fails: any error is only reported through
/// `warn!` log lines.
fn health_checkup(&self) {
    async fn do_health_checkup(chain: &CosmosSdkChain) -> Result<(), Error> {
        let chain_id = chain.id();
        let grpc_address = chain.grpc_addr.to_string();
        let rpc_address = chain.config.rpc_addr.to_string();

        // Every gRPC-related failure below has the same shape; only the
        // endpoint label and the textual cause differ.
        let grpc_failure = |endpoint: &str, cause: String| Kind::HealthCheckGrpc {
            chain_id: chain_id.clone(),
            address: grpc_address.clone(),
            endpoint: endpoint.to_string(),
            cause,
        };

        // Step 1: the self-reported health endpoint.
        chain
            .rpc_client
            .health()
            .await
            .map_err(|e| Kind::HealthCheckJsonRpc {
                chain_id: chain_id.clone(),
                address: rpc_address.clone(),
                endpoint: "/health".to_string(),
                cause: e,
            })?;

        // Step 2: transaction indexing. Only whether the query succeeds
        // matters here; the search results themselves are discarded.
        let indexing_probe = Query::from(EventType::NewBlock);
        chain
            .rpc_client
            .tx_search(indexing_probe, false, 1, 1, Order::Ascending)
            .await
            .map_err(|e| Kind::HealthCheckJsonRpc {
                chain_id: chain_id.clone(),
                address: rpc_address.clone(),
                endpoint: "/tx_search".to_string(),
                cause: e,
            })?;

        // Step 3: open a gRPC client and ask the node for its version info.
        let mut grpc_client = ServiceClient::connect(chain.grpc_addr.clone())
            .await
            .map_err(|e| grpc_failure("tendermint::ServiceClient", e.to_string()))?;

        let node_info = grpc_client
            .get_node_info(tonic::Request::new(GetNodeInfoRequest {}))
            .await
            .map_err(|e| grpc_failure("tendermint::GetNodeInfoRequest", e.to_string()))?;

        let app_version = node_info
            .into_inner()
            .application_version
            .ok_or_else(|| {
                grpc_failure(
                    "tendermint::GetNodeInfoRequest",
                    "the gRPC response contains no application version information"
                        .to_string(),
                )
            })?;

        // Finally, verify the underlying SDK version; a returned
        // diagnostic means the compatibility check did not pass.
        match compatibility::run_diagnostic(app_version) {
            None => Ok(()),
            Some(diagnostic) => Err(Kind::SdkModuleVersion {
                chain_id: chain_id.clone(),
                address: grpc_address.clone(),
                cause: diagnostic.to_string(),
            }
            .into()),
        }
    }

    let outcome = self.block_on(do_health_checkup(self));

    if let Err(failure) = outcome {
        warn!("{}", failure);
        warn!("some Hermes features may not work in this mode!");
    }
}

/// The unbonding period of this chain
pub fn unbonding_period(&self) -> Result<Duration, Error> {
crate::time!("unbonding_period");
Expand Down Expand Up @@ -573,14 +680,18 @@ impl Chain for CosmosSdkChain {
let grpc_addr =
Uri::from_str(&config.grpc_addr.to_string()).map_err(|e| Kind::Grpc.context(e))?;

Ok(Self {
let chain = Self {
config,
rpc_client,
grpc_addr,
rt,
keybase,
account: None,
})
};

chain.health_checkup();

Ok(chain)
}

fn init_light_client(&self) -> Result<Box<dyn LightClient<Self>>, Error> {
Expand Down
122 changes: 122 additions & 0 deletions relayer/src/chain/cosmos/compatibility.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//! Cosmos-SDK compatibility constants and helper methods.

use thiserror::Error;

use ibc_proto::cosmos::base::tendermint::v1beta1::VersionInfo;

/// Specifies the SDK module path, as it is expected to appear
/// in the application version information.
///
/// `run_diagnostic` selects the first build dependency whose `path`
/// *contains* this string, so this is a substring pattern rather
/// than a complete module path.
///
/// The module identification is captured in a `Module` entry
/// with the following structure as an example:
/// ```text
/// Module {
/// path: "github.com/cosmos/cosmos-sdk",
/// version: "v0.42.4",
/// sum: "h1:yaD4PyOx0LnyfiWasC5egg1U76lT83GRxjJjupPo7Gk=",
/// },
/// ```
const SDK_MODULE_NAME: &str = "cosmos/cosmos-sdk";

/// Specifies the SDK module version requirement.
///
/// The string is parsed with `semver::VersionReq`; both the lower
/// and the upper bound are inclusive. The version reported for the
/// SDK module has its leading `v` trimmed before it is matched
/// against this requirement.
///
/// # Note: Should be consistent with [features] guide page.
///
/// [features]: https://hermes.informal.systems/features.html
const SDK_MODULE_VERSION_REQ: &str = ">=0.41.3, <=0.42.6";

/// Identification details reported by an IBC application
/// (for instance `gaiad`), kept around so that diagnostics can
/// say which application they refer to.
#[derive(Clone, Debug)]
pub struct AppInfo {
    /// Name of the application, as reported by the node.
    app_name: String,
    /// Version string of the application, as reported by the node.
    version: String,
    /// Git commit the application reports it was built from.
    git_commit: String,
}

impl std::fmt::Display for AppInfo {
    /// Renders the application info as `<app_name>:<version>-<git_commit>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            app_name,
            version,
            git_commit,
        } = self;

        write!(f, "{}:{}-{}", app_name, version, git_commit)
    }
}

/// The ways in which the SDK compatibility check performed by
/// `run_diagnostic` can fail.
///
/// Every variant carries the [`AppInfo`] of the application that
/// was inspected, so the rendered message identifies it.
#[derive(Error, Debug)]
pub enum Diagnostic {
/// No build dependency with a path containing `pattern` was found.
#[error("no SDK module '{pattern}' was found for application {app}")]
SdkModuleNotFound { pattern: String, app: AppInfo },

/// The SDK module was found, but its version could not be parsed as
/// a semver. `raw_version` is the version exactly as reported (before
/// the leading `v` is trimmed) and `cause` is the parser's error text.
#[error("failed parsing the SDK module ('{module_path}') version number '{raw_version}' into a semver for application {app}; cause: {cause}")]
VersionParsingFailed {
module_path: String,
raw_version: String,
cause: String,
app: AppInfo,
},

/// The SDK module version parsed correctly but does not satisfy
/// `requirements`. `found` is the parsed version rendered back to text.
#[error("SDK module at version '{found}' does not meet compatibility requirements {requirements} for application {app}")]
MismatchingSdkModuleVersion {
requirements: String,
found: String,
app: AppInfo,
},
}

/// Checks that the Cosmos SDK module listed in the given
/// [`VersionInfo`] satisfies the supported version range.
///
/// Returns `None` when the check passes, and `Some` [`Diagnostic`]
/// describing the problem otherwise.
///
/// The SDK module is located among the build dependencies via
/// [`SDK_MODULE_NAME`]; its version is then compared against
/// [`SDK_MODULE_VERSION_REQ`].
///
/// # Panics
///
/// Panics if [`SDK_MODULE_VERSION_REQ`] is not a valid semver
/// requirement.
pub(crate) fn run_diagnostic(v: VersionInfo) -> Option<Diagnostic> {
    let app = AppInfo {
        app_name: v.app_name,
        version: v.version,
        git_commit: v.git_commit,
    };

    // The requirement string is a constant, so failing to parse it is
    // a programming error rather than a runtime condition.
    let requirements = semver::VersionReq::parse(SDK_MODULE_VERSION_REQ)
        .expect("parsing the SDK module requirements into semver");

    // Locate the Cosmos SDK module among the build dependencies.
    let sdk_module = match v
        .build_deps
        .iter()
        .find(|dep| dep.path.contains(SDK_MODULE_NAME))
    {
        Some(module) => module,
        None => {
            return Some(Diagnostic::SdkModuleNotFound {
                pattern: SDK_MODULE_NAME.to_string(),
                app,
            });
        }
    };

    // The reported version carries a leading 'v' that semver does not
    // accept, so it is dropped before parsing.
    let found = match semver::Version::parse(sdk_module.version.trim_start_matches('v')) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Some(Diagnostic::VersionParsingFailed {
                module_path: sdk_module.path.clone(),
                raw_version: sdk_module.version.clone(),
                cause: e.to_string(),
                app,
            });
        }
    };

    // Last step: compare the parsed version against the requirements.
    if requirements.matches(&found) {
        None
    } else {
        Some(Diagnostic::MismatchingSdkModuleVersion {
            requirements: SDK_MODULE_VERSION_REQ.to_string(),
            found: found.to_string(),
            app,
        })
    }
}
Loading

0 comments on commit 3da7dc1

Please sign in to comment.