Skip to content
This repository has been archived by the owner on Jan 22, 2025. It is now read-only.

Change getHealth to compare optimistically confirmed slots #33651

Merged
merged 11 commits into from
Oct 16, 2023
4 changes: 2 additions & 2 deletions core/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,8 @@ impl Validator {
// (by both replay stage and banking stage)
let prioritization_fee_cache = Arc::new(PrioritizationFeeCache::default());

let rpc_override_health_check = Arc::new(AtomicBool::new(false));
let rpc_override_health_check =
Arc::new(AtomicBool::new(config.rpc_config.disable_health_check));
let (
json_rpc_service,
pubsub_service,
Expand Down Expand Up @@ -980,7 +981,6 @@ impl Validator {
ledger_path,
config.validator_exit.clone(),
exit.clone(),
config.known_validators.clone(),
rpc_override_health_check.clone(),
startup_verification_complete,
optimistically_confirmed_bank.clone(),
Expand Down
10 changes: 4 additions & 6 deletions docs/src/api/http.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,11 @@ Some methods support providing a `filters` object to enable pre-filtering the da
Although not a JSON RPC API, a `GET /health` at the RPC HTTP Endpoint provides a
health-check mechanism for use by load balancers or other network
infrastructure. This request will always return a HTTP 200 OK response with a body of
"ok", "behind" or "unknown" based on the following conditions:
"ok", "behind" or "unknown":

1. If one or more `--known-validator` arguments are provided to `solana-validator` - "ok" is returned
when the node has within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest
known validator, otherwise "behind". "unknown" is returned when no slot
information from known validators is not yet available.
2. "ok" is always returned if no known validators are provided.
- `ok`: The node is within `HEALTH_CHECK_SLOT_DISTANCE` slots from the latest cluster confirmed slot
- `behind { distance }`: The node is behind `distance` slots from the latest cluster confirmed slot where `distance > HEALTH_CHECK_SLOT_DISTANCE`
- `unknown`: The node is unable to determine where it stands in relation to the cluster

## JSON RPC API Reference

Expand Down
9 changes: 2 additions & 7 deletions docs/src/api/methods/_getHealth.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,8 @@ import {

## getHealth

Returns the current health of the node.

:::caution
If one or more `--known-validator` arguments are provided to `solana-validator` - "ok" is returned
when the node has within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest known validator,
otherwise an error is returned. "ok" is always returned if no known validators are provided.
:::
Returns the current health of the node. A healthy node is one that is within
`HEALTH_CHECK_SLOT_DISTANCE` slots of the latest cluster confirmed slot.

<DocSideBySide>
<CodeParams>
Expand Down
47 changes: 30 additions & 17 deletions rpc/src/rpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,15 @@ pub struct JsonRpcConfig {
pub obsolete_v1_7_api: bool,
pub rpc_scan_and_fix_roots: bool,
pub max_request_body_size: Option<usize>,
/// Disable the health check, used for tests and TestValidator
pub disable_health_check: bool,
}

impl JsonRpcConfig {
pub fn default_for_test() -> Self {
Self {
full_api: true,
disable_health_check: true,
..Self::default()
}
}
Expand Down Expand Up @@ -374,31 +377,33 @@ impl JsonRpcRequestProcessor {
);

let leader_schedule_cache = Arc::new(LeaderScheduleCache::new_from_bank(&bank));
let startup_verification_complete = Arc::clone(bank.get_startup_verification_complete());
let slot = bank.slot();
let optimistically_confirmed_bank =
Arc::new(RwLock::new(OptimisticallyConfirmedBank { bank }));
Self {
config: JsonRpcConfig::default(),
snapshot_config: None,
bank_forks,
block_commitment_cache: Arc::new(RwLock::new(BlockCommitmentCache::new(
HashMap::new(),
0,
CommitmentSlots::new_from_slot(bank.slot()),
CommitmentSlots::new_from_slot(slot),
))),
blockstore,
blockstore: Arc::clone(&blockstore),
validator_exit: create_validator_exit(exit.clone()),
health: Arc::new(RpcHealth::new(
cluster_info.clone(),
None,
Arc::clone(&optimistically_confirmed_bank),
blockstore,
0,
exit,
Arc::clone(bank.get_startup_verification_complete()),
startup_verification_complete,
)),
cluster_info,
genesis_hash,
transaction_sender: Arc::new(Mutex::new(sender)),
bigtable_ledger_storage: None,
optimistically_confirmed_bank: Arc::new(RwLock::new(OptimisticallyConfirmedBank {
bank,
})),
optimistically_confirmed_bank,
largest_accounts_cache: Arc::new(RwLock::new(LargestAccountsCache::new(30))),
max_slots: Arc::new(MaxSlots::default()),
leader_schedule_cache,
Expand Down Expand Up @@ -4787,6 +4792,8 @@ pub mod tests {
// note that this means that slot 0 will always be considered complete
let max_complete_transaction_status_slot = Arc::new(AtomicU64::new(0));
let max_complete_rewards_slot = Arc::new(AtomicU64::new(0));
let optimistically_confirmed_bank =
OptimisticallyConfirmedBank::locked_from_bank_forks_root(&bank_forks);

let meta = JsonRpcRequestProcessor::new(
config,
Expand All @@ -4795,11 +4802,11 @@ pub mod tests {
block_commitment_cache.clone(),
blockstore.clone(),
validator_exit,
RpcHealth::stub(),
RpcHealth::stub(optimistically_confirmed_bank.clone(), blockstore.clone()),
cluster_info,
Hash::default(),
None,
OptimisticallyConfirmedBank::locked_from_bank_forks_root(&bank_forks),
optimistically_confirmed_bank,
Arc::new(RwLock::new(LargestAccountsCache::new(30))),
max_slots.clone(),
Arc::new(LeaderScheduleCache::new_from_bank(&bank)),
Expand Down Expand Up @@ -6398,7 +6405,11 @@ pub mod tests {
let blockstore = Arc::new(Blockstore::open(&ledger_path).unwrap());
let block_commitment_cache = Arc::new(RwLock::new(BlockCommitmentCache::default()));
let (bank_forks, mint_keypair, ..) = new_bank_forks();
let health = RpcHealth::stub();
let optimistically_confirmed_bank =
OptimisticallyConfirmedBank::locked_from_bank_forks_root(&bank_forks);
let health = RpcHealth::stub(optimistically_confirmed_bank.clone(), blockstore.clone());
// Mark the node as healthy to start
health.stub_set_health_status(Some(RpcHealthStatus::Ok));

// Freeze bank 0 to prevent a panic in `run_transaction_simulation()`
bank_forks.write().unwrap().get(0).unwrap().freeze();
Expand Down Expand Up @@ -6429,7 +6440,7 @@ pub mod tests {
cluster_info,
Hash::default(),
None,
OptimisticallyConfirmedBank::locked_from_bank_forks_root(&bank_forks),
optimistically_confirmed_bank,
Arc::new(RwLock::new(LargestAccountsCache::new(30))),
Arc::new(MaxSlots::default()),
Arc::new(LeaderScheduleCache::default()),
Expand Down Expand Up @@ -6690,18 +6701,20 @@ pub mod tests {
.my_contact_info()
.tpu(connection_cache.protocol())
.unwrap();
let optimistically_confirmed_bank =
OptimisticallyConfirmedBank::locked_from_bank_forks_root(&bank_forks);
let (request_processor, receiver) = JsonRpcRequestProcessor::new(
JsonRpcConfig::default(),
None,
bank_forks.clone(),
block_commitment_cache,
blockstore,
blockstore.clone(),
validator_exit,
RpcHealth::stub(),
RpcHealth::stub(optimistically_confirmed_bank.clone(), blockstore),
cluster_info,
Hash::default(),
None,
OptimisticallyConfirmedBank::locked_from_bank_forks_root(&bank_forks),
optimistically_confirmed_bank,
Arc::new(RwLock::new(LargestAccountsCache::new(30))),
Arc::new(MaxSlots::default()),
Arc::new(LeaderScheduleCache::default()),
Expand Down Expand Up @@ -8327,9 +8340,9 @@ pub mod tests {
None,
bank_forks.clone(),
block_commitment_cache,
blockstore,
blockstore.clone(),
validator_exit,
RpcHealth::stub(),
RpcHealth::stub(optimistically_confirmed_bank.clone(), blockstore.clone()),
cluster_info,
Hash::default(),
None,
Expand Down
Loading