From c5ed816c80eae43348593e636e4b56da98d8af6b Mon Sep 17 00:00:00 2001 From: SW van Heerden Date: Tue, 12 Sep 2023 17:40:47 +0200 Subject: [PATCH] fix: potential sync stuck (#5760) Description --- Fixes a potential case where the sync process can get stuck. Motivation and Context --- There is a potential scenario in which the latency keeps increasing forever, leaving the local node stuck in sync indefinitely. This adds a maximum number of times the local node will try again before exiting. --- .../core/src/base_node/sync/block_sync/synchronizer.rs | 7 +++++++ .../core/src/base_node/sync/header_sync/synchronizer.rs | 7 +++++++ .../src/base_node/sync/horizon_state_sync/synchronizer.rs | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/base_layer/core/src/base_node/sync/block_sync/synchronizer.rs b/base_layer/core/src/base_node/sync/block_sync/synchronizer.rs index 20d93ad53f..30fd8f7465 100644 --- a/base_layer/core/src/base_node/sync/block_sync/synchronizer.rs +++ b/base_layer/core/src/base_node/sync/block_sync/synchronizer.rs @@ -50,6 +50,8 @@ use crate::{ const LOG_TARGET: &str = "c::bn::block_sync"; +const MAX_LATENCY_INCREASES: usize = 5; + pub struct BlockSynchronizer<'a, B> { config: BlockchainSyncConfig, db: AsyncBlockchainDb, @@ -99,6 +101,7 @@ impl<'a, B: BlockchainBackend + 'static> BlockSynchronizer<'a, B> { pub async fn synchronize(&mut self) -> Result<(), BlockSyncError> { let mut max_latency = self.config.initial_max_sync_latency; let mut sync_round = 0; + let mut latency_increases_counter = 0; loop { match self.attempt_block_sync(max_latency).await { Ok(_) => return Ok(()), @@ -111,6 +114,10 @@ impl<'a, B: BlockchainBackend + 'static> BlockSynchronizer<'a, B> { max_latency, self.sync_peers.len() ); + latency_increases_counter += 1; + if latency_increases_counter > MAX_LATENCY_INCREASES { + return Err(err); + } // Prohibit using a few slow sync peers only, rather get new sync peers assigned if self.sync_peers.len() < 2 { return Err(err); diff --git 
a/base_layer/core/src/base_node/sync/header_sync/synchronizer.rs b/base_layer/core/src/base_node/sync/header_sync/synchronizer.rs index 7e13dbf2c3..61a5b20fd7 100644 --- a/base_layer/core/src/base_node/sync/header_sync/synchronizer.rs +++ b/base_layer/core/src/base_node/sync/header_sync/synchronizer.rs @@ -55,6 +55,8 @@ const LOG_TARGET: &str = "c::bn::header_sync"; const NUM_INITIAL_HEADERS_TO_REQUEST: usize = 1000; +const MAX_LATENCY_INCREASES: usize = 5; + pub struct HeaderSynchronizer<'a, B> { config: BlockchainSyncConfig, db: AsyncBlockchainDb, @@ -113,6 +115,7 @@ impl<'a, B: BlockchainBackend + 'static> HeaderSynchronizer<'a, B> { self.sync_peers.len() ); let mut max_latency = self.config.initial_max_sync_latency; + let mut latency_increases_counter = 0; loop { match self.try_sync_from_all_peers(max_latency).await { Ok(sync_peer) => break Ok(sync_peer), @@ -122,6 +125,10 @@ impl<'a, B: BlockchainBackend + 'static> HeaderSynchronizer<'a, B> { return Err(err); } max_latency += self.config.max_latency_increase; + latency_increases_counter += 1; + if latency_increases_counter > MAX_LATENCY_INCREASES { + return Err(err); + } }, Err(err) => break Err(err), } diff --git a/base_layer/core/src/base_node/sync/horizon_state_sync/synchronizer.rs b/base_layer/core/src/base_node/sync/horizon_state_sync/synchronizer.rs index 22d073250f..5694ec3c1a 100644 --- a/base_layer/core/src/base_node/sync/horizon_state_sync/synchronizer.rs +++ b/base_layer/core/src/base_node/sync/horizon_state_sync/synchronizer.rs @@ -78,6 +78,8 @@ use crate::{ const LOG_TARGET: &str = "c::bn::state_machine_service::states::horizon_state_sync"; +const MAX_LATENCY_INCREASES: usize = 5; + pub struct HorizonStateSynchronization<'a, B> { config: BlockchainSyncConfig, db: AsyncBlockchainDb, @@ -153,6 +155,7 @@ impl<'a, B: BlockchainBackend + 'static> HorizonStateSynchronization<'a, B> { } })?; + let mut latency_increases_counter = 0; loop { match self.sync(&header).await { Ok(()) => return Ok(()), @@ 
-172,6 +175,10 @@ impl<'a, B: BlockchainBackend + 'static> HorizonStateSynchronization<'a, B> { return Err(err); } self.max_latency += self.config.max_latency_increase; + latency_increases_counter += 1; + if latency_increases_counter > MAX_LATENCY_INCREASES { + return Err(err); + } }, Err(err) => return Err(err), }