From 08772b466ef4681bd91503805789b4af311ed9cd Mon Sep 17 00:00:00 2001 From: Matt Whitehead Date: Mon, 1 Jul 2024 08:48:21 +0100 Subject: [PATCH] Add info-level diagnostic logs to aid with resolving stalled BFT chains (#7271) * Add info-level diagnostic logs to aid with resolving stalled BFT chains Signed-off-by: Matthew Whitehead * Add javadoc Signed-off-by: Matthew Whitehead --------- Signed-off-by: Matthew Whitehead Signed-off-by: Matt Whitehead --- CHANGELOG.md | 1 + .../besu/consensus/common/bft/RoundTimer.java | 16 +++++++++ .../statemachine/QbftBlockHeightManager.java | 13 +++++-- .../QbftBlockHeightManagerFactory.java | 3 +- .../qbft/statemachine/RoundChangeManager.java | 34 ++++++++++++++++++- 5 files changed, 63 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ba6b4dc5dd..819bb05a7cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - Refactored how code, initcode, and max stack size are configured in forks. [#7245](https://github.com/hyperledger/besu/pull/7245) - Nodes in a permissioned chain maintain (and retry) connections to bootnodes [#7257](https://github.com/hyperledger/besu/pull/7257) - Promote experimental `besu storage x-trie-log` subcommand to production-ready [#7278](https://github.com/hyperledger/besu/pull/7278) +- Enhanced BFT round-change diagnostics [#7271](https://github.com/hyperledger/besu/pull/7271) ### Bug fixes - Validation errors ignored in accounts-allowlist and empty list [#7138](https://github.com/hyperledger/besu/issues/7138) diff --git a/consensus/common/src/main/java/org/hyperledger/besu/consensus/common/bft/RoundTimer.java b/consensus/common/src/main/java/org/hyperledger/besu/consensus/common/bft/RoundTimer.java index a500a1ad7ee..6a8b02991d6 100644 --- a/consensus/common/src/main/java/org/hyperledger/besu/consensus/common/bft/RoundTimer.java +++ b/consensus/common/src/main/java/org/hyperledger/besu/consensus/common/bft/RoundTimer.java @@ -20,8 +20,14 @@ import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** Class for starting and keeping organised round timers */ public class RoundTimer { + + private static final Logger LOG = LoggerFactory.getLogger(RoundTimer.class); + private final BftExecutors bftExecutors; private Optional> currentTimerTask; private final BftEventQueue queue; @@ -71,6 +77,16 @@ public synchronized void startTimer(final ConsensusRoundIdentifier round) { final ScheduledFuture newTimerTask = bftExecutors.scheduleTask(newTimerRunnable, expiryTime, TimeUnit.MILLISECONDS); + + // Once we are up to round 2 start logging round expiries + if (round.getRoundNumber() >= 2) { + LOG.info( + "QBFT round {} expired. Moved to round {} which will expire in {} seconds", + round.getRoundNumber() - 1, + round.getRoundNumber(), + (expiryTime / 1000)); + } + currentTimerTask = Optional.of(newTimerTask); } } diff --git a/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/QbftBlockHeightManager.java b/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/QbftBlockHeightManager.java index 0940d35df6b..f79537cc0f9 100644 --- a/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/QbftBlockHeightManager.java +++ b/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/QbftBlockHeightManager.java @@ -274,17 +274,26 @@ private

> void actionOrBufferMessage( @Override public void handleRoundChangePayload(final RoundChange message) { final ConsensusRoundIdentifier targetRound = message.getRoundIdentifier(); - LOG.trace("Received a RoundChange Payload for {}", targetRound); + + LOG.debug( + "Round change from {}: block {}, round {}", + message.getAuthor(), + message.getRoundIdentifier().getSequenceNumber(), + message.getRoundIdentifier().getRoundNumber()); + + // Diagnostic logging (only logs anything if the chain has stalled) + roundChangeManager.storeAndLogRoundChangeSummary(message); final MessageAge messageAge = determineAgeOfPayload(message.getRoundIdentifier().getRoundNumber()); if (messageAge == MessageAge.PRIOR_ROUND) { - LOG.trace("Received RoundChange Payload for a prior round. targetRound={}", targetRound); + LOG.debug("Received RoundChange Payload for a prior round. targetRound={}", targetRound); return; } final Optional> result = roundChangeManager.appendRoundChangeMessage(message); + if (result.isPresent()) { LOG.debug( "Received sufficient RoundChange messages to change round to targetRound={}", diff --git a/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/QbftBlockHeightManagerFactory.java b/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/QbftBlockHeightManagerFactory.java index 0d306b1fc9f..889f83f98a7 100644 --- a/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/QbftBlockHeightManagerFactory.java +++ b/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/QbftBlockHeightManagerFactory.java @@ -86,7 +86,8 @@ private BaseQbftBlockHeightManager createFullBlockHeightManager(final BlockHeade new RoundChangeManager( BftHelpers.calculateRequiredValidatorQuorum(finalState.getValidators().size()), messageValidatorFactory.createRoundChangeMessageValidator( - parentHeader.getNumber() + 1L, parentHeader)), + parentHeader.getNumber() + 1L, parentHeader), + finalState.getLocalAddress()), roundFactory, finalState.getClock(), messageValidatorFactory, diff --git a/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/RoundChangeManager.java b/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/RoundChangeManager.java index ca671a6f527..89e2888395c 100644 --- a/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/RoundChangeManager.java +++ b/consensus/qbft/src/main/java/org/hyperledger/besu/consensus/qbft/statemachine/RoundChangeManager.java @@ -100,19 +100,51 @@ public Collection createRoundChangeCertificate() { @VisibleForTesting final Map roundChangeCache = Maps.newHashMap(); + /** A summary of the latest round each validator is on, for diagnostic purposes only */ + private final Map roundSummary = Maps.newHashMap(); + private final long quorum; private final RoundChangeMessageValidator roundChangeMessageValidator; + private final Address localAddress; /** * Instantiates a new Round change manager. * * @param quorum the quorum * @param roundChangeMessageValidator the round change message validator + * @param localAddress this node's address */ public RoundChangeManager( - final long quorum, final RoundChangeMessageValidator roundChangeMessageValidator) { + final long quorum, + final RoundChangeMessageValidator roundChangeMessageValidator, + final Address localAddress) { this.quorum = quorum; this.roundChangeMessageValidator = roundChangeMessageValidator; + this.localAddress = localAddress; + } + + /** + * Store the latest round for a node, and if chain is stalled log a summary of which round each + * address is on + * + * @param message the round-change message that has just been received + */ + public void storeAndLogRoundChangeSummary(final RoundChange message) { + roundSummary.put(message.getAuthor(), message.getRoundIdentifier()); + if (roundChangeCache.keySet().stream() + .findFirst() + .orElse(new ConsensusRoundIdentifier(0, 0)) + .getRoundNumber() + >= 2) { + LOG.info("BFT round summary (quorum = {})", quorum); + for (Map.Entry nextEntry : roundSummary.entrySet()) { + LOG.info( + "Address: {} Round: {} {}", + nextEntry.getKey(), + nextEntry.getValue().getRoundNumber(), + nextEntry.getKey().equals(localAddress) ? "(Local node)" : ""); + } + } } /**