Skip to content

Commit

Permalink
Add info-level diagnostic logs to aid with resolving stalled BFT chai…
Browse files Browse the repository at this point in the history
…ns (#7271)

* Add info-level diagnostic logs to aid with resolving stalled BFT chains

Signed-off-by: Matthew Whitehead <matthew1001@gmail.com>

* Add javadoc

Signed-off-by: Matthew Whitehead <matthew1001@gmail.com>

---------

Signed-off-by: Matthew Whitehead <matthew1001@gmail.com>
Signed-off-by: Matt Whitehead <matthew.whitehead@kaleido.io>
  • Loading branch information
matthew1001 authored Jul 1, 2024
1 parent 20b82a4 commit 08772b4
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
- Refactored how code, initcode, and max stack size are configured in forks. [#7245](https://github.com/hyperledger/besu/pull/7245)
- Nodes in a permissioned chain maintain (and retry) connections to bootnodes [#7257](https://github.com/hyperledger/besu/pull/7257)
- Promote experimental `besu storage x-trie-log` subcommand to production-ready [#7278](https://github.com/hyperledger/besu/pull/7278)
- Enhanced BFT round-change diagnostics [#7271](https://github.com/hyperledger/besu/pull/7271)

### Bug fixes
- Validation errors ignored in accounts-allowlist and empty list [#7138](https://github.com/hyperledger/besu/issues/7138)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Class for starting and keeping organised round timers */
public class RoundTimer {

private static final Logger LOG = LoggerFactory.getLogger(RoundTimer.class);

private final BftExecutors bftExecutors;
private Optional<ScheduledFuture<?>> currentTimerTask;
private final BftEventQueue queue;
Expand Down Expand Up @@ -71,6 +77,16 @@ public synchronized void startTimer(final ConsensusRoundIdentifier round) {

final ScheduledFuture<?> newTimerTask =
bftExecutors.scheduleTask(newTimerRunnable, expiryTime, TimeUnit.MILLISECONDS);

// Once we are up to round 2 start logging round expiries
if (round.getRoundNumber() >= 2) {
LOG.info(
"QBFT round {} expired. Moved to round {} which will expire in {} seconds",
round.getRoundNumber() - 1,
round.getRoundNumber(),
(expiryTime / 1000));
}

currentTimerTask = Optional.of(newTimerTask);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -274,17 +274,26 @@ private <P extends Payload, M extends BftMessage<P>> void actionOrBufferMessage(
@Override
public void handleRoundChangePayload(final RoundChange message) {
final ConsensusRoundIdentifier targetRound = message.getRoundIdentifier();
LOG.trace("Received a RoundChange Payload for {}", targetRound);

LOG.debug(
"Round change from {}: block {}, round {}",
message.getAuthor(),
message.getRoundIdentifier().getSequenceNumber(),
message.getRoundIdentifier().getRoundNumber());

// Diagnostic logging (only logs anything if the chain has stalled)
roundChangeManager.storeAndLogRoundChangeSummary(message);

final MessageAge messageAge =
determineAgeOfPayload(message.getRoundIdentifier().getRoundNumber());
if (messageAge == MessageAge.PRIOR_ROUND) {
LOG.trace("Received RoundChange Payload for a prior round. targetRound={}", targetRound);
LOG.debug("Received RoundChange Payload for a prior round. targetRound={}", targetRound);
return;
}

final Optional<Collection<RoundChange>> result =
roundChangeManager.appendRoundChangeMessage(message);

if (result.isPresent()) {
LOG.debug(
"Received sufficient RoundChange messages to change round to targetRound={}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ private BaseQbftBlockHeightManager createFullBlockHeightManager(final BlockHeade
new RoundChangeManager(
BftHelpers.calculateRequiredValidatorQuorum(finalState.getValidators().size()),
messageValidatorFactory.createRoundChangeMessageValidator(
parentHeader.getNumber() + 1L, parentHeader)),
parentHeader.getNumber() + 1L, parentHeader),
finalState.getLocalAddress()),
roundFactory,
finalState.getClock(),
messageValidatorFactory,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,19 +100,51 @@ public Collection<RoundChange> createRoundChangeCertificate() {
@VisibleForTesting
final Map<ConsensusRoundIdentifier, RoundChangeStatus> roundChangeCache = Maps.newHashMap();

/** A summary of the latest round each validator is on, for diagnostic purposes only */
private final Map<Address, ConsensusRoundIdentifier> roundSummary = Maps.newHashMap();

private final long quorum;
private final RoundChangeMessageValidator roundChangeMessageValidator;
private final Address localAddress;

/**
* Instantiates a new Round change manager.
*
* @param quorum the quorum
* @param roundChangeMessageValidator the round change message validator
* @param localAddress this node's address
*/
public RoundChangeManager(
final long quorum, final RoundChangeMessageValidator roundChangeMessageValidator) {
final long quorum,
final RoundChangeMessageValidator roundChangeMessageValidator,
final Address localAddress) {
this.quorum = quorum;
this.roundChangeMessageValidator = roundChangeMessageValidator;
this.localAddress = localAddress;
}

/**
* Store the latest round for a node, and if chain is stalled log a summary of which round each
* address is on
*
* @param message the round-change message that has just been received
*/
public void storeAndLogRoundChangeSummary(final RoundChange message) {
roundSummary.put(message.getAuthor(), message.getRoundIdentifier());
if (roundChangeCache.keySet().stream()
.findFirst()
.orElse(new ConsensusRoundIdentifier(0, 0))
.getRoundNumber()
>= 2) {
LOG.info("BFT round summary (quorum = {})", quorum);
for (Map.Entry<Address, ConsensusRoundIdentifier> nextEntry : roundSummary.entrySet()) {
LOG.info(
"Address: {} Round: {} {}",
nextEntry.getKey(),
nextEntry.getValue().getRoundNumber(),
nextEntry.getKey().equals(localAddress) ? "(Local node)" : "");
}
}
}

/**
Expand Down

0 comments on commit 08772b4

Please sign in to comment.