From d6f6ddf3f6388fea5835cf6cf0ee76148f023d55 Mon Sep 17 00:00:00 2001
From: Nicolas BACQUEY
Date: Thu, 28 Mar 2024 17:20:56 +0100
Subject: [PATCH 01/20] Streamline exception matching with `ViewPatterns`

---
 .../Genesis/Tests/DensityDisconnect.hs        |  8 ++---
 .../Test/Consensus/Genesis/Tests/LoE.hs       |  7 ++---
 .../Test/Consensus/Genesis/Tests/LoP.hs       | 26 +++++++----------
 .../PeerSimulator/Tests/LinkedThreads.hs      | 29 ++++++-------------
 .../Consensus/PeerSimulator/Tests/Timeouts.hs |  6 ++--
 5 files changed, 27 insertions(+), 49 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
index f770bac5fb..8ce27f9b99 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE DuplicateRecordFields #-}
 {-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE ViewPatterns #-}

 module Test.Consensus.Genesis.Tests.DensityDisconnect (tests) where

@@ -338,11 +339,8 @@ prop_densityDisconnectTriggersChainSel =
     ( \GenesisTest {gtBlockTree} stateView@StateView {svTipBlock} ->
         let exnCorrect = case exceptionsByComponent ChainSyncClient stateView of
-              [exn] ->
-                case fromException exn of
-                  Just DensityTooLow -> True
-                  _ -> False
-              _ -> False
+              [fromException -> Just DensityTooLow] -> True
+              _ -> False
             tipPointCorrect = Just (getTrunkTip gtBlockTree) == svTipBlock
          in exnCorrect && tipPointCorrect
     )
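A note on the pattern this series introduces: a view pattern such as
`[fromException -> Just DensityTooLow]` matches a singleton list whose sole
element yields `Just DensityTooLow` under `fromException`. A minimal sketch of
the equivalence (the helper name is hypothetical and not part of this patch;
`DensityTooLow` is assumed to be in scope):

    import Control.Exception (SomeException, fromException)

    -- With ViewPatterns:
    isDensityTooLow :: SomeException -> Bool
    isDensityTooLow (fromException -> Just DensityTooLow) = True
    isDensityTooLow _                                     = False

    -- The same match, written out without the extension:
    isDensityTooLow' :: SomeException -> Bool
    isDensityTooLow' exn = case fromException exn of
      Just DensityTooLow -> True
      _                  -> False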
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs
index 4f2c13b357..fe6b842b54 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs
@@ -2,6 +2,8 @@
 {-# LANGUAGE DerivingStrategies #-}
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ViewPatterns #-}

 module Test.Consensus.Genesis.Tests.LoE (tests) where

@@ -69,10 +71,7 @@ prop_adversaryHitsTimeouts timeoutsEnabled =
         -- `ExceededTimeLimit` exception in the adversary's ChainSync.
         exceptionsCorrect = case exceptionsByComponent ChainSyncClient stateView of
           [] -> not timeoutsEnabled
-          [exn] ->
-            case fromException exn of
-              Just (ExceededTimeLimit _) -> timeoutsEnabled
-              _ -> False
+          [fromException -> Just (ExceededTimeLimit _)] -> timeoutsEnabled
           _ -> False
      in selectedCorrect && exceptionsCorrect
     )

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
index 3faf2eeeee..4c97bac307 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
@@ -2,6 +2,7 @@
 {-# LANGUAGE DerivingStrategies #-}
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}

 module Test.Consensus.Genesis.Tests.LoP (tests) where

@@ -67,12 +68,9 @@ prop_wait mustTimeout =
     shrinkPeerSchedules
     ( \_ stateView ->
         case exceptionsByComponent ChainSyncClient stateView of
-          [] -> not mustTimeout
-          [exn] ->
-            case fromException exn of
-              Just CSClient.EmptyBucket -> mustTimeout
-              _ -> False
-          _ -> False
+          []                                           -> not mustTimeout
+          [fromException -> Just CSClient.EmptyBucket] -> mustTimeout
+          _                                            -> False
     )
   where
     dullSchedule :: (HasHeader blk) => DiffTime -> AnchoredFragment blk -> Peers (PeerSchedule blk)
@@ -147,12 +145,9 @@ prop_serve mustTimeout =
     shrinkPeerSchedules
     ( \_ stateView ->
         case exceptionsByComponent ChainSyncClient stateView of
-          [] -> not mustTimeout
-          [exn] ->
-            case fromException exn of
-              Just CSClient.EmptyBucket -> mustTimeout
-              _ -> False
-          _ -> False
+          []                                           -> not mustTimeout
+          [fromException -> Just CSClient.EmptyBucket] -> mustTimeout
+          _                                            -> False
     )
   where
     lbpCapacity :: Integer = 10
@@ -211,10 +206,9 @@ prop_delayAttack lopEnabled =
         -- If LoP is enabled, then we expect exactly one `EmptyBucket`
         -- exception in the adversary's ChainSync.
         exceptionsCorrect = case exceptionsByComponent ChainSyncClient stateView of
-          [] -> not lopEnabled
-          [exn] ->
-            lopEnabled == (fromException exn == Just CSClient.EmptyBucket)
-          _ -> False
+          []                                           -> not lopEnabled
+          [fromException -> Just CSClient.EmptyBucket] -> lopEnabled
+          _                                            -> False
      in selectedCorrect && exceptionsCorrect
     )
   where

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs
index 3d64c55719..9a79bbab6e 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE ViewPatterns #-}

 -- | The scheduled ChainSync and BlockFetch servers are supposed to be linked,
 -- such that if one gets disconnected, then so does the other. This module
@@ -47,29 +48,17 @@ prop_chainSyncKillsBlockFetch = do
     ( \_ stateView@StateView {svTipBlock} ->
         svTipBlock == Nothing
           && case exceptionsByComponent ChainSyncClient stateView of
-               [exn] ->
-                 case fromException exn of
-                   Just (ExceededTimeLimit _) -> True
-                   _ -> False
-               _ -> False
+               [fromException -> Just (ExceededTimeLimit _)] -> True
+               _ -> False
          && case exceptionsByComponent BlockFetchClient stateView of
-               [exn] ->
-                 case fromException exn of
-                   Just (AsyncCancelled) -> True
-                   _ -> False
-               _ -> False
+               [fromException -> Just AsyncCancelled] -> True
+               _ -> False
          && case exceptionsByComponent ChainSyncServer stateView of
-               [exn] ->
-                 case fromException exn of
-                   Just (AsyncCancelled) -> True
-                   _ -> False
-               _ -> False
+               [fromException -> Just AsyncCancelled] -> True
+               _ -> False
          && case exceptionsByComponent BlockFetchServer stateView of
-               [exn] ->
-                 case fromException exn of
-                   Just (AsyncCancelled) -> True
-                   _ -> False
-               _ -> False
+               [fromException -> Just AsyncCancelled] -> True
+               _ -> False
     )
   where
     dullSchedule :: GenesisTest blk () -> DiffTime -> PeersSchedule blk

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs
index 9570f62595..25f64ca973 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs
@@ -1,5 +1,6 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}

 module Test.Consensus.PeerSimulator.Tests.Timeouts (tests) where

@@ -49,10 +50,7 @@ prop_timeouts mustTimeout = do
         case exceptionsByComponent ChainSyncClient stateView of
           [] -> counterexample ("result: " ++ condense (svSelectedChain stateView)) (not mustTimeout)
-          [exn] ->
-            case fromException exn of
-              Just (ExceededTimeLimit _) -> property mustTimeout
-              _ -> counterexample ("exception: " ++ show exn) False
+          [fromException -> Just (ExceededTimeLimit _)] -> property mustTimeout
           exns -> counterexample ("exceptions: " ++ show exns) False
     )

From 6c253109dc4133242555b12ca3c46c4637fbeeac Mon Sep 17 00:00:00 2001
From: Nicolas BACQUEY
Date: Tue, 2 Apr 2024 16:36:31 +0200
Subject: [PATCH 02/20] Enable the time limited leashing attack test

* Use real bucket parameters to compute time bounds
* Add classifiers for empty/trivial adversarial schedules
* Allow fetching of the same block from many peers
* Time limited leashing attack: Extend the schedule and disable timeouts
* Change BlockFetch initialization to use FetchModeDeadline

Co-authored-by: Alexander Esgen
Co-authored-by: Nicolas BACQUEY
---
 .../Test/Consensus/Genesis/Setup.hs           |  2 +
 .../Consensus/Genesis/Setup/Classifiers.hs    | 21 ++++-
 .../Test/Consensus/Genesis/Tests/Uniform.hs   | 77 ++++++++++++-----
 .../Consensus/PeerSimulator/BlockFetch.hs     |  8 +-
 4 files changed, 83 insertions(+), 25 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
index c208b44d45..ec293f6771 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
@@ -104,6 +104,8 @@ forAllGenesisTest generator schedulerConfig shrinker mkProperty =
     classify (genesisWindowAfterIntersection cls) "Full genesis window after intersection" $
     classify (adversaryRollback schCls) "An adversary did a rollback" $
     classify (honestRollback schCls) "The honest peer did a rollback" $
+    classify (allAdversariesEmpty schCls) "All adversaries have empty schedules" $
+    classify (allAdversariesTrivial schCls) "All adversaries have trivial schedules" $
     tabulate "Adversaries killed by LoP" [printf "%.1f%%" $ adversariesKilledByLoP resCls] $
     tabulate "Adversaries killed by GDD" [printf "%.1f%%" $ adversariesKilledByGDD resCls] $
     tabulate "Adversaries killed by Timeout" [printf "%.1f%%" $ adversariesKilledByTimeout resCls] $

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs
index a33f1d91a2..1f6f2e7267 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs
@@ -199,9 +199,15 @@ resultClassifiers GenesisTest{gtSchedule} RunGenesisTestResult{rgtrStateView} =
 data ScheduleClassifiers =
   ScheduleClassifiers{
     -- | There is an adversary that did a rollback
-    adversaryRollback :: Bool,
+    adversaryRollback     :: Bool,
     -- | The honest peer did a rollback
-    honestRollback :: Bool
+    honestRollback        :: Bool,
+    -- | All adversaries have an empty schedule: the only way to disconnect them is
+    -- via network timeouts.
+    allAdversariesEmpty   :: Bool,
+    -- | All adversaries have trivial schedules: they only have an initial state, and
+    -- do nothing afterwards.
+    allAdversariesTrivial :: Bool
   }

 scheduleClassifiers :: GenesisTestFull TestBlock -> ScheduleClassifiers
@@ -209,6 +215,8 @@ scheduleClassifiers GenesisTest{gtSchedule = schedule} =
   ScheduleClassifiers
     { adversaryRollback
     , honestRollback
+    , allAdversariesEmpty
+    , allAdversariesTrivial
     }
   where
     hasRollback :: PeerSchedule TestBlock -> Bool
@@ -247,6 +255,15 @@ scheduleClassifiers GenesisTest{gtSchedule = schedule} =
     honestRollback = value $ honest rollbacks

+    allAdversariesEmpty = all value $ others $ null <$> schedule
+
+    isTrivial :: PeerSchedule TestBlock -> Bool
+    isTrivial = \case
+      [] -> True
+      (t0, _):points -> all ((== t0) . fst) points
+
+    allAdversariesTrivial = all value $ others $ isTrivial <$> schedule
+
 simpleHash ::
   HeaderHash block ~ TestHash =>
   ChainHash block ->

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
index 9b957c0100..17bc4a4dcc 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
@@ -20,7 +20,7 @@ import Control.Monad.Class.MonadTime.SI (DiffTime, Time (Time),
 import Data.List (intercalate, sort)
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Map.Strict as Map
-import Data.Maybe (catMaybes, mapMaybe)
+import Data.Maybe (catMaybes, fromMaybe, mapMaybe)
 import Data.Word (Word64)
 import GHC.Stack (HasCallStack)
 import Ouroboros.Consensus.Block.Abstract (WithOrigin (NotOrigin))
@@ -256,11 +256,16 @@ prop_leashingAttackStalling =
 -- See Note [Leashing attacks]
 prop_leashingAttackTimeLimited :: Property
 prop_leashingAttackTimeLimited =
-  expectFailure $ forAllGenesisTest
+  forAllGenesisTest

-    (genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule)
+    (disableBoringTimeouts <$> genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule)

-    (defaultSchedulerConfig {scTrace = False})
+    defaultSchedulerConfig
+      { scTrace = False
+      , scEnableLoE = True
+      , scEnableLoP = True
+      , scEnableBlockFetchTimeouts = False
+      }

     shrinkPeerSchedules
@@ -271,34 +276,64 @@ prop_leashingAttackTimeLimited =
     genTimeLimitedSchedule :: GenesisTest TestBlock () -> QC.Gen (PeersSchedule TestBlock)
    genTimeLimitedSchedule genesisTest = do
       Peers honest advs0 <- genUniformSchedulePoints genesisTest
-      let timeLimit = estimateTimeBound (value honest) (map value $ Map.elems advs0)
+      let timeLimit = estimateTimeBound
+            (gtChainSyncTimeouts genesisTest)
+            (gtLoPBucketParams genesisTest)
+            (value honest)
+            (map value $ Map.elems advs0)
           advs = fmap (fmap (takePointsUntil timeLimit)) advs0
-      pure $ Peers honest advs
+          extendedHonest = extendScheduleUntil timeLimit <$> honest
+      pure $ Peers extendedHonest advs

     takePointsUntil limit = takeWhile ((<= limit) . fst)

-    estimateTimeBound :: AF.HasHeader blk => PeerSchedule blk -> [PeerSchedule blk] -> Time
-    estimateTimeBound honest advs =
-      let firstTipPointBlock = headCallStack (mapMaybe fromTipPoint honest)
+    extendScheduleUntil
+      :: Time -> [(Time, SchedulePoint TestBlock)] -> [(Time, SchedulePoint TestBlock)]
+    extendScheduleUntil t [] = [(t, ScheduleTipPoint Origin)]
+    extendScheduleUntil t xs =
+      let (t', p) = last xs
+       in if t < t' then xs
+          else xs ++ [(t, p)]
+
+    disableBoringTimeouts gt =
+      gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt)
+             { canAwaitTimeout = Nothing
+             , mustReplyTimeout = Nothing
+             , idleTimeout = Nothing
+             }
+         }
+
+    estimateTimeBound
+      :: AF.HasHeader blk
+      => ChainSyncTimeout
+      -> LoPBucketParams
+      -> PeerSchedule blk
+      -> [PeerSchedule blk]
+      -> Time
+    estimateTimeBound cst LoPBucketParams{lbpCapacity, lbpRate} honest advs =
+      let firstTipPointTime = fst $ headCallStack (mapMaybe fromTipPoint honest)
           lastBlockPoint = last (mapMaybe fromBlockPoint honest)
-          peerCount = length advs + 1
-          maxBlockNo = maximum $ 0 : blockPointNos honest ++ concatMap blockPointNos advs
-          -- 0.020s is the amount of time LoP grants per interesting header
-          -- 5s is the initial fill of the LoP bucket
+          peerCount = fromIntegral $ length advs + 1
+          maxBlockNo = fromIntegral $ maximum $ 0 : blockPointNos honest ++ concatMap blockPointNos advs
+          timeCapacity = fromRational $ (fromIntegral lbpCapacity) / lbpRate
+          timePerToken = fromRational $ 1 / lbpRate
+          intersectDiffTime = fromMaybe (error "no intersect timeout") (intersectTimeout cst)
+          -- Since the moment a peer offers the first tip, LoP should
+          -- start ticking for it. This can be no later than what the intersect
+          -- timeout allows for all peers.
           --
-          -- Since the moment the honest peer offers the first tip, LoP should
-          -- start ticking. Syncing all the blocks might take longer than it
-          -- takes to dispatch all ticks to the honest peer. In this case
-          -- the syncing time is the time bound for the test. If dispatching
-          -- all the ticks takes longer, then the dispatching time becomes
-          -- the time bound.
+          -- Additionally, the actual delay might be greater if the honest peer
+          -- has its last tick dispatched later.
          --
          -- Adversarial peers might cause more ticks to be sent as well. We
          -- bound it all by considering the highest block number that is ever
          -- sent.
-      in max
+       in addTime 1 $ max
           (fst lastBlockPoint)
-          (addTime (0.020 * fromIntegral maxBlockNo + 5 * fromIntegral peerCount) (fst firstTipPointBlock))
+          (addTime
+            (intersectDiffTime + timePerToken * maxBlockNo + timeCapacity * peerCount)
+            firstTipPointTime
+          )

     blockPointNos :: AF.HasHeader blk => [(Time, SchedulePoint blk)] -> [Word64]
     blockPointNos =
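A worked instance of the new bound, with illustrative parameters rather than
values from the test suite: for `lbpCapacity = 10` tokens, `lbpRate = 2` tokens
per second, an intersect timeout of 5s, `peerCount = 3` and `maxBlockNo = 20`,
the dispatching term evaluates to

    intersectDiffTime + timePerToken * maxBlockNo + timeCapacity * peerCount
      = 5 + (1 / 2) * 20 + (10 / 2) * 3
      = 30

so the schedule is cut at `addTime 1 (max (fst lastBlockPoint) (addTime 30 firstTipPointTime))`.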
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
index d4a487818a..536a49f2fc 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
@@ -93,7 +93,11 @@ startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates =
       -- do not serialize the blocks.
       (\_hdr -> 1000)
       slotForgeTime
-      (pure FetchModeBulkSync)
+      -- Initially, we tried FetchModeBulkSync, but adversaries had the
+      -- opportunity to delay syncing by not responding to block requests.
+      -- The BlockFetch logic would then wait for the timeout to expire
+      -- before trying to download the block from another peer.
+      (pure FetchModeDeadline)

 -- Values taken from
 -- ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
@@ -108,7 +112,7 @@ startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates =
       -- advanced to allow completion of the batch.
       --
       bfcMaxConcurrencyBulkSync = 50
-      , bfcMaxConcurrencyDeadline = 2
+      , bfcMaxConcurrencyDeadline = 50
       , bfcMaxRequestsInflight = 10
       , bfcDecisionLoopInterval = 0
       , bfcSalt = 0

From dcb20f505e11155b29fc6d156ee2b4a5359fa5a2 Mon Sep 17 00:00:00 2001
From: Facundo Domínguez
Date: Thu, 11 Apr 2024 15:29:12 +0000
Subject: [PATCH 03/20] Have GDD disconnect nodes in previously inconclusive
 situations

* Disconnect peers with 0 density

  This affects ChainSync jumping, where genesis windows with no headers
  prevent jumps from happening.

* Have GDD disconnect peers with disagreeing chains of equal density

  Before this change, the GDD governor would tolerate different chains
  having the same density. This is problematic to ChainSync jumping if the
  only active peers (dynamo and objector) disagree with chains of the same
  density, and GDD cannot disconnect any of them.

* Don't mix idling in the computation of the upper bound
* Simplify the condition to disconnect 0-density peers
* Note the redundancy when checking for disagreement in chains
* Peers with less than k+1 headers can disconnect peers which sent all
  headers in the genesis window
* Rename fragment to clippedFragment in GDD
---
 .../Test/Consensus/PeerSimulator/Trace.hs     |  6 +-
 .../Ouroboros/Consensus/Genesis/Governor.hs   | 92 ++++++++++++-----
 2 files changed, 72 insertions(+), 26 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs
index cc76540ad4..2342b3e01e 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs
@@ -376,7 +376,7 @@ terseGDDEvent = \case
       [ "  Candidate suffixes (bounds):" ]
       ++
-      showPeers (terseHFragment . fragment <$> bounds)
+      showPeers (terseHFragment . clippedFragment <$> bounds)
       ++
       ["  Density bounds:"] ++
       showPeers (showBounds <$> bounds) ++
      ["  New candidate tips:"] ++
@@ -386,7 +386,7 @@ terseGDDEvent = \case
       "  Setting loeFrag: " ++ terseAnchor (AF.castAnchor loeHead)
      ]
     where
-      showBounds DensityBounds {fragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling} =
+      showBounds DensityBounds {clippedFragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling} =
         show lowerBound ++ "/" ++ show upperBound ++ "[" ++ more ++ "], " ++
         lastPoint ++ "latest: " ++ showLatestSlot latestSlot ++ block ++ showIdling
         where
         lastPoint =
           "point: " ++
-          tersePoint (castPoint @(Header TestBlock) @TestBlock (AF.headPoint fragment)) ++
+          tersePoint (castPoint @(Header TestBlock) @TestBlock (AF.headPoint clippedFragment)) ++
           ", "

         showLatestSlot = \case

diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
index 2448dcd755..02cb70527d 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
@@ -169,7 +169,7 @@ runGdd loEUpdater varLoEFrag chainDb getTrigger =
 data DensityBounds blk =
   DensityBounds {
-    fragment        :: AnchoredFragment (Header blk),
+    clippedFragment :: AnchoredFragment (Header blk),
     offersMoreThanK :: Bool,
     lowerBound      :: Word64,
     upperBound      :: Word64,
@@ -208,7 +208,7 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe
   (losingPeers, densityBounds)
   where
     densityBounds = Map.fromList $ do
-      (peer, fragment) <- Map.toList competingFrags
+      (peer, clippedFragment) <- Map.toList clippedFrags
       state <- maybeToList (states Map.!? peer)
       -- Skip peers that haven't sent any headers yet.
       -- They should be disconnected by timeouts instead.
@@ -222,25 +222,23 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe
         max (AF.headSlot candidateSuffix) latestSlot >=
         NotOrigin firstSlotAfterGenesisWindow

-      -- If the peer is idling, we assume it has no more headers to send.
-      --
       -- If the slot of the latest header we know of is _after_ the end of
       -- the Genesis window (either because the candidate fragment extends
       -- beyond it or because we are waiting to validate a header beyond the
       -- forecast horizon that we already received), there can be no headers
       -- in between and 'potentialSlots' is 0.
-      --
-      -- If the peer has more headers that it hasn't sent yet, each slot
-      -- between the latest header we know of and the end of the Genesis
-      -- window could contain a block, so the upper bound for the total
-      -- number of blocks in the window is the sum of the known blocks and
-      -- that number of remaining slots.
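+      -- A worked instance of the new bound, with illustrative numbers: if
+      -- the first slot after the genesis window is 10 and the clipped
+      -- fragment's head sits in slot 6, then
+      -- @unknownTrailingSlots = 10 - 7 = 3@ and @upperBound = lowerBound + 3@,
+      -- unless a block after the window is already known, in which case
+      -- @potentialSlots = 0@.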
       potentialSlots =
-        if idling || hasBlockAfter then 0
-        else sgen - totalBlockCount
+        if hasBlockAfter then 0
+        else unknownTrailingSlots
+
+      -- Number of trailing slots in the genesis window that could have
+      -- headers which haven't been sent yet
+      unknownTrailingSlots = unSlotNo $
+        -- cannot underflow as the fragment is clipped to the genesis window
+        firstSlotAfterGenesisWindow - succWithOrigin (AF.headSlot clippedFragment)

       -- The number of blocks within the Genesis window we know with certainty
-      lowerBound = fromIntegral $ AF.length fragment
+      lowerBound = fromIntegral $ AF.length clippedFragment

       upperBound = lowerBound + potentialSlots

       -- If not, it is not qualified to compete by density (yet).
       offersMoreThanK = totalBlockCount > k

-      pure (peer, DensityBounds {fragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling})
+      pure (peer, DensityBounds {clippedFragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling})

-    losingPeers = nubOrd $ do
-      (peer0 , DensityBounds {fragment = frag0, upperBound = ub0}) <-
-        Map.toList densityBounds
-      (_peer1, DensityBounds {fragment = frag1, offersMoreThanK, lowerBound = lb1 }) <-
+    losingPeers = nubOrd $ Map.toList densityBounds >>= \
+      (peer0 , DensityBounds { clippedFragment = frag0
+                             , lowerBound = lb0
+                             , upperBound = ub0
+                             , hasBlockAfter = hasBlockAfter0
+                             , idling = idling0
+                             }) ->
+      -- If the density is 0, the peer should be disconnected. This affects
+      -- ChainSync jumping, where genesis windows with no headers prevent jumps
+      -- from happening.
+      if ub0 == 0 then pure peer0 else do
+      (_peer1, DensityBounds {clippedFragment = frag1, offersMoreThanK, lowerBound = lb1 }) <-
         Map.toList densityBounds
+      -- Don't disconnect peer0 if it sent no headers after the intersection yet
+      -- and it is not idling.
+      --
+      -- See Note [Chain disagreement]
+      --
+      -- Note: hasBlockAfter0 is False if frag0 is empty and ub0>0.
+      -- But we leave it here as a reminder that we care about it.
+      guard $ idling0 || not (AF.null frag0) || hasBlockAfter0
       -- ensure that the two peer fragments don't share any
       -- headers after the LoE
       guard $ AF.lastPoint frag0 /= AF.lastPoint frag1
-      -- peer1 offers more than k blocks
-      guard offersMoreThanK
-      -- peer1 definitely has higher density than peer0
-      guard $ lb1 > ub0
+      -- peer1 offers more than k blocks or peer0 has sent all headers in the
+      -- genesis window after the intersection (idling or not)
+      guard $ offersMoreThanK || lb0 == ub0
+      -- peer1 has the same or better density than peer0
+      -- If peer0 is idling, we assume no more headers will be sent.
+      --
+      -- Having the same density is enough to disconnect peer0, as the honest
+      -- chain is expected to have a strictly higher density than all of the
+      -- other chains.
+      --
+      -- This matters to ChainSync jumping, where adversarial dynamo and
+      -- objector could offer chains of equal density.
+      guard $ lb1 >= (if idling0 then lb0 else ub0)
       pure peer0

     loeIntersectionSlot = AF.headSlot loeFrag

     dropBeyondGenesisWindow =
       AF.takeWhileOldest ((< firstSlotAfterGenesisWindow) . blockSlot)

-    competingFrags =
+    clippedFrags =
       Map.map dropBeyondGenesisWindow candidateSuffixes

+-- Note [Chain disagreement]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Imagine two peers serving the following chain:
+--
+-- > k: 1
+-- > sgen: 2
+-- >
+-- >   0 1 2
+-- > G---1-2
+--
+-- Say peer1 sent no headers yet and peer2 sent 2 headers.
+-- The intersection of both is G, the density of peer2's chain is 2,
+-- while the upper bound of the density of peer1 is also 2.
+--
+-- For GDD to disconnect peer1 safely, it is essential that both chains
+-- disagree after the intersection.
+--
+-- To know if the chains will disagree we defer disconnecting peer1
+-- until it declares to have no more headers, or until it sends one header
+-- after the intersection. If both chains agree on the next header after
+-- the intersection, we don't disconnect peer1 either.
+
 data TraceGDDEvent peer blk =
   TraceGDDEvent {
     bounds :: Map peer (DensityBounds blk),

From d70e6049e321898766383639844c5c965d4348ed Mon Sep 17 00:00:00 2001
From: Facundo Domínguez
Date: Thu, 11 Apr 2024 20:26:17 +0000
Subject: [PATCH 04/20] Fix shrinking behavior for
 prop_densityDisconnectTriggersChainSel and explain the result of the test

---
 .../Test/Consensus/Genesis/Tests/DensityDisconnect.hs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
index 8ce27f9b99..cba130d57c 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
@@ -336,13 +336,17 @@ prop_densityDisconnectTriggersChainSel =

     shrinkByRemovingAdversaries

-    ( \GenesisTest {gtBlockTree} stateView@StateView {svTipBlock} ->
+    ( \GenesisTest {gtBlockTree, gtSchedule} stateView@StateView {svTipBlock} ->
         let
+          othersCount = Map.size (others gtSchedule)
           exnCorrect = case exceptionsByComponent ChainSyncClient stateView of
             [fromException -> Just DensityTooLow] -> True
+            [] | othersCount == 0 -> True
             _ -> False
           tipPointCorrect = Just (getTrunkTip gtBlockTree) == svTipBlock
-        in exnCorrect && tipPointCorrect
+        in counterexample "Unexpected exceptions" exnCorrect
+           .&&.
+           counterexample "The tip of the final selection is not the expected one" tipPointCorrect
     )
   where

From b0ed1c8990d08a752f85e0434966361ad8262b9c Mon Sep 17 00:00:00 2001
From: Facundo Domínguez
Date: Thu, 11 Apr 2024 20:26:47 +0000
Subject: [PATCH 05/20] Make the documentation of addBranch and addBranch' more
 precise

---
 .../test/consensus-test/Test/Consensus/BlockTree.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/BlockTree.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/BlockTree.hs
index 8f08045b7d..ac680ae2c8 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/BlockTree.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/BlockTree.hs
@@ -79,7 +79,7 @@ mkTrunk btTrunk = BlockTree { btTrunk, btBranches = [] }

 -- | Add a branch to an existing block tree.
 --
--- PRECONDITION: The given fragment intersects with the trunk or its anchor.
+-- Yields @Nothing@ if the given fragment does not intersect with the trunk or its anchor.
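+--
+-- A usage sketch of the new contract (with hypothetical @branch@ and @tree@
+-- values; this is exactly what 'addBranch'' below does):
+--
+-- > case addBranch branch tree of
+-- >   Just tree' -> tree'
+-- >   Nothing    -> error "the branch does not intersect with the trunk"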
 --
 -- FIXME: we should enforce that the branch's prefix shares the same anchor as
 -- the trunk.
 addBranch :: AF.HasHeader blk => AF.AnchoredFragment blk -> BlockTree blk -> Maybe (BlockTree blk)
 addBranch branch bt = do
@@ -94,7 +94,7 @@ addBranch branch bt = do
   let btbFull = fromJust $ AF.join btbPrefix btbSuffix
   pure $ bt { btBranches = BlockTreeBranch { .. } : btBranches bt }

--- | Same as @addBranch@ but assumes that the precondition holds.
+-- | Same as @addBranch@ but calls 'error' if the former yields 'Nothing'.
 addBranch' :: AF.HasHeader blk => AF.AnchoredFragment blk -> BlockTree blk -> BlockTree blk
 addBranch' branch blockTree =
   fromMaybe (error "addBranch': precondition does not hold") $ addBranch branch blockTree

From 97942922e79c4670b399bf3d6cf8aaa89c6debd2 Mon Sep 17 00:00:00 2001
From: Nicolas BACQUEY
Date: Wed, 3 Apr 2024 17:34:35 +0200
Subject: [PATCH 06/20] Introduce shrinking for honest peers

---
 .../ouroboros-consensus-diffusion.cabal       |   1 +
 .../test/consensus-test/Main.hs               |   2 +
 .../Test/Consensus/Genesis/Tests/Uniform.hs   |   5 +-
 .../Test/Consensus/PointSchedule/Peers.hs     |   3 +
 .../Test/Consensus/PointSchedule/Shrinking.hs |  93 +++++++++++++--
 .../PointSchedule/Shrinking/Tests.hs          | 107 ++++++++++++++++++
 6 files changed, 201 insertions(+), 10 deletions(-)
 create mode 100644 ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs

diff --git a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal
index 65774575e3..bca3f28292 100644
--- a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal
+++ b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal
@@ -259,6 +259,7 @@ test-suite consensus-test
     Test.Consensus.PointSchedule
     Test.Consensus.PointSchedule.Peers
     Test.Consensus.PointSchedule.Shrinking
+    Test.Consensus.PointSchedule.Shrinking.Tests
     Test.Consensus.PointSchedule.SinglePeer
     Test.Consensus.PointSchedule.SinglePeer.Indices
     Test.Consensus.PointSchedule.Tests

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Main.hs b/ouroboros-consensus-diffusion/test/consensus-test/Main.hs
index 0f54918b1d..e7174e7503 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Main.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Main.hs
@@ -5,6 +5,7 @@ import qualified Test.Consensus.GSM (tests)
 import qualified Test.Consensus.HardFork.Combinator (tests)
 import qualified Test.Consensus.Node (tests)
 import qualified Test.Consensus.PeerSimulator.Tests (tests)
+import qualified Test.Consensus.PointSchedule.Shrinking.Tests (tests)
 import qualified Test.Consensus.PointSchedule.Tests (tests)
 import Test.Tasty
 import Test.Util.TestEnv (defaultMainWithTestEnv,
@@ -25,5 +26,6 @@ tests =
   , Test.Consensus.Genesis.Tests.tests
   , testGroup "GSM" Test.Consensus.GSM.tests
   , Test.Consensus.PeerSimulator.Tests.tests
+  , Test.Consensus.PointSchedule.Shrinking.Tests.tests
   , Test.Consensus.PointSchedule.Tests.tests
   ]

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
index 17bc4a4dcc..c519b71337 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
@@ -11,7 +11,10 @@
 -- block tree with the right age (roughly @k@ blocks from the tip). Contrary to
 -- other test cases (eg. long range attack), the schedules are not particularly
 -- biased towards a specific situation.
-module Test.Consensus.Genesis.Tests.Uniform (tests) where
+module Test.Consensus.Genesis.Tests.Uniform (
+    genUniformSchedulePoints
+  , tests
+  ) where

 import Cardano.Slotting.Slot (SlotNo (SlotNo), WithOrigin (..))
 import Control.Monad (replicateM)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs
index bacf1ea728..78e3ceebef 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs
@@ -108,6 +108,9 @@ data Peers a =
 instance Functor Peers where
   fmap f Peers {honest, others} = Peers {honest = f <$> honest, others = fmap f <$> others}

+instance Foldable Peers where
+  foldMap f Peers {honest, others} = (f . value) honest <> foldMap (f . value) others
+
 -- | A set of peers with only one honest peer carrying the given value.
 peersOnlyHonest :: a -> Peers a
 peersOnlyHonest value =
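A brief note on the new instance, relying only on the definition above:
`foldMap` visits the honest peer's value first and then the adversaries'
values in `Map` key order, so for example

    import Data.Foldable (toList)

    -- using 'peersOnlyHonest' from this module:
    toList (peersOnlyHonest x) == [x]

The shrinking tests introduced later in this series depend on this traversal
order when zipping original and shrunk schedules together.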
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs
index 9c7398b1d2..e424560fbe 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs
@@ -1,24 +1,29 @@
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE NamedFieldPuns #-}

 module Test.Consensus.PointSchedule.Shrinking (
     shrinkByRemovingAdversaries
+    -- | Exported only for testing (that is, checking the properties of the function)
+  , shrinkHonestPeer
   , shrinkPeerSchedules
-  , trimBlockTree'
   ) where

+import Control.Monad.Class.MonadTime.SI (DiffTime, Time, addTime,
+    diffTime)
 import Data.Containers.ListUtils (nubOrd)
 import Data.Functor ((<&>))
 import qualified Data.Map.Strict as Map
-import Data.Maybe (mapMaybe)
+import Data.Maybe (mapMaybe, maybeToList)
 import Ouroboros.Network.AnchoredFragment (AnchoredFragment,
     AnchoredSeq (Empty), takeWhileOldest)
 import Test.Consensus.BlockTree (BlockTree (..), BlockTreeBranch (..),
     addBranch', mkTrunk)
 import Test.Consensus.PeerSimulator.StateView (StateView)
-import Test.Consensus.PointSchedule
-    (GenesisTest (gtBlockTree, gtSchedule), GenesisTestFull,
-    PeerSchedule, PeersSchedule, peerSchedulesBlocks)
-import Test.Consensus.PointSchedule.Peers (Peers (..))
+import Test.Consensus.PointSchedule (GenesisTest (..),
+    GenesisTestFull, PeerSchedule, PeersSchedule,
+    peerSchedulesBlocks)
+import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..))
+import Test.Consensus.PointSchedule.SinglePeer (SchedulePoint (..))
 import Test.QuickCheck (shrinkList)
 import Test.Util.TestBlock (TestBlock, isAncestorOf,
     isStrictAncestorOf)
@@ -32,9 +37,17 @@ shrinkPeerSchedules ::
   StateView TestBlock ->
   [GenesisTestFull TestBlock]
 shrinkPeerSchedules genesisTest _stateView =
-  shrinkOtherPeers shrinkPeerSchedule (gtSchedule genesisTest) <&> \shrunkSchedule ->
-    let trimmedBlockTree = trimBlockTree' shrunkSchedule (gtBlockTree genesisTest)
-     in genesisTest{gtSchedule = shrunkSchedule, gtBlockTree = trimmedBlockTree}
+  let trimmedBlockTree sch = trimBlockTree' sch (gtBlockTree genesisTest)
+      shrunkOthers = shrinkOtherPeers shrinkPeerSchedule (gtSchedule genesisTest) <&>
+        \shrunkSchedule -> genesisTest
+          { gtSchedule = shrunkSchedule
+          , gtBlockTree = trimmedBlockTree shrunkSchedule
+          }
+      shrunkHonest = shrinkHonestPeer
+        (gtSchedule genesisTest)
+        -- No need to update the tree here, shrinking the honest peer never discards blocks
+        <&> \shrunkSchedule -> genesisTest {gtSchedule = shrunkSchedule}
+   in shrunkOthers ++ shrunkHonest

 -- | Shrink a 'Peers PeerSchedule' by removing adversaries. This does not affect
 -- the honest peer; and it does not remove ticks from the schedules of the
 -- remaining peers.
 shrinkOtherPeers shrink Peers{honest, others} =
   map (Peers honest . Map.fromList) $
   shrinkList (traverse (traverse shrink)) $ Map.toList others

+-- | Shrinks an honest peer by removing ticks.
+-- Because we are manipulating `PeerSchedule` at that point, there is no proper
+-- notion of a tick. Instead, we remove points of the honest `PeerSchedule`,
+-- and move all other points sooner, including those on the adversarial schedule.
+-- We check that this operation neither changes the final state of the honest
+-- peer nor removes points from the adversarial schedules.
+shrinkHonestPeer :: Peers (PeerSchedule blk) -> [Peers (PeerSchedule blk)]
+shrinkHonestPeer Peers{honest, others} = do
+  (at, speedUpBy) <- splits
+  (honest', others') <- maybeToList $ do
+    honest' <- traverse (speedUpHonestSchedule at speedUpBy) honest
+    others' <- mapM (traverse (speedUpAdversarialSchedule at speedUpBy)) others
+    pure (honest', others')
+  pure $ Peers honest' others'
+  where
+    -- | A list of non-zero time intervals between successive points of the honest schedule
+    splits :: [(Time, DiffTime)]
+    splits = mapMaybe
+      (\((t1, _), (t2, _)) ->
+        if t1 == t2
+          then Nothing
+          else Just (t1, diffTime t2 t1)
+      )
+      (zip (value honest) (drop 1 $ value honest))
+
+-- | Speeds up an honest schedule after time `at` by `speedUpBy`.
+-- This "speeding up" is done by subtracting `speedUpBy` from all points after
+-- `at`, and dropping those points if they would then fall before `at`. We check
+-- that the operation doesn't change the final state of the peer, i.e. that it
+-- doesn't remove all TP, HP, and BP in the sped-up part.
+speedUpHonestSchedule :: Time -> DiffTime -> PeerSchedule blk -> Maybe (PeerSchedule blk)
+speedUpHonestSchedule at speedUpBy sch =
+  if stillValid then Just $ beforeSplit ++ spedUpSchedule else Nothing
+  where
+    (beforeSplit, afterSplit) = span ((< at) . fst) sch
+    threshold = addTime speedUpBy at
+    spedUpSchedule = mapMaybe
+      (\(t, p) -> if t < threshold then Nothing else Just (addTime (-speedUpBy) t, p))
+      afterSplit
+    stillValid =
+      (hasTP spedUpSchedule == hasTP afterSplit)
+        && (hasHP spedUpSchedule == hasHP afterSplit)
+        && (hasBP spedUpSchedule == hasBP afterSplit)
+    hasTP = any (\case (_, ScheduleTipPoint _) -> True; _ -> False)
+    hasHP = any (\case (_, ScheduleHeaderPoint _) -> True; _ -> False)
+    hasBP = any (\case (_, ScheduleBlockPoint _) -> True; _ -> False)
+
+-- | Speeds up an adversarial schedule after time `at` by `speedUpBy`.
+-- This "speeding up" is done by subtracting `speedUpBy` from all points after `at`.
+-- We check that the schedule had no points between `at` and `at + speedUpBy`.
+-- We also keep the last point where it is, so that the end time stays the same.
+speedUpAdversarialSchedule :: Time -> DiffTime -> PeerSchedule blk -> Maybe (PeerSchedule blk)
+speedUpAdversarialSchedule at speedUpBy sch =
+  if losesPoint then Nothing else Just $ beforeSplit ++ spedUpSchedule ++ lastPoint
+  where
+    (beforeSplit, afterSplit) = span ((< at) . fst) sch
+    spedUpSchedule = map (\(t, p) -> (addTime (-speedUpBy) t, p)) $ take (length afterSplit - 1) afterSplit
+    losesPoint = any ((< (addTime speedUpBy at)) . fst) afterSplit
+    lastPoint = case afterSplit of
+      [] -> []
+      as -> [last as]
+
 -- | Remove blocks from the given block tree that are not necessary for the
 -- given peer schedules. If entire branches are unused, they are removed. If the
 -- trunk is unused, then it remains as an empty anchored fragment.

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs
new file mode 100644
index 0000000000..8332f7be83
--- /dev/null
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs
@@ -0,0 +1,107 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+-- | Test properties of the shrinking functions
+module Test.Consensus.PointSchedule.Shrinking.Tests (tests) where
+
+import Data.Foldable (toList)
+import Data.Map (keys)
+import Data.Maybe (mapMaybe)
+import Test.Consensus.Genesis.Setup (genChains)
+import Test.Consensus.Genesis.Tests.Uniform (genUniformSchedulePoints)
+import Test.Consensus.PointSchedule (PeerSchedule, PeersSchedule,
+    prettyPeersSchedule)
+import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..))
+import Test.Consensus.PointSchedule.Shrinking (shrinkHonestPeer)
+import Test.Consensus.PointSchedule.SinglePeer (SchedulePoint (..))
+import Test.QuickCheck (Property, conjoin, counterexample)
+import Test.Tasty
+import Test.Tasty.QuickCheck (choose, forAllBlind, testProperty)
+import Test.Util.TestBlock (TestBlock)
+
+tests :: TestTree
+tests =
+  testGroup "shrinking functions"
+    [ testGroup "honest peer shrinking"
+      [ testProperty "actually shortens the schedule" prop_shortens
+      , testProperty "preserves the final state of all peers" prop_preservesFinalStates
+      , testProperty "doesn't remove points of the adversarial schedule" prop_preservesAdversarial
+      ]
+    ]
+
+prop_shortens :: Property
+prop_shortens = checkShrinkProperty isShorterThan
+
+prop_preservesFinalStates :: Property
+prop_preservesFinalStates = checkShrinkProperty doesNotChangeFinalState
+
+prop_preservesAdversarial :: Property
+prop_preservesAdversarial = checkShrinkProperty doesNotRemoveAdversarialPoints
+
+-- | Apparently, `unsnoc` hasn't been invented yet, so we'll do this manually
+lastM :: [a] -> Maybe a
+lastM [] = Nothing
+lastM [a] = Just a
+lastM (_:ps) = lastM ps
+
+samePeers :: PeersSchedule blk -> PeersSchedule blk -> Bool
+samePeers sch1 sch2 = (keys $ others sch1) == (keys $ others sch2)
+
+-- | Checks whether at least one peer schedule in the second given peers schedule
+-- is shorter than its corresponding one in the first given peers schedule. “Shorter”
+-- here means that it executes in less time.
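+--
+-- A hedged example with illustrative times, assuming 'peersOnlyHonest' and
+-- some schedule point @p@ are in scope:
+--
+-- > isShorterThan (peersOnlyHonest [(Time 10, p)])
+-- >               (peersOnlyHonest [(Time 8, p)])
+-- > == True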
+isShorterThan :: PeersSchedule blk -> PeersSchedule blk -> Bool
+isShorterThan original shrunk =
+  samePeers original shrunk
+  && (or $ zipWith
+      (\oldSch newSch -> (fst <$> lastM newSch) < (fst <$> lastM oldSch))
+      (toList original)
+      (toList shrunk)
+     )
+
+doesNotChangeFinalState :: Eq blk => PeersSchedule blk -> PeersSchedule blk -> Bool
+doesNotChangeFinalState original shrunk =
+  samePeers original shrunk
+  && (and $ zipWith
+      (\oldSch newSch ->
+        lastTP oldSch == lastTP newSch &&
+        lastHP oldSch == lastHP newSch &&
+        lastBP oldSch == lastBP newSch
+      )
+      (toList original)
+      (toList shrunk)
+     )
+  where
+    lastTP :: PeerSchedule blk -> Maybe (SchedulePoint blk)
+    lastTP sch = lastM $ mapMaybe (\case (_, p@(ScheduleTipPoint _)) -> Just p ; _ -> Nothing) sch
+    lastHP :: PeerSchedule blk -> Maybe (SchedulePoint blk)
+    lastHP sch = lastM $ mapMaybe (\case (_, p@(ScheduleHeaderPoint _)) -> Just p ; _ -> Nothing) sch
+    lastBP :: PeerSchedule blk -> Maybe (SchedulePoint blk)
+    lastBP sch = lastM $ mapMaybe (\case (_, p@(ScheduleBlockPoint _)) -> Just p ; _ -> Nothing) sch
+
+doesNotRemoveAdversarialPoints :: Eq blk => PeersSchedule blk -> PeersSchedule blk -> Bool
+doesNotRemoveAdversarialPoints original shrunk =
+  samePeers original shrunk
+  && (and $ zipWith
+      (\oldSch newSch -> fmap snd oldSch == fmap snd newSch)
+      (toList $ (fmap value) $ others original)
+      (toList $ (fmap value) $ others shrunk)
+     )
+
+checkShrinkProperty :: (PeersSchedule TestBlock -> PeersSchedule TestBlock -> Bool) -> Property
+checkShrinkProperty prop =
+  forAllBlind
+    (genChains (choose (1, 4)) >>= genUniformSchedulePoints)
+    (\schedule ->
+      conjoin $ map
+        (\shrunk ->
+          counterexample
+            ( "Original schedule:\n"
+              ++ unlines (map ("  " ++) $ prettyPeersSchedule schedule)
+              ++ "\nShrunk schedule:\n"
+              ++ unlines (map ("  " ++) $ prettyPeersSchedule shrunk)
+            )
+            (prop schedule shrunk)
+        )
+        (shrinkHonestPeer schedule)
+    )

From 5d3c20010cc36a6f7a85ff45ef7e8bd9a5534aaf Mon Sep 17 00:00:00 2001
From: Torsten Schmits
Date: Mon, 15 Apr 2024 23:35:32 +0200
Subject: [PATCH 07/20] Add shutdown/restart capabilities to the peer simulator

---
 .../ouroboros-consensus-diffusion.cabal       |   1 +
 .../Test/Consensus/Genesis/Tests/Uniform.hs   |  54 +--
 .../Consensus/PeerSimulator/NodeLifecycle.hs  | 220 ++++++++++++
 .../Test/Consensus/PeerSimulator/Run.hs       | 321 +++++++++++-------
 .../Test/Consensus/PeerSimulator/StateView.hs |  14 +-
 .../Test/Consensus/PeerSimulator/Trace.hs     |  18 +-
 .../Test/Consensus/PointSchedule.hs           | 162 ++++++++-
 7 files changed, 634 insertions(+), 156 deletions(-)
 create mode 100644 ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs

diff --git a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal
index bca3f28292..83aaa24e2b 100644
--- a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal
+++ b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal
@@ -244,6 +244,7 @@ test-suite consensus-test
     Test.Consensus.PeerSimulator.ChainSync
     Test.Consensus.PeerSimulator.Config
    Test.Consensus.PeerSimulator.Handlers
+    Test.Consensus.PeerSimulator.NodeLifecycle
     Test.Consensus.PeerSimulator.Resources
     Test.Consensus.PeerSimulator.Run
     Test.Consensus.PeerSimulator.ScheduledBlockFetchServer

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
index c519b71337..0412a1c86b 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
@@ -18,12 +18,11 @@ module Test.Consensus.Genesis.Tests.Uniform (
 import Cardano.Slotting.Slot (SlotNo (SlotNo), WithOrigin (..))
 import Control.Monad (replicateM)
-import Control.Monad.Class.MonadTime.SI (DiffTime, Time (Time),
-    addTime)
+import Control.Monad.Class.MonadTime.SI (Time, addTime)
 import Data.List (intercalate, sort)
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Map.Strict as Map
-import Data.Maybe (catMaybes, fromMaybe, mapMaybe)
+import Data.Maybe (fromMaybe, mapMaybe)
 import Data.Word (Word64)
 import GHC.Stack (HasCallStack)
 import Ouroboros.Consensus.Block.Abstract (WithOrigin (NotOrigin))
@@ -70,7 +69,13 @@ tests =
     adjustQuickCheckTests (`div` 10) $
     testProperty "serve adversarial branches" prop_serveAdversarialBranches,
     adjustQuickCheckTests (`div` 100) $
-    testProperty "the LoE stalls the chain, but the immutable tip is honest" prop_loeStalling
+    testProperty "the LoE stalls the chain, but the immutable tip is honest" prop_loeStalling,
+    adjustQuickCheckTests (`div` 100) $
+    -- This is a crude way of ensuring that we don't get chains with more than 100 blocks,
+    -- because this test writes the immutable chain to disk and `instance Binary TestBlock`
+    -- chokes on long chains.
+    adjustQuickCheckMaxSize (const 10) $
+    testProperty "the node is shut down and restarted after some time" prop_downtime
   ]

 theProperty ::
@@ -204,22 +209,9 @@ prop_leashingAttackStalling =
     -- timeouts to disconnect adversaries.
     genLeashingSchedule :: GenesisTest TestBlock () -> QC.Gen (PeersSchedule TestBlock)
     genLeashingSchedule genesisTest = do
-      Peers honest advs0 <- genUniformSchedulePoints genesisTest
-      let peerCount = 1 + length advs0
-          extendedHonest =
-            duplicateLastPoint (endingDelay peerCount genesisTest) <$> honest
+      Peers honest advs0 <- ensureScheduleDuration genesisTest <$> genUniformSchedulePoints genesisTest
       advs <- mapM (mapM dropRandomPoints) advs0
-      pure $ Peers extendedHonest advs
-
-    endingDelay peerCount gt =
-      let cst = gtChainSyncTimeouts gt
-          bft = gtBlockFetchTimeouts gt
-       in 1 + fromIntegral peerCount * maximum (0 : catMaybes
-            [ canAwaitTimeout cst
-            , intersectTimeout cst
-            , busyTimeout bft
-            , streamingTimeout bft
-            ])
+      pure $ Peers honest advs

     disableBoringTimeouts gt =
@@ -242,13 +234,6 @@ prop_leashingAttackStalling =
       let (ys, zs) = splitAt i xs
       in ys ++ dropElemsAt (drop 1 zs) is

-    duplicateLastPoint
-      :: DiffTime -> [(Time, SchedulePoint TestBlock)] -> [(Time, SchedulePoint TestBlock)]
-    duplicateLastPoint d [] = [(Time d, ScheduleTipPoint Origin)]
-    duplicateLastPoint d xs =
-      let (t, p) = last xs
-      in xs ++ [(addTime d t, p)]
-
 -- | Test that the leashing attacks do not delay the immutable tip after. The
@@ -393,3 +378,20 @@ prop_loeStalling =
     allTips = simpleHash . AF.headHash <$> (btTrunk : suffixes)

     suffixes = btbSuffix <$> btBranches
+
+-- | This test sets 'scDowntime', which instructs the scheduler to shut all
+-- components down whenever a tick's duration is greater than 11 seconds, and
+-- to restart them after advancing the time, preserving only the immutable DB.
+--
+-- This ensures that a user may shut down their machine while syncing without additional vulnerabilities.
+prop_downtime :: Property
+prop_downtime = forAllGenesisTest
+
+    (genChains (QC.choose (1, 4)) `enrichedWith` \ gt ->
+      ensureScheduleDuration gt <$> stToGen (uniformPointsWithDowntime (gtSecurityParam gt) (gtBlockTree gt)))
+
+    (defaultSchedulerConfig
+      {scEnableLoE = True, scEnableLoP = True, scDowntime = Just 11})
+
+    shrinkPeerSchedules
+
+    theProperty

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs
new file mode 100644
index 0000000000..f83d1c32b5
--- /dev/null
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs
@@ -0,0 +1,220 @@
+{-# LANGUAGE DisambiguateRecordFields #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module Test.Consensus.PeerSimulator.NodeLifecycle (
+    LiveInterval (..)
+  , LiveIntervalResult (..)
+  , LiveNode (..)
+  , LiveResources (..)
+  , NodeLifecycle (..)
+  , lifecycleStart
+  , lifecycleStop
+  , restoreNode
+  ) where
+
+import Control.Tracer (Tracer (..), traceWith)
+import Data.Functor (void)
+import Data.Set (Set)
+import qualified Data.Set as Set
+import Ouroboros.Consensus.Block
+import Ouroboros.Consensus.Config (TopLevelConfig (..))
+import Ouroboros.Consensus.Storage.ChainDB.API
+import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB
+import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB
+import Ouroboros.Consensus.Storage.ChainDB.Impl.Args (cdbsLoE,
+    updateTracer)
+import Ouroboros.Consensus.Util.IOLike
+import Ouroboros.Consensus.Util.ResourceRegistry
+import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
+import qualified Ouroboros.Network.AnchoredFragment as AF
+import qualified System.FS.Sim.MockFS as MockFS
+import System.FS.Sim.MockFS (MockFS)
+import Test.Consensus.PeerSimulator.Resources
+import Test.Consensus.PeerSimulator.StateView
+import Test.Consensus.PeerSimulator.Trace
+import Test.Consensus.PointSchedule.Peers (PeerId)
+import Test.Util.ChainDB
+import Test.Util.Orphans.IOLike ()
+import Test.Util.TestBlock (TestBlock, testInitExtLedger)
+
+-- | Resources used for a single live interval of the node, constructed when the
+-- node is started.
+-- When the node is shut down, 'lnCopyToImmDb' is used to persist the current
+-- chain.
+data LiveNode blk m = LiveNode {
+    lnChainDb          :: ChainDB m blk
+  , lnStateViewTracers :: StateViewTracers blk m
+  , lnStateTracer      :: Tracer m ()
+
+    -- | Write persistent ChainDB state (the immutable and volatile DBs, but not
+    -- the ledger and GSM state) to the VFS TVars to preserve it for the next
+    -- interval.
+    -- Returns the immutable tip's slot for tracing.
+  , lnCopyToImmDb      :: m (WithOrigin SlotNo)
+
+    -- | The set of peers that should be started.
+    -- Based on the simulation results at node shutdown, disconnected peers are
+    -- removed for the next live interval.
+  , lnPeers            :: Set PeerId
+  }
+
+-- | Result of a node shutdown at the end of a live interval.
+data LiveIntervalResult blk = LiveIntervalResult {
+    -- | Used to initialize the 'StateViewTracers' of the next run to preserve
+    -- earlier disconnections for the final result.
+    lirPeerResults :: [PeerSimulatorResult blk]
+
+    -- | The remaining peers, computed by removing all peers present in
+    -- 'lirPeerResults' from the current state in 'lnPeers'.
+  , lirActive :: Set PeerId
+  }
+
+-- | Resources used by the handlers 'lifecycleStart' and 'lifecycleStop' to
+-- shut down running components, construct tracers used for single intervals,
+-- and reset and persist state.
+data LiveResources blk m = LiveResources {
+    lrRegistry :: ResourceRegistry m
+  , lrPeerSim  :: PeerSimulatorResources m blk
+  , lrTracer   :: Tracer m (TraceEvent blk)
+  , lrSTracer  :: ChainDB m blk -> m (Tracer m ())
+  , lrConfig   :: TopLevelConfig blk
+
+    -- | The chain DB state consists of several transient parts and the
+    -- immutable DB's virtual file system.
+    -- After 'lnCopyToImmDb' was executed, the latter will contain the final
+    -- state of an interval.
+    -- The rest is reset when the chain DB is recreated.
+  , lrCdb      :: NodeDBs (StrictTVar m MockFS)
+
+    -- | The LoE fragment must be reset for each live interval.
+  , lrLoEVar   :: LoE (StrictTVar m (AnchoredFragment (Header blk)))
+  }
+
+data LiveInterval blk m = LiveInterval {
+    liResources :: LiveResources blk m
+  , liResult    :: LiveIntervalResult blk
+  , liNode      :: LiveNode blk m
+  }
+
+-- | Handlers for starting the node and shutting it down for each live interval,
+-- using the state of the previous run.
+data NodeLifecycle blk m = NodeLifecycle {
+    -- | The minimum tick duration that triggers a node downtime.
+    -- If this is 'Nothing', downtimes are disabled.
+    nlMinDuration :: Maybe DiffTime
+
+    -- | Start the node with prior state.
+    -- For the first start, this must be called with an empty 'lirPeerResults'
+    -- and the initial set of all peers in 'lirActive'.
+  , nlStart       :: LiveIntervalResult blk -> m (LiveNode blk m)
+  , nlShutdown    :: LiveNode blk m -> m (LiveIntervalResult blk)
+  }
+
+-- | Create a ChainDB and start a BlockRunner that operate on the peers'
+-- candidate fragments.
+mkChainDb ::
+  IOLike m =>
+  LiveResources TestBlock m ->
+  m (ChainDB m TestBlock, m (WithOrigin SlotNo))
+mkChainDb resources = do
+  atomically $ do
+    -- Reset only the non-persisted state of the ChainDB's file system mocks:
+    -- - GSM state and Ledger DB are discarded
+    -- - Immutable DB and Volatile DB are preserved for the next interval
+    modifyTVar (nodeDBsGsm lrCdb) (const MockFS.empty)
+    modifyTVar (nodeDBsLgr lrCdb) (const MockFS.empty)
+  chainDbArgs <- do
+    let args = updateTracer
+          (Tracer (traceWith lrTracer . TraceChainDBEvent))
+          (fromMinimalChainDbArgs MinimalChainDbArgs {
+              mcdbTopLevelConfig = lrConfig
+            , mcdbChunkInfo = mkTestChunkInfo lrConfig
+            , mcdbInitLedger = testInitExtLedger
+            , mcdbRegistry = lrRegistry
+            , mcdbNodeDBs = lrCdb
+            })
+    pure $ args { ChainDB.cdbsArgs = (ChainDB.cdbsArgs args) {
+        cdbsLoE = readTVarIO <$> lrLoEVar
+      } }
+  (_, (chainDB, internal)) <- allocate
+    lrRegistry
+    (\_ -> ChainDB.openDBInternal chainDbArgs False)
+    (ChainDB.closeDB . fst)
+  let ChainDB.Internal {intCopyToImmutableDB, intAddBlockRunner} = internal
+  void $ forkLinkedThread lrRegistry "AddBlockRunner" (void intAddBlockRunner)
+  pure (chainDB, intCopyToImmutableDB)
+  where
+    LiveResources {lrRegistry, lrTracer, lrConfig, lrCdb, lrLoEVar} = resources
+
+-- | Allocate all the resources that depend on the results of previous live
+-- intervals, the ChainDB and its persisted state.
+restoreNode ::
+  IOLike m =>
+  LiveResources TestBlock m ->
+  LiveIntervalResult TestBlock ->
+  m (LiveNode TestBlock m)
+restoreNode resources LiveIntervalResult {lirPeerResults, lirActive} = do
+  lnStateViewTracers <- stateViewTracersWithInitial lirPeerResults
+  (lnChainDb, lnCopyToImmDb) <- mkChainDb resources
+  lnStateTracer <- lrSTracer resources lnChainDb
+  pure LiveNode {
+      lnChainDb
+    , lnStateViewTracers
+    , lnStateTracer
+    , lnCopyToImmDb
+    , lnPeers = lirActive
+    }
+
+-- | Allocate resources with 'restoreNode' and pass them to the callback that
+-- starts the node's threads.
+lifecycleStart ::
+  forall m.
+  IOLike m =>
+  (LiveInterval TestBlock m -> m ()) ->
+  LiveResources TestBlock m ->
+  LiveIntervalResult TestBlock ->
+  m (LiveNode TestBlock m)
+lifecycleStart start liResources liResult = do
+  trace (TraceSchedulerEvent TraceNodeStartupStart)
+  liNode <- restoreNode liResources liResult
+  start LiveInterval {liResources, liResult, liNode}
+  chain <- atomically (ChainDB.getCurrentChain (lnChainDb liNode))
+  trace (TraceSchedulerEvent (TraceNodeStartupComplete chain))
+  pure liNode
+  where
+    trace = traceWith (lrTracer liResources)
+
+-- | Shut down the node by killing all its threads after extracting the
+-- persistent state used to restart the node later.
+lifecycleStop ::
+  (IOLike m, GetHeader blk) =>
+  LiveResources blk m ->
+  LiveNode blk m ->
+  m (LiveIntervalResult blk)
+lifecycleStop resources LiveNode {lnStateViewTracers, lnCopyToImmDb, lnPeers} = do
+  -- Trigger writing the immutable tip to the MockFS in our TVar for restoring in 'startNode'
+  immutableTip <- lnCopyToImmDb
+  trace (TraceSchedulerEvent (TraceNodeShutdownStart immutableTip))
+  -- Remember which peers were still running before shutdown
+  lirPeerResults <- svtGetPeerSimulatorResults lnStateViewTracers
+  let disconnectedPeers = Set.fromList (psePeerId <$> lirPeerResults)
+      lirActive = lnPeers Set.\\ disconnectedPeers
+  -- Killing the peer overview threads should hopefully clean up all connections promptly
+  releaseAll lrRegistry
+  -- Reset the resources in TVars that were allocated by the simulator
+  atomically $ do
+    modifyTVar psrHandles (const mempty)
+    case lrLoEVar of
+      LoEEnabled var -> modifyTVar var (const (AF.Empty AF.AnchorGenesis))
+      LoEDisabled    -> pure ()
+  trace (TraceSchedulerEvent TraceNodeShutdownComplete)
+  pure LiveIntervalResult {lirActive, lirPeerResults}
+  where
+    trace = traceWith lrTracer
+    LiveResources {
+        lrRegistry
+      , lrTracer
+      , lrPeerSim = PeerSimulatorResources {psrHandles}
+      , lrLoEVar
+      } = resources
where + checkDowntime + | Just minInterval <- nlMinDuration + , duration > minInterval + = do + results <- nlShutdown node + threadDelay duration + nlStart results + | otherwise + = do + threadDelay duration + pure node + + NodeLifecycle {nlMinDuration, nlStart, nlShutdown} = lifecycle + traceNewTick :: m () traceNewTick = do - currentChain <- atomically $ ChainDB.getCurrentChain chainDb + currentChain <- atomically $ ChainDB.getCurrentChain (lnChainDb node) csState <- atomically $ do m <- readTVar varHandles traverse (readTVar . CSClient.cschState) (m Map.!? pid) @@ -230,18 +247,19 @@ dispatchTick tracer stateTracer chainDb varHandles peers (number, (duration, Pee runScheduler :: IOLike m => Tracer m (TraceSchedulerEvent blk) -> - Tracer m () -> - ChainDB m blk -> StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> PeersSchedule blk -> Map PeerId (PeerResources m blk) -> - m () -runScheduler tracer stateTracer chainDb varHandles ps peers = do + NodeLifecycle blk m -> + m (ChainDB m blk, StateViewTracers blk m) +runScheduler tracer varHandles ps peers lifecycle@NodeLifecycle {nlStart} = do + node0 <- nlStart LiveIntervalResult {lirActive = Map.keysSet peers, lirPeerResults = []} traceWith tracer TraceBeginningOfTime - mapM_ - (dispatchTick tracer stateTracer chainDb varHandles peers) - (zip [0..] (peersStatesRelative ps)) + LiveNode {lnChainDb, lnStateViewTracers} <- foldM tick node0 (zip [0..] (peersStatesRelative ps)) traceWith tracer TraceEndOfTime + pure (lnChainDb, lnStateViewTracers) + where + tick = dispatchTick tracer varHandles peers lifecycle -- | Create the shared resource for the LoE if the feature is enabled in the config. -- This is used by the ChainDB and the GDD governor. @@ -255,84 +273,103 @@ mkLoEVar SchedulerConfig {scEnableLoE} | otherwise = pure LoEDisabled --- | Construct STM resources, set up ChainSync and BlockFetch threads, and --- send all ticks in a 'PointSchedule' to all given peers in turn. -runPointSchedule :: +mkStateTracer :: + IOLike m => + SchedulerConfig -> + GenesisTest TestBlock s -> + PeerSimulatorResources m TestBlock -> + ChainDB m TestBlock -> + m (Tracer m ()) +mkStateTracer schedulerConfig GenesisTest {gtBlockTree} PeerSimulatorResources {psrHandles, psrPeers} chainDb + | scTraceState schedulerConfig + , let getCandidates = viewChainSyncState psrHandles CSClient.csCandidate + getCurrentChain = ChainDB.getCurrentChain chainDb + getPoints = traverse readTVar (srCurrentState . prShared <$> psrPeers) + = peerSimStateDiagramSTMTracerDebug gtBlockTree getCurrentChain getCandidates getPoints + | otherwise + = pure nullTracer + +-- | Start all threads for ChainSync, BlockFetch and GDD, using the resources +-- for a single live interval. +-- Only start peers that haven't been disconnected in a previous interval, +-- provided by 'LiveIntervalResult'. +startNode :: forall m. 
(IOLike m, MonadTime m, MonadTimer m) => SchedulerConfig -> GenesisTestFull TestBlock -> - Tracer m (TraceEvent TestBlock) -> - m (StateView TestBlock) -runPointSchedule schedulerConfig genesisTest tracer0 = - withRegistry $ \registry -> do - stateViewTracers <- defaultStateViewTracers - resources <- makePeerSimulatorResources tracer gtBlockTree (getPeerIds gtSchedule) - let - handles = psrHandles resources - getCandidates = viewChainSyncState handles CSClient.csCandidate - loEVar <- mkLoEVar schedulerConfig - chainDb <- mkChainDb tracer config registry (readTVarIO <$> loEVar) - fetchClientRegistry <- newFetchClientRegistry - let chainDbView = CSClient.defaultChainDbView chainDb - for_ (psrPeers resources) $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do - forkLinkedThread registry ("Peer overview " ++ (show $ srPeerId prShared)) $ - -- The peerRegistry helps ensuring that if any thread fails, then - -- the registry is closed and all threads related to the peer are - -- killed. - withRegistry $ \peerRegistry -> do - (csClient, csServer) <- startChainSyncConnectionThread peerRegistry tracer config chainDbView fetchClientRegistry prShared prChainSync chainSyncTimeouts_ chainSyncLoPBucketConfig stateViewTracers (psrHandles resources) - BlockFetch.startKeepAliveThread peerRegistry fetchClientRegistry (srPeerId prShared) - (bfClient, bfServer) <- startBlockFetchConnectionThread peerRegistry tracer stateViewTracers fetchClientRegistry (pure Continue) prShared prBlockFetch blockFetchTimeouts_ - waitAnyThread [csClient, csServer, bfClient, bfServer] - -- The block fetch logic needs to be started after the block fetch clients - -- otherwise, an internal assertion fails because getCandidates yields more - -- peer fragments than registered clients. - let getCurrentChain = ChainDB.getCurrentChain chainDb - getPoints = traverse readTVar (srCurrentState . prShared <$> psrPeers resources) - mkStateTracer - | scTraceState schedulerConfig - = peerSimStateDiagramSTMTracerDebug gtBlockTree getCurrentChain getCandidates getPoints - | otherwise - = pure nullTracer - - gdd = updateLoEFragGenesis config (mkGDDTracerTestBlock tracer) (readTVar handles) - -- We make GDD rerun every time the anchor or the blocks of the - -- selection change. - gddTrigger = do - s <- viewChainSyncState handles (\ s -> (csLatestSlot s, csIdling s)) - c <- getCurrentChain - return (s, [AF.anchorToHash $ AF.headAnchor c]) - - stateTracer <- mkStateTracer - BlockFetch.startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates - for_ loEVar $ \ var -> - void $ forkLinkedThread registry "LoE updater background" $ - runGdd gdd var chainDb gddTrigger - runScheduler - (Tracer $ traceWith tracer . 
TraceSchedulerEvent)
-      stateTracer
-      chainDb
-      handles
-      gtSchedule
-      (psrPeers resources)
-    snapshotStateView stateViewTracers chainDb
+  LiveInterval TestBlock m ->
+  m ()
+startNode schedulerConfig genesisTest interval = do
+  let
+    handles = psrHandles lrPeerSim
+    getCandidates = viewChainSyncState handles CSClient.csCandidate
+  fetchClientRegistry <- newFetchClientRegistry
+  let chainDbView = CSClient.defaultChainDbView lnChainDb
+      activePeers = Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult)
+  for_ activePeers $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do
+    let pid = srPeerId prShared
+    forkLinkedThread lrRegistry ("Peer overview " ++ show pid) $
+      -- The peerRegistry helps ensure that if any thread fails, then
+      -- the registry is closed and all threads related to the peer are
+      -- killed.
+      withRegistry $ \peerRegistry -> do
+        (csClient, csServer) <-
+          startChainSyncConnectionThread
+            peerRegistry
+            lrTracer
+            lrConfig
+            chainDbView
+            fetchClientRegistry
+            prShared
+            prChainSync
+            chainSyncTimeouts_
+            chainSyncLoPBucketConfig
+            lnStateViewTracers
+            handles
+        BlockFetch.startKeepAliveThread peerRegistry fetchClientRegistry pid
+        (bfClient, bfServer) <-
+          startBlockFetchConnectionThread
+            peerRegistry
+            lrTracer
+            lnStateViewTracers
+            fetchClientRegistry
+            (pure Continue)
+            prShared
+            prBlockFetch
+            blockFetchTimeouts_
+        waitAnyThread [csClient, csServer, bfClient, bfServer]
+  -- The block fetch logic needs to be started after the block fetch clients;
+  -- otherwise, an internal assertion fails because getCandidates yields more
+  -- peer fragments than registered clients.
+  let getCurrentChain = ChainDB.getCurrentChain lnChainDb
+
+      gdd = updateLoEFragGenesis lrConfig (mkGDDTracerTestBlock lrTracer) (readTVar handles)
+      -- We make GDD rerun every time the anchor or the blocks of the
+      -- selection change.
+      gddTrigger = do
+        s <- viewChainSyncState handles (\ s -> (csLatestSlot s, csIdling s))
+        c <- getCurrentChain
+        return (s, [AF.anchorToHash $ AF.headAnchor c])
+
+  BlockFetch.startBlockFetchLogic lrRegistry lrTracer lnChainDb fetchClientRegistry getCandidates
+  for_ lrLoEVar $ \ var -> do
+    forkLinkedThread lrRegistry "LoE updater background" $
+      void $ runGdd gdd var lnChainDb gddTrigger
   where
-    GenesisTest {
-        PointSchedule.gtSecurityParam = k
-      , PointSchedule.gtBlockTree
-      , PointSchedule.gtSchedule
-      , PointSchedule.gtChainSyncTimeouts
-      , PointSchedule.gtBlockFetchTimeouts
-      , PointSchedule.gtLoPBucketParams = LoPBucketParams { lbpCapacity, lbpRate }
-      , PointSchedule.gtForecastRange
-      , PointSchedule.gtGenesisWindow
-      } = genesisTest
+    LiveResources {lrRegistry, lrTracer, lrConfig, lrPeerSim, lrLoEVar} = resources

-    config = defaultCfg k gtForecastRange gtGenesisWindow
+    LiveInterval {
+        liResources = resources
+      , liResult = liveResult
+      , liNode = LiveNode {lnChainDb, lnStateViewTracers}
+      } = interval

-    -- FIXME: This type of configuration should move to `Trace.mkTracer`.
-    tracer = if scTrace schedulerConfig then tracer0 else nullTracer
+    GenesisTest
+      { gtChainSyncTimeouts
+      , gtBlockFetchTimeouts
+      , gtLoPBucketParams = LoPBucketParams { lbpCapacity, lbpRate }
+      } = genesisTest

     chainSyncTimeouts_ =
       if scEnableChainSyncTimeouts schedulerConfig
@@ -349,33 +386,69 @@ runPointSchedule schedulerConfig genesisTest tracer0 =
         then gtBlockFetchTimeouts
         else BlockFetch.blockFetchNoTimeouts

--- | Create a ChainDB and start a BlockRunner that operate on the peers'
--- candidate fragments.
-mkChainDb :: - IOLike m => +-- | Set up all resources related to node start/shutdown. +nodeLifecycle :: + (IOLike m, MonadTime m, MonadTimer m) => + SchedulerConfig -> + GenesisTestFull TestBlock -> Tracer m (TraceEvent TestBlock) -> - TopLevelConfig TestBlock -> ResourceRegistry m -> - GetLoEFragment m TestBlock -> - m (ChainDB m TestBlock) -mkChainDb tracer nodeCfg registry cdbsLoE = do - chainDbArgs <- do - mcdbNodeDBs <- emptyNodeDBs - let args = updateTracer - (Tracer (traceWith tracer . TraceChainDBEvent)) - (fromMinimalChainDbArgs MinimalChainDbArgs { - mcdbTopLevelConfig = nodeCfg - , mcdbChunkInfo = mkTestChunkInfo nodeCfg - , mcdbInitLedger = testInitExtLedger - , mcdbRegistry = registry - , mcdbNodeDBs - } - ) - pure $ args { ChainDB.Impl.cdbsArgs = (ChainDB.Impl.cdbsArgs args) { cdbsLoE } } - (_, (chainDB, ChainDB.Impl.Internal{ChainDB.Impl.intAddBlockRunner})) <- - allocate - registry - (\_ -> ChainDB.Impl.openDBInternal chainDbArgs False) - (ChainDB.closeDB . fst) - _ <- forkLinkedThread registry "AddBlockRunner" intAddBlockRunner - pure chainDB + PeerSimulatorResources m TestBlock -> + m (NodeLifecycle TestBlock m) +nodeLifecycle schedulerConfig genesisTest lrTracer lrRegistry lrPeerSim = do + lrCdb <- emptyNodeDBs + lrLoEVar <- mkLoEVar schedulerConfig + let + resources = + LiveResources { + lrRegistry + , lrTracer + , lrSTracer = mkStateTracer schedulerConfig genesisTest lrPeerSim + , lrConfig + , lrPeerSim + , lrCdb + , lrLoEVar + } + pure NodeLifecycle { + nlMinDuration = scDowntime schedulerConfig + , nlStart = lifecycleStart (startNode schedulerConfig genesisTest) resources + , nlShutdown = lifecycleStop resources + } + where + lrConfig = defaultCfg k gtForecastRange gtGenesisWindow + + GenesisTest { + gtSecurityParam = k + , gtForecastRange + , gtGenesisWindow + } = genesisTest + +-- | Construct STM resources, set up ChainSync and BlockFetch threads, and +-- send all ticks in a 'PointSchedule' to all given peers in turn. +runPointSchedule :: + forall m. + (IOLike m, MonadTime m, MonadTimer m) => + SchedulerConfig -> + GenesisTestFull TestBlock -> + Tracer m (TraceEvent TestBlock) -> + m (StateView TestBlock) +runPointSchedule schedulerConfig genesisTest tracer0 = + withRegistry $ \registry -> do + peerSim <- makePeerSimulatorResources tracer gtBlockTree (getPeerIds gtSchedule) + lifecycle <- nodeLifecycle schedulerConfig genesisTest tracer registry peerSim + (chainDb, stateViewTracers) <- runScheduler + (Tracer $ traceWith tracer . TraceSchedulerEvent) + (psrHandles peerSim) + gtSchedule + (psrPeers peerSim) + lifecycle + snapshotStateView stateViewTracers chainDb + where + + GenesisTest { + gtBlockTree + , gtSchedule + } = genesisTest + + -- FIXME: This type of configuration should move to `Trace.mkTracer`. 
+ tracer = if scTrace schedulerConfig then tracer0 else nullTracer diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs index 3d83b5a4fa..f8c1fbff7b 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs @@ -15,10 +15,12 @@ module Test.Consensus.PeerSimulator.StateView ( , exceptionsByComponent , pscrToException , snapshotStateView + , stateViewTracersWithInitial ) where -import Control.Tracer (Tracer) +import Control.Tracer (Tracer, traceWith) import Data.Containers.ListUtils (nubOrd) +import Data.Foldable (for_) import Data.List (sort) import Data.Maybe (mapMaybe) import Network.TypedProtocol.Codec (AnyMessage) @@ -196,6 +198,16 @@ defaultStateViewTracers = do (svtPeerSimulatorResultsTracer, svtGetPeerSimulatorResults) <- recordingTracerTVar pure StateViewTracers {svtPeerSimulatorResultsTracer, svtGetPeerSimulatorResults} +-- | Call 'defaultStateViewTracers' and add the provided results. +stateViewTracersWithInitial :: + IOLike m => + [PeerSimulatorResult blk] -> + m (StateViewTracers blk m) +stateViewTracersWithInitial initial = do + svt <- defaultStateViewTracers + for_ initial (traceWith (svtPeerSimulatorResultsTracer svt)) + pure svt + -- | Use the state view tracers as well as some extra information to produce a -- state view. This mostly consists in reading and storing the current state of -- the tracers. diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index 2342b3e01e..b3f42c5b3f 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -59,6 +59,10 @@ data TraceSchedulerEvent blk -- @0@), the duration of the tick, the states, the current chain and the -- candidate fragment. 
TraceNewTick Int DiffTime (Peer (NodeState blk)) (AnchoredFragment (Header blk)) (Maybe (AnchoredFragment (Header blk)))
+  | TraceNodeShutdownStart (WithOrigin SlotNo)
+  | TraceNodeShutdownComplete
+  | TraceNodeStartupStart
+  | TraceNodeStartupComplete (AnchoredFragment (Header blk))

 type HandlerName = String

@@ -192,6 +196,15 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case
           "  candidate fragment: " ++ maybe "Nothing" terseHFragment mCandidateFrag
         ]

+  TraceNodeShutdownStart immTip ->
+    traceWith tracer0 ("  Initiating node shutdown with immutable tip at slot " ++ condense immTip)
+  TraceNodeShutdownComplete ->
+    traceWith tracer0 "  Node shutdown complete"
+  TraceNodeStartupStart ->
+    traceWith tracer0 "  Initiating node startup"
+  TraceNodeStartupComplete selection ->
+    traceWith tracer0 ("  Node startup complete with selection " ++ terseHFragment selection)
+
 traceScheduledServerHandlerEventTestBlockWith ::
   Tracer m String ->
   String ->
@@ -394,9 +407,12 @@ terseGDDEvent = \case

       block = if hasBlockAfter then ", has header after sgen" else " "

+      -- Note: this must use @lastPoint@, not @headPoint@ (an earlier revision used @headPoint@ erroneously):
+      -- the first point after the anchor, which @lastPoint@ returns, is used for the condition that the
+      -- density comparison should not be applied to two peers if they share any headers after the LoE fragment.
       lastPoint =
         "point: " ++
-        tersePoint (castPoint @(Header TestBlock) @TestBlock (AF.headPoint clippedFragment)) ++
+        tersePoint (castPoint @(Header TestBlock) @TestBlock (AF.lastPoint clippedFragment)) ++
         ", "

     showLatestSlot = \case
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs
index b22189635f..0d3aaa7848 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs
@@ -32,6 +32,7 @@ module Test.Consensus.PointSchedule (
   , PeersSchedule
   , RunGenesisTestResult (..)
  , enrichedWith
+  , ensureScheduleDuration
   , genesisNodeState
   , longRangeAttack
   , nsTipTip
@@ -43,6 +44,7 @@ module Test.Consensus.PointSchedule (
   , prettyPeersSchedule
   , stToGen
   , uniformPoints
+  , uniformPointsWithDowntime
   ) where

 import Cardano.Slotting.Time (SlotLength)
@@ -52,7 +54,7 @@ import Control.Monad.ST (ST)
 import Data.Foldable (toList)
 import Data.Functor (($>))
 import Data.List (mapAccumL, partition, scanl')
-import Data.Maybe (mapMaybe)
+import Data.Maybe (catMaybes, fromMaybe, mapMaybe)
 import Data.Time (DiffTime)
 import Data.Word (Word64)
 import Ouroboros.Consensus.Block.Abstract (WithOrigin (..),
@@ -60,13 +62,14 @@ import Ouroboros.Consensus.Block.Abstract (WithOrigin (..),
 import Ouroboros.Consensus.Ledger.SupportsProtocol
     (GenesisWindow (..))
 import Ouroboros.Consensus.Network.NodeToNode (ChainSyncTimeout (..))
-import Ouroboros.Consensus.Protocol.Abstract (SecurityParam,
-    maxRollbacks)
+import Ouroboros.Consensus.Protocol.Abstract
+    (SecurityParam (SecurityParam), maxRollbacks)
 import Ouroboros.Consensus.Util.Condense (Condense (..),
     CondenseList (..), PaddingDirection (..),
     condenseListWithPadding, padListWith)
 import qualified Ouroboros.Network.AnchoredFragment as AF
-import Ouroboros.Network.Block (Tip (..), tipFromHeader)
+import Ouroboros.Network.Block (SlotNo (..), Tip (..), blockSlot,
+    tipFromHeader)
 import Ouroboros.Network.Point (withOrigin)
 import qualified System.Random.Stateful as Random
 import System.Random.Stateful (STGenM, StatefulGen, runSTGen_)
@@ -306,6 +309,132 @@ uniformPoints BlockTree {btTrunk, btBranches} g = do

   rollbackProb = 0.2

+-- | Subtraction clamped at zero.
+minusClamp :: (Ord a, Num a) => a -> a -> a
+minusClamp a b | a <= b = 0
+               | otherwise = a - b
+
+-- | Transpose a list of lists, padding the shorter lists with 'Nothing'.
+zipPadN :: forall a . [[a]] -> [[Maybe a]]
+zipPadN =
+  spin []
+  where
+    spin acc as
+      | all null as
+      = reverse acc
+      | let (h, t) = unzip (takeNext <$> as)
+      = spin (h : acc) t
+
+    takeNext = \case
+      [] -> (Nothing, [])
+      h : t -> (Just h, t)
+
+isTip :: SchedulePoint blk -> Bool
+isTip = \case
+  ScheduleTipPoint _ -> True
+  _ -> False
+
+tipTimes :: [(Time, SchedulePoint blk)] -> [Time]
+tipTimes =
+  fmap fst . filter (isTip . snd)
+
+-- | Replace the times of the tip points in the schedule with the given times,
+-- in order. Leftover tip points keep their original times.
+bumpTips :: [Time] -> [(Time, SchedulePoint blk)] -> [(Time, SchedulePoint blk)]
+bumpTips tips =
+  snd . mapAccumL step tips
+  where
+    step (t0 : tn) (_, p)
+      | isTip p
+      = (tn, (t0, p))
+    step ts a = (ts, a)
+
+-- | Synchronise the schedules' tip points: the i-th tip point of every
+-- schedule is moved to the earliest time at which any of the schedules
+-- announces its i-th tip point.
+syncTips :: [(Time, SchedulePoint blk)] -> [[(Time, SchedulePoint blk)]] -> ([(Time, SchedulePoint blk)], [[(Time, SchedulePoint blk)]])
+syncTips honest advs =
+  (bump honest, bump <$> advs)
+  where
+    bump = bumpTips earliestTips
+    earliestTips = chooseEarliest <$> zipPadN (tipTimes <$> scheds)
+    scheds = honest : advs
+    chooseEarliest times = minimum (fromMaybe (Time 0) <$> times)

+-- | This is a variant of 'uniformPoints' that uses multiple tip points per peer, in order to simulate
+-- node downtimes.
+-- Ultimately, this should be replaced by a redesign of the peer schedule generator that is aware of node liveness
+-- intervals.
+--
+-- Chooses the first tip points somewhere in the middle of the honest chain:
+-- the "pause slot" is drawn uniformly between a lower bound and the end of the trunk, where the lower
+-- bound is half the length of the trunk or the slot of the k-th block, whichever is greater.
+-- The newest block whose slot is at most the pause slot is then used as the first tip for each branch.
+-- The second tip is the last block of each branch.
+--
+-- Includes rollbacks in some schedules.
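+--
+-- For intuition, with hypothetical numbers (assuming roughly one block per
+-- slot): for a trunk of 20 blocks and k = 4, half the trunk length is 10 and
+-- the slot of the k-th block is about 3, so the pause slot is drawn uniformly
+-- from [10, 19]; each peer's first tip is then its newest block with a slot
+-- of at most the pause slot.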
+uniformPointsWithDowntime :: + (StatefulGen g m, AF.HasHeader blk) => + SecurityParam -> + BlockTree blk -> + g -> + m (PeersSchedule blk) +uniformPointsWithDowntime (SecurityParam k) BlockTree {btTrunk, btBranches} g = do + let + kSlot = withOrigin 0 (fromIntegral . unSlotNo) (AF.headSlot (AF.takeOldest (fromIntegral k) btTrunk)) + midSlot = (AF.length btTrunk) `div` 2 + lowerBound = max kSlot midSlot + pauseSlot <- SlotNo . fromIntegral <$> Random.uniformRM (lowerBound, AF.length btTrunk - 1) g + honestTip0 <- firstTip pauseSlot btTrunk + honest <- mkSchedule [(IsTrunk, [honestTip0, minusClamp (AF.length btTrunk) 1])] [] + advs <- takeBranches pauseSlot btBranches + let (honest', advs') = syncTips honest advs + pure (mkPeers honest' advs') + where + takeBranches pause = \case + [] -> pure [] + [b] -> pure <$> withoutRollback pause b + b1 : b2 : branches -> do + a <- Random.uniformDouble01M g + if a < rollbackProb + then do + this <- withRollback pause b1 b2 + rest <- takeBranches pause branches + pure (this : rest) + else do + this <- withoutRollback pause b1 + rest <- takeBranches pause (b2 : branches) + pure (this : rest) + + withoutRollback pause branch = do + tips <- mkTips pause branch + mkSchedule tips [btbSuffix branch] + + withRollback pause b1 b2 = do + firstTips <- mkTips pause b1 + let secondTips = [minusClamp (AF.length (btbSuffix b2)) 1] + mkSchedule (firstTips ++ [(IsBranch, secondTips)]) [btbSuffix b1, btbSuffix b2] + + mkSchedule tips branches = do + params <- mkParams + peerScheduleFromTipPoints g params tips btTrunk branches + + mkTips pause branch + | AF.length full == 0 = + error "empty branch" + | otherwise = do + tip0 <- firstTip pause (btbFull branch) + let (pre, post) = partition (< firstSuffixBlock) [tip0, fullLen - 1] + pure ((if null pre then [] else [(IsTrunk, pre)]) ++ [(IsBranch, shift <$> post)]) + where + shift i = i - firstSuffixBlock + firstSuffixBlock = fullLen - AF.length (btbSuffix branch) + fullLen = AF.length full + full = btbFull branch + + firstTip pause frag = pure (minusClamp (AF.length (AF.dropWhileNewest (\ b -> blockSlot b > pause) frag)) 1) + + mkParams = do + -- These values appear to be large enough to create pauses of 100 seconds and more. 
+ tipL <- uniformRMDiffTime (0.5, 1) g + tipU <- uniformRMDiffTime (1, 2) g + headerL <- uniformRMDiffTime (0.018, 0.03) g + headerU <- uniformRMDiffTime (0.021, 0.04) g + pure defaultPeerScheduleParams {pspTipDelayInterval = (tipL, tipU), pspHeaderDelayInterval = (headerL, headerU)} + + rollbackProb = 0.2 + newtype ForecastRange = ForecastRange { unForecastRange :: Word64 } deriving (Show) @@ -397,3 +526,28 @@ stToGen :: stToGen gen = do seed :: QCGen <- arbitrary pure (runSTGen_ seed gen) + +duplicateLastPoint + :: DiffTime -> [(Time, SchedulePoint blk)] -> [(Time, SchedulePoint blk)] +duplicateLastPoint d [] = [(Time d, ScheduleTipPoint Origin)] +duplicateLastPoint d xs = + let (t, p) = last xs + in xs ++ [(addTime d t, p)] + +ensureScheduleDuration :: GenesisTest blk a -> PeersSchedule blk -> PeersSchedule blk +ensureScheduleDuration gt Peers {honest, others} = + Peers {honest = extendHonest, others} + where + extendHonest = duplicateLastPoint endingDelay <$> honest + + endingDelay = + let cst = gtChainSyncTimeouts gt + bft = gtBlockFetchTimeouts gt + in 1 + fromIntegral peerCount * maximum (0 : catMaybes + [ canAwaitTimeout cst + , intersectTimeout cst + , busyTimeout bft + , streamingTimeout bft + ]) + + peerCount = 1 + length others From 9572256c6f657a004b2307df02f38c6da5000964 Mon Sep 17 00:00:00 2001 From: Alexander Esgen Date: Fri, 26 Apr 2024 14:28:59 +0200 Subject: [PATCH 08/20] LoP: more robust pausing while waiting for horizon advancement The previous implementation would not pause the bucket while waiting for the time translation horizon to advance. --- .../Consensus/MiniProtocol/ChainSync/Client.hs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index d33796b324..991d2c9f2a 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -1435,7 +1435,7 @@ checkTime :: -> SlotNo -> m (UpdatedIntersectionState blk (LedgerView (BlockProtocol blk))) checkTime cfgEnv dynEnv intEnv = - \kis arrival slotNo -> castEarlyExitIntersects $ do + \kis arrival slotNo -> pauseBucket $ castEarlyExitIntersects $ do Intersects kis2 lst <- checkArrivalTime kis arrival Intersects kis3 ledgerView <- case projectLedgerView slotNo lst of Just ledgerView -> pure $ Intersects kis2 ledgerView @@ -1443,10 +1443,7 @@ checkTime cfgEnv dynEnv intEnv = EarlyExit.lift $ traceWith (tracer cfgEnv) $ TraceWaitingBeyondForecastHorizon slotNo - -- Pause the bucket if LedgerView-Starved. - EarlyExit.lift $ lbPause (loPBucket dynEnv) res <- readLedgerState kis2 (projectLedgerView slotNo) - EarlyExit.lift $ lbResume (loPBucket dynEnv) EarlyExit.lift $ traceWith (tracer cfgEnv) $ TraceAccessingForecastHorizon slotNo @@ -1558,6 +1555,17 @@ checkTime cfgEnv dynEnv intEnv = -- 'intersectsWithCurrentChain' before it. Nothing + -- Pause the LoP bucket for the entire duration of 'checkTime'. It will + -- either execute very fast, or it will block on the time translation or + -- forecast horizon, waiting for our selection to advance. During this + -- period, we should not leak tokens as our peer is not responsible for this + -- waiting time. 
+ pauseBucket :: m a -> m a + pauseBucket = + bracket_ + (lbPause (loPBucket dynEnv)) + (lbResume (loPBucket dynEnv)) + -- | Update the 'KnownIntersectionState' according to the header, if it's valid -- -- Crucially: disconnects if it isn't. From 30ddc5c1e4eacb06332baedabaf0773a1b8a54a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Tue, 2 Apr 2024 12:33:59 +0100 Subject: [PATCH 09/20] Extract ChainSync client state to own file --- ouroboros-consensus/ouroboros-consensus.cabal | 1 + .../MiniProtocol/ChainSync/Client.hs | 48 +------------- .../MiniProtocol/ChainSync/Client/State.hs | 65 +++++++++++++++++++ 3 files changed, 67 insertions(+), 47 deletions(-) create mode 100644 ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs diff --git a/ouroboros-consensus/ouroboros-consensus.cabal b/ouroboros-consensus/ouroboros-consensus.cabal index 894fcb2c6b..7e55dcc5e0 100644 --- a/ouroboros-consensus/ouroboros-consensus.cabal +++ b/ouroboros-consensus/ouroboros-consensus.cabal @@ -169,6 +169,7 @@ library Ouroboros.Consensus.MiniProtocol.BlockFetch.Server Ouroboros.Consensus.MiniProtocol.ChainSync.Client Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck + Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State Ouroboros.Consensus.MiniProtocol.ChainSync.Server Ouroboros.Consensus.MiniProtocol.LocalStateQuery.Server Ouroboros.Consensus.MiniProtocol.LocalTxMonitor.Server diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index 991d2c9f2a..786b1dfae0 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -94,6 +94,7 @@ import Ouroboros.Consensus.Ledger.Basics (LedgerState) import Ouroboros.Consensus.Ledger.Extended import Ouroboros.Consensus.Ledger.SupportsProtocol import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State import Ouroboros.Consensus.Node.NetworkProtocolVersion import Ouroboros.Consensus.Protocol.Abstract import Ouroboros.Consensus.Storage.ChainDB (ChainDB, @@ -182,53 +183,6 @@ newtype Our a = Our { unOur :: a } deriving stock (Eq) deriving newtype (Show, NoThunks) --- | A ChainSync client's state that's used by other components, like the GDD. -data ChainSyncState blk = ChainSyncState { - - -- | The current candidate fragment. - csCandidate :: !(AnchoredFragment (Header blk)) - - -- | This ChainSync client should ensure that its peer sets this flag while - -- and only while both of the following conditions are satisfied: the - -- peer's latest message has been fully processed (especially that its - -- candidate has been updated; previous argument) and its latest message - -- did not claim that it already has headers that extend its candidate. - -- - -- It's more important that the flag is unset promptly than it is for the - -- flag to be set promptly, because of how this is used by the GSM to - -- determine that the node is done syncing. 
- , csIdling :: !Bool - - -- | When the client receives a new header, it updates this field before - -- processing it further, and the latest slot may refer to a header beyond - -- the forecast horizon while the candidate fragment isn't extended yet, to - -- signal to GDD that the density is known up to this slot. - , csLatestSlot :: !(Maybe (WithOrigin SlotNo)) - } - deriving stock (Generic) - -deriving anyclass instance ( - HasHeader blk, - NoThunks (Header blk) - ) => NoThunks (ChainSyncState blk) - --- | An interface to a ChainSync client that's used by other components, like --- the GDD governor. -data ChainSyncClientHandle m blk = ChainSyncClientHandle { - -- | Disconnects from the peer when the GDD considers it adversarial - cschGDDKill :: !(m ()) - - -- | Data shared between the client and external components like GDD. - , cschState :: !(StrictTVar m (ChainSyncState blk)) - } - deriving stock (Generic) - -deriving anyclass instance ( - IOLike m, - HasHeader blk, - NoThunks (Header blk) - ) => NoThunks (ChainSyncClientHandle m blk) - -- | Convenience function for reading a nested set of TVars and extracting some -- data from 'ChainSyncState'. viewChainSyncState :: diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs new file mode 100644 index 0000000000..e1096156ec --- /dev/null +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs @@ -0,0 +1,65 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE UndecidableInstances #-} + +module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( + ChainSyncClientHandle (..) + , ChainSyncState (..) + ) where + +import Cardano.Slotting.Slot (SlotNo, WithOrigin) +import GHC.Generics (Generic) +import Ouroboros.Consensus.Block (HasHeader, Header) +import Ouroboros.Consensus.Util.IOLike (IOLike, NoThunks, StrictTVar) +import Ouroboros.Network.AnchoredFragment (AnchoredFragment) + +-- | A ChainSync client's state that's used by other components, like the GDD or +-- the jumping governor. +data ChainSyncState blk = ChainSyncState { + + -- | The current candidate fragment. + csCandidate :: !(AnchoredFragment (Header blk)) + + -- | This ChainSync client should ensure that its peer sets this flag while + -- and only while both of the following conditions are satisfied: the + -- peer's latest message has been fully processed (especially that its + -- candidate has been updated; previous argument) and its latest message + -- did not claim that it already has headers that extend its candidate. + -- + -- It's more important that the flag is unset promptly than it is for the + -- flag to be set promptly, because of how this is used by the GSM to + -- determine that the node is done syncing. + , csIdling :: !Bool + + -- | When the client receives a new header, it updates this field before + -- processing it further, and the latest slot may refer to a header beyond + -- the forecast horizon while the candidate fragment isn't extended yet, to + -- signal to GDD that the density is known up to this slot. 
+ , csLatestSlot :: !(Maybe (WithOrigin SlotNo)) + } + deriving stock (Generic) + +deriving anyclass instance ( + HasHeader blk, + NoThunks (Header blk) + ) => NoThunks (ChainSyncState blk) + +-- | An interface to a ChainSync client that's used by other components, like +-- the GDD governor. +data ChainSyncClientHandle m blk = ChainSyncClientHandle { + -- | Disconnects from the peer when the GDD considers it adversarial + cschGDDKill :: !(m ()) + + -- | Data shared between the client and external components like GDD. + , cschState :: !(StrictTVar m (ChainSyncState blk)) + } + deriving stock (Generic) + +deriving anyclass instance ( + IOLike m, + HasHeader blk, + NoThunks (Header blk) + ) => NoThunks (ChainSyncClientHandle m blk) From cee466f25c985c15172497fbff36668b9843123d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Thu, 25 Apr 2024 16:09:10 +0000 Subject: [PATCH 10/20] Clarify documentation of csIdling --- .../Consensus/MiniProtocol/ChainSync/Client/State.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs index e1096156ec..c82e1afd24 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs @@ -23,7 +23,9 @@ data ChainSyncState blk = ChainSyncState { -- | The current candidate fragment. csCandidate :: !(AnchoredFragment (Header blk)) - -- | This ChainSync client should ensure that its peer sets this flag while + -- | Whether the last message sent by the peer was MsgAwaitReply. + -- + -- This ChainSync client should ensure that its peer sets this flag while -- and only while both of the following conditions are satisfied: the -- peer's latest message has been fully processed (especially that its -- candidate has been updated; previous argument) and its latest message From 0e265c44b527714680fccbc3440378f227d14acf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Wed, 27 Mar 2024 11:28:06 +0100 Subject: [PATCH 11/20] Implement ChainSync jumping See the documentation in ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs for an overview. 
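
For a first orientation, the configuration surface mirrors the LoP bucket:
CSJ is disabled by default and is switched on by providing a jump size. A
minimal sketch of enabling it (the concrete jump size here is made up; the
tests derive it from the Genesis window):

    import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
      (CSJConfig (..), CSJEnabledConfig (..))
    import Ouroboros.Network.Block (SlotNo (..))

    csjConfig :: CSJConfig
    csjConfig = CSJEnabled CSJEnabledConfig { csjcJumpSize = SlotNo 3000 }

Passing 'CSJDisabled' (the default, e.g. in 'stdLowLevelRunNodeArgsIO')
preserves the previous behaviour.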
--- .../Ouroboros/Consensus/Network/NodeToNode.hs | 5 +- .../Ouroboros/Consensus/Node.hs | 7 +- .../Test/ThreadNet/Network.hs | 1 + .../Test/Consensus/Genesis/Setup/GenChains.hs | 1 + .../Test/Consensus/Genesis/Tests/Uniform.hs | 8 +- .../Test/Consensus/PeerSimulator/ChainSync.hs | 13 +- .../Test/Consensus/PeerSimulator/Run.hs | 43 +- .../Test/Consensus/PeerSimulator/Trace.hs | 99 ++- .../Test/Consensus/PointSchedule.hs | 9 + .../Test/Consensus/PointSchedule/Peers.hs | 4 +- .../bench/ChainSync-client-bench/Main.hs | 1 + ouroboros-consensus/ouroboros-consensus.cabal | 1 + .../MiniProtocol/ChainSync/Client.hs | 300 ++++++- .../MiniProtocol/ChainSync/Client/Jumping.hs | 823 ++++++++++++++++++ .../MiniProtocol/ChainSync/Client/State.hs | 164 +++- .../MiniProtocol/ChainSync/Client.hs | 21 +- 16 files changed, 1417 insertions(+), 83 deletions(-) create mode 100644 ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Network/NodeToNode.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Network/NodeToNode.hs index 993b2c4d37..4dbf6a9361 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Network/NodeToNode.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Network/NodeToNode.hs @@ -540,10 +540,11 @@ mkApps :: -> ByteLimits bCS bBF bTX bKA -> m ChainSyncTimeout -> CsClient.ChainSyncLoPBucketConfig + -> CsClient.CSJConfig -> ReportPeerMetrics m (ConnectionId addrNTN) -> Handlers m addrNTN blk -> Apps m addrNTN bCS bBF bTX bKA bPS NodeToNodeInitiatorResult () -mkApps kernel Tracers {..} mkCodecs ByteLimits {..} genChainSyncTimeout lopBucketConfig ReportPeerMetrics {..} Handlers {..} = +mkApps kernel Tracers {..} mkCodecs ByteLimits {..} genChainSyncTimeout lopBucketConfig csjConfig ReportPeerMetrics {..} Handlers {..} = Apps {..} where aChainSyncClient @@ -572,6 +573,7 @@ mkApps kernel Tracers {..} mkCodecs ByteLimits {..} genChainSyncTimeout lopBucke them version lopBucketConfig + csjConfig $ \csState -> do chainSyncTimeout <- genChainSyncTimeout (r, trailing) <- @@ -593,6 +595,7 @@ mkApps kernel Tracers {..} mkCodecs ByteLimits {..} genChainSyncTimeout lopBucke , CsClient.idling = csvIdling csState , CsClient.loPBucket = csvLoPBucket csState , CsClient.setLatestSlot = csvSetLatestSlot csState + , CsClient.jumping = csvJumping csState } return (ChainSyncInitiatorResult r, trailing) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs index 61a28b8f65..b2e0609579 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs @@ -81,7 +81,7 @@ import Ouroboros.Consensus.Fragment.InFuture (CheckInFuture, import qualified Ouroboros.Consensus.Fragment.InFuture as InFuture import Ouroboros.Consensus.Ledger.Extended (ExtLedgerState (..)) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncLoPBucketConfig (..)) + (CSJConfig (..), ChainSyncLoPBucketConfig (..)) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import qualified Ouroboros.Consensus.Network.NodeToClient as NTC import qualified 
Ouroboros.Consensus.Network.NodeToNode as NTN @@ -252,6 +252,9 @@ data LowLevelRunNodeArgs m addrNTN addrNTC versionDataNTN versionDataNTC blk -- | See 'CsClient.ChainSyncLoPBucketConfig' , llrnChainSyncLoPBucketConfig :: ChainSyncLoPBucketConfig + -- | See 'CsClient.CSJConfig' + , llrnCSJConfig :: CSJConfig + -- | How to run the data diffusion applications -- -- 'run' will not return before this does. @@ -519,6 +522,7 @@ runWith RunNodeArgs{..} encAddrNtN decAddrNtN LowLevelRunNodeArgs{..} = NTN.byteLimits llrnChainSyncTimeout llrnChainSyncLoPBucketConfig + llrnCSJConfig (reportMetric Diffusion.peerMetricsConfiguration peerMetrics) (NTN.mkHandlers nodeKernelArgs nodeKernel) @@ -857,6 +861,7 @@ stdLowLevelRunNodeArgsIO RunNodeArgs{ rnProtocolInfo { llrnBfcSalt , llrnChainSyncTimeout = fromMaybe Diffusion.defaultChainSyncTimeout srnChainSyncTimeout , llrnChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled + , llrnCSJConfig = CSJDisabled , llrnCustomiseHardForkBlockchainTimeArgs = id , llrnGsmAntiThunderingHerd , llrnKeepAliveRng diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs index 929826cc80..90dffaa070 100644 --- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs +++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs @@ -1058,6 +1058,7 @@ runThreadNetwork systemTime ThreadNetworkArgs , idleTimeout = waitForever }) CSClient.ChainSyncLoPBucketDisabled + CSClient.CSJDisabled nullMetric -- The purpose of this test is not testing protocols, so -- returning constant empty list is fine if we have thorough diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs index 9975f37c45..9b6702c114 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs @@ -129,6 +129,7 @@ genChains genNumForks = do -- ^ REVIEW: Do we want to generate those randomly? For now, the chosen -- values carry no special meaning. Someone needs to think about what values -- would make for interesting tests. + gtCSJParams = CSJParams $ fromIntegral scg, gtBlockTree = foldl' (flip BT.addBranch') (BT.mkTrunk goodChain) $ zipWith (genAdversarialFragment goodBlocks) [1..] alternativeChainSchemas, gtSchedule = () } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 0412a1c86b..249aadc62c 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -150,7 +150,11 @@ prop_serveAdversarialBranches = forAllGenesisTest (genChains (QC.choose (1, 4)) `enrichedWith` genUniformSchedulePoints) (defaultSchedulerConfig - {scTraceState = False, scTrace = False, scEnableLoE = True}) + { scTraceState = False + , scTrace = False + , scEnableLoE = True + , scEnableCSJ = True + }) -- We cannot shrink by removing points from the adversarial schedules. 
-- Otherwise, the immutable tip could get stuck because a peer doesn't @@ -195,6 +199,7 @@ prop_leashingAttackStalling = { scTrace = False , scEnableLoE = True , scEnableLoP = True + , scEnableCSJ = True } shrinkPeerSchedules @@ -253,6 +258,7 @@ prop_leashingAttackTimeLimited = , scEnableLoE = True , scEnableLoP = True , scEnableBlockFetchTimeouts = False + , scEnableCSJ = True } shrinkPeerSchedules diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs index 3ac5deb34b..21894f8811 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs @@ -20,10 +20,10 @@ import Ouroboros.Consensus.Block (Header, Point) import Ouroboros.Consensus.Config (TopLevelConfig (..)) import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) -import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (ChainDbView, - ChainSyncClientHandle, ChainSyncLoPBucketConfig, - ChainSyncStateView (..), Consensus, bracketChainSyncClient, - chainSyncClient) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (CSJConfig (..), ChainDbView, ChainSyncClientHandle, + ChainSyncLoPBucketConfig, ChainSyncStateView (..), + Consensus, bracketChainSyncClient, chainSyncClient) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import Ouroboros.Consensus.Util (ShowProxy) @@ -92,6 +92,7 @@ basicChainSyncClient , CSClient.idling = csvIdling csState , CSClient.loPBucket = csvLoPBucket csState , CSClient.setLatestSlot = csvSetLatestSlot csState + , CSClient.jumping = csvJumping csState } where dummyHeaderInFutureCheck :: @@ -118,6 +119,8 @@ runChainSyncClient :: -- ^ Timeouts for this client. ChainSyncLoPBucketConfig -> -- ^ Configuration for the LoP bucket. + CSJConfig -> + -- ^ Configuration for ChainSync Jumping StateViewTracers blk m -> -- ^ Tracers used to record information for the future 'StateView'. 
StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> @@ -133,6 +136,7 @@ runChainSyncClient peerId chainSyncTimeouts lopBucketConfig + csjConfig StateViewTracers {svtPeerSimulatorResultsTracer} varHandles channel = do @@ -143,6 +147,7 @@ runChainSyncClient peerId (maxBound :: NodeToNodeVersion) lopBucketConfig + csjConfig $ \csState -> do res <- try $ diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index c84201b986..3488fb6c79 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -10,7 +10,7 @@ module Test.Consensus.PeerSimulator.Run ( , runPointSchedule ) where -import Control.Monad (foldM) +import Control.Monad (foldM, forM) import Control.Monad.Class.MonadTime (MonadTime) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer (..), nullTracer, traceWith) @@ -24,7 +24,8 @@ import Ouroboros.Consensus.Genesis.Governor (runGdd, updateLoEFragGenesis) import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) -import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (ChainDbView, +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (CSJConfig (..), CSJEnabledConfig (..), ChainDbView, ChainSyncClientHandle, ChainSyncLoPBucketConfig (..), ChainSyncLoPBucketEnabledConfig (..), ChainSyncState (..), viewChainSyncState) @@ -52,7 +53,7 @@ import Test.Consensus.PeerSimulator.StateDiagram (peerSimStateDiagramSTMTracerDebug) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PeerSimulator.Trace -import Test.Consensus.PointSchedule (BlockFetchTimeout, +import Test.Consensus.PointSchedule (BlockFetchTimeout, CSJParams (..), GenesisTest (..), GenesisTestFull, LoPBucketParams (..), NodeState, PeersSchedule, peersStatesRelative) import Test.Consensus.PointSchedule.Peers (Peer (..), PeerId, @@ -89,13 +90,17 @@ data SchedulerConfig = -- governor (GDD). , scEnableLoE :: Bool - -- | Whether to enable to LoP. The parameters of the LoP come from + -- | Whether to enable the LoP. The parameters of the LoP come from -- 'GenesisTest'. , scEnableLoP :: Bool -- | Enable node downtime if this is 'Just', using the value as minimum tick -- duration to trigger it. , scDowntime :: Maybe DiffTime + + -- | Whether to enable ChainSync Jumping. The parameters come from + -- 'GenesisTest'. + , scEnableCSJ :: Bool } -- | Default scheduler config @@ -109,7 +114,8 @@ defaultSchedulerConfig = scTraceState = False, scEnableLoE = False, scEnableLoP = False, - scDowntime = Nothing + scDowntime = Nothing, + scEnableCSJ = False } -- | Enable debug tracing during a scheduler test. 
@@ -135,6 +141,7 @@ startChainSyncConnectionThread :: ChainSyncResources m blk -> ChainSyncTimeout -> ChainSyncLoPBucketConfig -> + CSJConfig -> StateViewTracers blk m -> StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> m (Thread m (), Thread m ()) @@ -148,6 +155,7 @@ startChainSyncConnectionThread ChainSyncResources {csrServer} chainSyncTimeouts_ chainSyncLoPBucketConfig + csjConfig tracers varHandles = do @@ -155,7 +163,7 @@ startChainSyncConnectionThread clientThread <- forkLinkedThread registry ("ChainSyncClient" <> condense srPeerId) $ bracketSyncWithFetchClient fetchClientRegistry srPeerId $ - ChainSync.runChainSyncClient tracer cfg chainDbView srPeerId chainSyncTimeouts_ chainSyncLoPBucketConfig tracers varHandles clientChannel + ChainSync.runChainSyncClient tracer cfg chainDbView srPeerId chainSyncTimeouts_ chainSyncLoPBucketConfig csjConfig tracers varHandles clientChannel serverThread <- forkLinkedThread registry ("ChainSyncServer" <> condense srPeerId) $ ChainSync.runChainSyncServer tracer srPeerId tracers csrServer serverChannel @@ -234,10 +242,20 @@ dispatchTick tracer varHandles peers lifecycle node (number, (duration, Peer pid traceNewTick :: m () traceNewTick = do currentChain <- atomically $ ChainDB.getCurrentChain (lnChainDb node) - csState <- atomically $ do + (csState, jumpingStates) <- atomically $ do m <- readTVar varHandles - traverse (readTVar . CSClient.cschState) (m Map.!? pid) - traceWith tracer $ TraceNewTick number duration (Peer pid state) currentChain (CSClient.csCandidate <$> csState) + csState <- traverse (readTVar . CSClient.cschState) (m Map.!? pid) + jumpingStates <- forM (Map.toList m) $ \(peer, h) -> do + st <- readTVar (CSClient.cschJumping h) + pure (peer, st) + pure (csState, jumpingStates) + traceWith tracer $ TraceNewTick + number + duration + (Peer pid state) + currentChain + (CSClient.csCandidate <$> csState) + jumpingStates -- | Iterate over a 'PointSchedule', sending each tick to the associated peer in turn, -- giving each peer a chunk of computation time, sequentially, until it satisfies the @@ -325,6 +343,7 @@ startNode schedulerConfig genesisTest interval = do prChainSync chainSyncTimeouts_ chainSyncLoPBucketConfig + csjConfig lnStateViewTracers handles BlockFetch.startKeepAliveThread peerRegistry fetchClientRegistry pid @@ -369,6 +388,7 @@ startNode schedulerConfig genesisTest interval = do { gtChainSyncTimeouts , gtBlockFetchTimeouts , gtLoPBucketParams = LoPBucketParams { lbpCapacity, lbpRate } + , gtCSJParams = CSJParams { csjpJumpSize } } = genesisTest chainSyncTimeouts_ = @@ -381,6 +401,11 @@ startNode schedulerConfig genesisTest interval = do then ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig { csbcCapacity = lbpCapacity, csbcRate = lbpRate } else ChainSyncLoPBucketDisabled + csjConfig = + if scEnableCSJ schedulerConfig + then CSJEnabled CSJEnabledConfig { csjcJumpSize = csjpJumpSize } + else CSJDisabled + blockFetchTimeouts_ = if scEnableBlockFetchTimeouts schedulerConfig then gtBlockFetchTimeouts diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index b3f42c5b3f..583f4541e5 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -1,8 +1,9 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE 
NumericUnderscores #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE ExistentialQuantification #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} -- | Helpers for tracing used by the peer simulator. module Test.Consensus.PeerSimulator.Trace ( @@ -28,6 +29,12 @@ import Ouroboros.Consensus.Genesis.Governor (DensityBounds (..), TraceGDDEvent (..)) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (TraceChainSyncClientEvent (..)) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping + (Instruction (..), JumpInstruction (..), JumpResult (..)) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State + (ChainSyncJumpingJumperState (..), + ChainSyncJumpingState (..), DynamoInitState (..), + JumpInfo (..)) import Ouroboros.Consensus.Storage.ChainDB.API (LoE (..)) import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB import Ouroboros.Consensus.Storage.ChainDB.Impl.Types @@ -36,14 +43,15 @@ import Ouroboros.Consensus.Util.Condense (condense) import Ouroboros.Consensus.Util.IOLike (IOLike, MonadMonotonicTime, Time (Time), atomically, getMonotonicTime, readTVarIO, uncheckedNewTVarM, writeTVar) -import Ouroboros.Network.AnchoredFragment (AnchoredFragment) +import Ouroboros.Network.AnchoredFragment (AnchoredFragment, + headPoint) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (SlotNo (SlotNo), Tip, castPoint) import Test.Consensus.PointSchedule (NodeState) import Test.Consensus.PointSchedule.Peers (Peer (Peer), PeerId) import Test.Util.TersePrinting (terseAnchor, terseBlock, terseFragment, terseHFragment, terseHeader, tersePoint, - terseRealPoint, terseTip) + terseRealPoint, terseTip, terseWithOrigin) import Test.Util.TestBlock (TestBlock) import Text.Printf (printf) @@ -56,9 +64,15 @@ data TraceSchedulerEvent blk | -- | Right after running the last tick of the schedule. TraceEndOfTime | -- | When beginning a new tick. Contains the tick number (counting from - -- @0@), the duration of the tick, the states, the current chain and the - -- candidate fragment. - TraceNewTick Int DiffTime (Peer (NodeState blk)) (AnchoredFragment (Header blk)) (Maybe (AnchoredFragment (Header blk))) + -- @0@), the duration of the tick, the states, the current chain, the + -- candidate fragment, and the jumping states. + forall m. 
TraceNewTick + Int + DiffTime + (Peer (NodeState blk)) + (AnchoredFragment (Header blk)) + (Maybe (AnchoredFragment (Header blk))) + [(PeerId, ChainSyncJumpingState m blk)] | TraceNodeShutdownStart (WithOrigin SlotNo) | TraceNodeShutdownComplete | TraceNodeStartupStart @@ -181,7 +195,7 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case [ "╶──────────────────────────────────────────────────────────────────────────────╴", "Finished running point schedule" ] - TraceNewTick number duration (Peer pid state) currentChain mCandidateFrag -> do + TraceNewTick number duration (Peer pid state) currentChain mCandidateFrag jumpingStates -> do time <- getMonotonicTime setTickTime time traceLinesWith tracer0 @@ -193,9 +207,9 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case " peer: " ++ condense pid, " state: " ++ condense state, " current chain: " ++ terseHFragment currentChain, - " candidate fragment: " ++ maybe "Nothing" terseHFragment mCandidateFrag + " candidate fragment: " ++ maybe "Nothing" terseHFragment mCandidateFrag, + " jumping states:\n" ++ traceJumpingStates jumpingStates ] - TraceNodeShutdownStart immTip -> traceWith tracer0 (" Initiating node shutdown with immutable tip at slot " ++ condense immTip) TraceNodeShutdownComplete -> @@ -205,6 +219,39 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case TraceNodeStartupComplete selection -> traceWith tracer0 (" Node startup complete with selection " ++ terseHFragment selection) + where + traceJumpingStates :: [(PeerId, ChainSyncJumpingState m TestBlock)] -> String + traceJumpingStates = unlines . map (\(pid, state) -> " " ++ condense pid ++ ": " ++ traceJumpingState state) + + traceJumpingState :: ChainSyncJumpingState m TestBlock -> String + traceJumpingState = \case + Dynamo initState lastJump -> + let showInitState = case initState of + DynamoStarting ji -> terseJumpInfo ji + DynamoStarted -> "DynamoStarted" + in unwords ["Dynamo", showInitState, terseWithOrigin show lastJump] + Objector initState goodJumpInfo badPoint -> unwords + [ "Objector" + , show initState + , terseJumpInfo goodJumpInfo + , tersePoint (castPoint badPoint) + ] + Disengaged initState -> "Disengaged " ++ show initState + Jumper _ st -> "Jumper _ " ++ traceJumperState st + + traceJumperState :: ChainSyncJumpingJumperState TestBlock -> String + traceJumperState = \case + Happy initState mGoodJumpInfo -> + "Happy " ++ show initState ++ " " ++ maybe "Nothing" terseJumpInfo mGoodJumpInfo + FoundIntersection initState goodJumpInfo point -> unwords + [ "(FoundIntersection" + , show initState + , terseJumpInfo goodJumpInfo + , tersePoint $ castPoint point, ")" + ] + LookingForIntersection goodJumpInfo badJumpInfo -> unwords + ["(LookingForIntersection", terseJumpInfo goodJumpInfo, terseJumpInfo badJumpInfo, ")"] + traceScheduledServerHandlerEventTestBlockWith :: Tracer m String -> String -> @@ -343,9 +390,33 @@ traceChainSyncClientEventTestBlockWith pid tracer = \case trace $ "Threw an exception: " ++ show exception TraceTermination result -> trace $ "Terminated with result: " ++ show result + TraceOfferJump point -> + trace $ "Offering jump to " ++ tersePoint point + TraceJumpResult (AcceptedJump (JumpTo ji)) -> + trace $ "Accepted jump to " ++ terseJumpInfo ji + TraceJumpResult (RejectedJump (JumpTo ji)) -> + trace $ "Rejected jump to " ++ terseJumpInfo ji + TraceJumpResult (AcceptedJump (JumpToGoodPoint ji)) -> + trace $ "Accepted jump to good point: " ++ terseJumpInfo ji + TraceJumpResult (RejectedJump 
(JumpToGoodPoint ji)) -> + trace $ "Rejected jump to good point: " ++ terseJumpInfo ji + TraceJumpingWaitingForNextInstruction -> + trace "Waiting for next instruction from the jumping governor" + TraceJumpingInstructionIs instr -> + trace $ "Received instruction: " ++ showInstr instr where trace = traceUnitWith tracer ("ChainSyncClient " ++ condense pid) + showInstr :: Instruction TestBlock -> String + showInstr = \case + JumpInstruction (JumpTo ji) -> "JumpTo " ++ terseJumpInfo ji + JumpInstruction (JumpToGoodPoint ji) -> "JumpToGoodPoint " ++ terseJumpInfo ji + RunNormally -> "RunNormally" + Restart -> "Restart" + +terseJumpInfo :: JumpInfo TestBlock -> String +terseJumpInfo ji = tersePoint (castPoint $ headPoint $ jTheirFragment ji) + traceChainSyncClientTerminationEventTestBlockWith :: PeerId -> Tracer m String -> @@ -385,7 +456,7 @@ terseGDDEvent = \case " Selection: " ++ terseHFragment curChain, " Candidates:" ] ++ - showPeers (either (const "G") terseHeader . AF.head <$> candidates) ++ + showPeers (tersePoint . castPoint . AF.headPoint <$> candidates) ++ [ " Candidate suffixes (bounds):" ] ++ diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs index 0d3aaa7848..76af1f7c33 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs @@ -22,6 +22,7 @@ -- who then activates the next tick's peer. module Test.Consensus.PointSchedule ( BlockFetchTimeout (..) + , CSJParams (..) , ForecastRange (..) , GenesisTest (..) , GenesisTestFull @@ -444,6 +445,11 @@ data LoPBucketParams = LoPBucketParams { lbpRate :: Rational } +data CSJParams = CSJParams { + csjpJumpSize :: SlotNo + } + deriving Show + -- | Similar to 'ChainSyncTimeout' for BlockFetch. Only the states in which the -- server has agency are specified. REVIEW: Should it be upstreamed to -- ouroboros-network-protocols? 
@@ -462,6 +468,7 @@ data GenesisTest blk schedule = GenesisTest gtChainSyncTimeouts :: ChainSyncTimeout, gtBlockFetchTimeouts :: BlockFetchTimeout, gtLoPBucketParams :: LoPBucketParams, + gtCSJParams :: CSJParams, gtSlotLength :: SlotLength, gtSchedule :: schedule } @@ -482,6 +489,7 @@ prettyGenesisTest prettySchedule genesisTest = , " gtForecastRange: " ++ show (unForecastRange gtForecastRange) , " gtDelay: " ++ show delta , " gtSlotLength: " ++ show gtSlotLength + , " gtCSJParams: " ++ show gtCSJParams , " gtChainSyncTimeouts: " , " canAwait = " ++ show canAwaitTimeout , " intersect = " ++ show intersectTimeout @@ -510,6 +518,7 @@ prettyGenesisTest prettySchedule genesisTest = , gtBlockFetchTimeouts = BlockFetchTimeout{busyTimeout, streamingTimeout} , gtLoPBucketParams = LoPBucketParams{lbpCapacity, lbpRate} , gtSlotLength + , gtCSJParams , gtSchedule } = genesisTest diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs index 78e3ceebef..74a97ceecb 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleContexts #-} @@ -38,6 +39,7 @@ import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.String (IsString (fromString)) import GHC.Generics (Generic) +import NoThunks.Class (NoThunks) import Ouroboros.Consensus.Util.Condense (Condense (..), CondenseList (..), PaddingDirection (..), condenseListWithPadding) @@ -47,7 +49,7 @@ data PeerId = HonestPeer | PeerId String - deriving (Eq, Generic, Show, Ord) + deriving (Eq, Generic, Show, Ord, NoThunks) instance IsString PeerId where fromString "honest" = HonestPeer diff --git a/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs b/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs index 0d8d54e395..aad24b713b 100644 --- a/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs +++ b/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs @@ -138,6 +138,7 @@ oneBenchRun , CSClient.setLatestSlot = \_ -> pure () , CSClient.idling = CSClient.noIdling , CSClient.loPBucket = CSClient.noLoPBucket + , CSClient.jumping = CSClient.noJumping } server :: ChainSyncServer H (Point B) (Tip B) IO () diff --git a/ouroboros-consensus/ouroboros-consensus.cabal b/ouroboros-consensus/ouroboros-consensus.cabal index 7e55dcc5e0..93a5b01352 100644 --- a/ouroboros-consensus/ouroboros-consensus.cabal +++ b/ouroboros-consensus/ouroboros-consensus.cabal @@ -169,6 +169,7 @@ library Ouroboros.Consensus.MiniProtocol.BlockFetch.Server Ouroboros.Consensus.MiniProtocol.ChainSync.Client Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck + Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State Ouroboros.Consensus.MiniProtocol.ChainSync.Server Ouroboros.Consensus.MiniProtocol.LocalStateQuery.Server diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index 786b1dfae0..f6ae1f1270 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ 
b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
@@ -8,6 +8,7 @@
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiWayIf #-}
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE Rank2Types #-}
 {-# LANGUAGE ScopedTypeVariables #-}
@@ -53,15 +54,19 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client (
   , Consensus
   , Our (..)
   , Their (..)
-    -- * Trace events
+    -- * Genesis configuration
+  , CSJConfig (..)
+  , CSJEnabledConfig (..)
   , ChainSyncLoPBucketConfig (..)
   , ChainSyncLoPBucketEnabledConfig (..)
+    -- * Trace events
   , InvalidBlockReason
   , TraceChainSyncClientEvent (..)
     -- * State shared with other components
   , ChainSyncClientHandle (..)
   , ChainSyncState (..)
   , ChainSyncStateView (..)
+  , Jumping.noJumping
   , chainSyncStateFor
   , noIdling
   , noLoPBucket
@@ -74,6 +79,7 @@ import           Control.Tracer
 import           Data.Kind (Type)
 import           Data.Map.Strict (Map)
 import qualified Data.Map.Strict as Map
+import           Data.Maybe (fromMaybe)
 import           Data.Proxy
 import           Data.Typeable
 import           Data.Word (Word64)
@@ -94,6 +100,7 @@ import           Ouroboros.Consensus.Ledger.Basics (LedgerState)
 import           Ouroboros.Consensus.Ledger.Extended
 import           Ouroboros.Consensus.Ledger.SupportsProtocol
 import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck
+import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as Jumping
 import           Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State
 import           Ouroboros.Consensus.Node.NetworkProtocolVersion
 import           Ouroboros.Consensus.Protocol.Abstract
@@ -105,7 +112,7 @@ import           Ouroboros.Consensus.Util.AnchoredFragment (cross)
 import           Ouroboros.Consensus.Util.Assert (assertWithMsg)
 import           Ouroboros.Consensus.Util.EarlyExit (WithEarlyExit, exitEarly)
 import qualified Ouroboros.Consensus.Util.EarlyExit as EarlyExit
-import           Ouroboros.Consensus.Util.IOLike
+import           Ouroboros.Consensus.Util.IOLike hiding (handle)
 import qualified Ouroboros.Consensus.Util.LeakyBucket as LeakyBucket
 import           Ouroboros.Consensus.Util.STM (Fingerprint, Watcher (..),
                      WithFingerprint (..), withWatcher)
@@ -163,6 +170,36 @@ data ChainSyncLoPBucketConfig
     -- | Enable the leaky bucket.
     ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig
 
+-- | Configuration of ChainSync Jumping
+data CSJConfig
+  =
+    -- | Disable ChainSync Jumping. Each client will fully synchronize with
+    -- the chain of its peer.
+    CSJDisabled
+  |
+    -- | Enable ChainSync Jumping
+    CSJEnabled CSJEnabledConfig
+
+data CSJEnabledConfig = CSJEnabledConfig {
+  -- | The _ideal_ size for ChainSync jumps. Note that the algorithm
+  -- is best-effort: there might not be exactly `csjcJumpSize` slots between two
+  -- jumps, depending on the chain.
+  --
+  -- There can be slightly fewer slots between jumps if there is no block
+  -- exactly at the boundary. Jumps are often made when a block is announced
+  -- after the jump boundary.
+  --
+  -- There can be even fewer slots if a dynamo is elected and it requires an
+  -- initial jump regardless of how far we are from the next jump boundary.
+  --
+  -- csjcJumpSize must be greater than 0 and smaller than or equal to the
+  -- genesis window size. The larger the jump size, the fewer jumps are made
+  -- and the less the peers are involved in syncing. A jump size as large as
+  -- the genesis window has a higher chance that dishonest peers can delay
+  -- syncing by a small margin (around 2 minutes per dishonest peer with
+  -- mainnet parameters).
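+  --
+  -- For illustration only: the concrete value below is hypothetical, not a
+  -- recommendation, and any real value must respect the bounds above.
+  -- Enabling CSJ could look like:
+  --
+  -- > csjConfig :: CSJConfig
+  -- > csjConfig = CSJEnabled CSJEnabledConfig { csjcJumpSize = SlotNo 3600 }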
+ csjcJumpSize :: SlotNo +} + defaultChainDbView :: (IOLike m, LedgerSupportsProtocol blk) => ChainDB m blk -> ChainDbView m blk @@ -264,6 +301,9 @@ data ChainSyncStateView m blk = ChainSyncStateView { -- | Control the 'LeakyBucket' for the LoP. , csvLoPBucket :: !(LoPBucket m) + + -- | Jumping-related API. + , csvJumping :: !(Jumping.Jumping m blk) } deriving stock (Generic) @@ -272,7 +312,8 @@ deriving anyclass instance ( HasHeader blk, NoThunks (Header blk) ) => NoThunks (ChainSyncStateView m blk) -bracketChainSyncClient :: + +bracketChainSyncClient :: forall m peer blk a. ( IOLike m , Ord peer , LedgerSupportsProtocol blk @@ -285,6 +326,7 @@ bracketChainSyncClient :: -> peer -> NodeToNodeVersion -> ChainSyncLoPBucketConfig + -> CSJConfig -> (ChainSyncStateView m blk -> m a) -> m a bracketChainSyncClient @@ -294,46 +336,74 @@ bracketChainSyncClient peer version csBucketConfig + csjConfig body - = - bracket acquireHandle releaseHandle - $ \varState -> + = mkChainSyncClientHandleState >>= \csHandleState -> + withCSJCallbacks csHandleState csjConfig $ \csjCallbacks -> withWatcher "ChainSync.Client.rejectInvalidBlocks" - (invalidBlockWatcher varState) + (invalidBlockWatcher csHandleState) $ LeakyBucket.execAgainstBucket lopBucketConfig $ \lopBucket -> body ChainSyncStateView { csvSetCandidate = - modifyTVar varState . \ c s -> s {csCandidate = c} + modifyTVar csHandleState . \ c s -> s {csCandidate = c} , csvSetLatestSlot = - modifyTVar varState . \ ls s -> s {csLatestSlot = Just $! ls} + modifyTVar csHandleState . \ ls s -> s {csLatestSlot = Just $! ls} , csvIdling = Idling { - idlingStart = atomically $ modifyTVar varState $ \ s -> s {csIdling = True} - , idlingStop = atomically $ modifyTVar varState $ \ s -> s {csIdling = False} + idlingStart = atomically $ modifyTVar csHandleState $ \ s -> s {csIdling = True} + , idlingStop = atomically $ modifyTVar csHandleState $ \ s -> s {csIdling = False} } , csvLoPBucket = LoPBucket { lbPause = LeakyBucket.setPaused lopBucket True , lbResume = LeakyBucket.setPaused lopBucket False , lbGrantToken = void $ LeakyBucket.fill lopBucket 1 } + , csvJumping = csjCallbacks } where - acquireHandle = do - cschState <- newTVarIO $ ChainSyncState { - csCandidate = AF.Empty AF.AnchorGenesis - , csLatestSlot = Nothing - , csIdling = False - } + mkChainSyncClientHandleState = + newTVarIO ChainSyncState { + csCandidate = AF.Empty AF.AnchorGenesis + , csLatestSlot = Nothing + , csIdling = False + } + + withCSJCallbacks :: + StrictTVar m (ChainSyncState blk) -> + CSJConfig -> + (Jumping.Jumping m blk -> m a) -> + m a + withCSJCallbacks cschState CSJDisabled f = do + tid <- myThreadId + cschJumpInfo <- newTVarIO Nothing + cschJumping <- newTVarIO (Disengaged DisengagedDone) + let handle = ChainSyncClientHandle { + cschGDDKill = throwTo tid DensityTooLow + , cschState + , cschJumping + , cschJumpInfo + } + insertHandle = atomically $ modifyTVar varHandles $ Map.insert peer handle + deleteHandle = atomically $ modifyTVar varHandles $ Map.delete peer + bracket_ insertHandle deleteHandle $ f Jumping.noJumping + + withCSJCallbacks csHandleState (CSJEnabled csjEnabledConfig) f = + bracket (acquireContext csHandleState csjEnabledConfig) releaseContext $ \peerContext -> + f $ Jumping.mkJumping peerContext + acquireContext cschState (CSJEnabledConfig jumpSize) = do tid <- myThreadId atomically $ do - modifyTVar varHandles $ Map.insert peer ChainSyncClientHandle { - cschGDDKill = throwTo tid DensityTooLow + cschJumpInfo <- newTVar Nothing + context <- Jumping.makeContext 
varHandles jumpSize + Jumping.registerClient context peer cschState $ \cschJumping -> ChainSyncClientHandle + { cschGDDKill = throwTo tid DensityTooLow , cschState + , cschJumping + , cschJumpInfo } - pure cschState - releaseHandle _ = atomically $ modifyTVar varHandles $ Map.delete peer + releaseContext = atomically . Jumping.unregisterClient invalidBlockWatcher varState = invalidBlockRejector @@ -341,7 +411,7 @@ bracketChainSyncClient -- | Wrapper around 'LeakyBucket.execAgainstBucket' that handles the -- disabled bucket by running the given action with dummy handlers. - lopBucketConfig :: MonadThrow m => LeakyBucket.Config m + lopBucketConfig :: LeakyBucket.Config m lopBucketConfig = case csBucketConfig of ChainSyncLoPBucketDisabled -> LeakyBucket.dummyConfig @@ -667,6 +737,7 @@ data DynamicEnv m blk = DynamicEnv { , setLatestSlot :: WithOrigin SlotNo -> STM m () , idling :: Idling m , loPBucket :: LoPBucket m + , jumping :: Jumping.Jumping m blk } -- | General values collectively needed by the top-level entry points @@ -760,8 +831,7 @@ chainSyncClient cfgEnv dynEnv = } = chainDbView DynamicEnv { - idling, - loPBucket + idling } = dynEnv mkIntEnv :: @@ -807,7 +877,7 @@ chainSyncClient cfgEnv dynEnv = recvMsgRollForward = \_hdr _tip -> go n' s , recvMsgRollBackward = \_pt _tip -> go n' s } - in Stateful $ \s -> do (idlingStop idling >> lbResume loPBucket); go n0 s + in Stateful $ \s -> idlingStop idling >> go n0 s terminate :: ChainSyncClientResult @@ -897,6 +967,7 @@ findIntersectionTop cfgEnv dynEnv intEnv = DynamicEnv { setCandidate + , jumping } = dynEnv InternalEnv { @@ -998,9 +1069,6 @@ findIntersectionTop cfgEnv dynEnv intEnv = disconnect $ InvalidIntersection intersection (ourTipFromChain ourFrag) theirTip - atomically $ do - setCandidate theirFrag - setLatestSlot dynEnv (AF.headSlot theirFrag) let kis = assertKnownIntersectionInvariants (configConsensus cfg) $ KnownIntersectionState { @@ -1010,6 +1078,10 @@ findIntersectionTop cfgEnv dynEnv intEnv = , theirHeaderStateHistory , kBestBlockNo = uBestBlockNo } + atomically $ do + updateJumpInfoSTM jumping kis + setCandidate theirFrag + setLatestSlot dynEnv (AF.headSlot theirFrag) continueWithState kis $ knownIntersectionStateTop cfgEnv dynEnv intEnv theirTip @@ -1047,6 +1119,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = , idling , loPBucket , setCandidate + , jumping } = dynEnv InternalEnv { @@ -1063,7 +1136,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = -- Request the next message (roll forward or backward). -- -- This is also the place where we checked whether we're asked to terminate - -- by the mux layer. + -- by the mux layer or to wait and perform a CSJ jump. nextStep :: MkPipelineDecision -> Nat n @@ -1071,18 +1144,128 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = -> Stateful m blk (KnownIntersectionState blk) (ClientPipelinedStIdle n) - nextStep mkPipelineDecision n theirTip = Stateful $ \kis -> do + nextStep mkPipelineDecision n theirTip = Stateful $ \kis -> atomically controlMessageSTM >>= \case -- We have been asked to terminate the client Terminate -> terminateAfterDrain n $ AskedToTerminate - _continue -> - return $ - requestNext - kis - mkPipelineDecision - n - theirTip - (AF.headBlockNo (theirFrag kis)) + _continue -> do + -- Wait until next jumping instruction, which can be either to + -- jump or to run normal ChainSync. 
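+              -- It can also instruct us to restart ChainSync ('Jumping.Restart').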
+ -- Pause LoP while waiting, we'll resume it if we get `RunNormally` + traceWith tracer TraceJumpingWaitingForNextInstruction + lbPause loPBucket + instruction <- Jumping.jgNextInstruction jumping + traceWith tracer $ TraceJumpingInstructionIs instruction + case instruction of + Jumping.JumpInstruction jumpInstruction -> + continueWithState kis + $ drainThePipe n + $ offerJump mkPipelineDecision jumpInstruction + Jumping.RunNormally -> do + lbResume loPBucket + continueWithState kis + $ nextStep' mkPipelineDecision n theirTip + Jumping.Restart -> do + lbResume loPBucket + continueWithState () + $ drainThePipe n + $ findIntersectionTop + cfgEnv + dynEnv + intEnv + (kBestBlockNo kis) + NoMoreIntersection + + nextStep' :: + MkPipelineDecision + -> Nat n + -> Their (Tip blk) + -> Stateful m blk + (KnownIntersectionState blk) + (ClientPipelinedStIdle n) + nextStep' mkPipelineDecision n theirTip = + Stateful $ \kis -> + return $ + requestNext + kis + mkPipelineDecision + n + theirTip + (AF.headBlockNo (theirFrag kis)) + + offerJump :: + MkPipelineDecision + -> Jumping.JumpInstruction blk + -> Stateful m blk + (KnownIntersectionState blk) + (ClientPipelinedStIdle Z) + offerJump mkPipelineDecision jump = Stateful $ \kis -> do + let jumpInfo = case jump of + Jumping.JumpTo ji -> ji + Jumping.JumpToGoodPoint ji -> ji + dynamoTipPt = castPoint $ AF.headPoint $ jTheirFragment jumpInfo + traceWith tracer $ TraceOfferJump dynamoTipPt + return $ + SendMsgFindIntersect [dynamoTipPt] $ + ClientPipelinedStIntersect { + recvMsgIntersectFound = \pt theirTip -> + if + | pt == dynamoTipPt -> do + Jumping.jgProcessJumpResult jumping $ Jumping.AcceptedJump jump + traceWith tracer $ TraceJumpResult $ Jumping.AcceptedJump jump + let kis' = case jump of + -- Since the updated kis is needed to validate headers, + -- we only update it if we are becoming a Dynamo or + -- an objector + Jumping.JumpToGoodPoint{} -> combineJumpInfo kis jumpInfo + _ -> kis + continueWithState kis' $ nextStep mkPipelineDecision Zero (Their theirTip) + | otherwise -> throwIO InvalidJumpResponse + , + recvMsgIntersectNotFound = \theirTip -> do + Jumping.jgProcessJumpResult jumping $ Jumping.RejectedJump jump + traceWith tracer $ TraceJumpResult $ Jumping.RejectedJump jump + continueWithState kis $ nextStep mkPipelineDecision Zero (Their theirTip) + } + where + combineJumpInfo :: + KnownIntersectionState blk + -> JumpInfo blk + -> KnownIntersectionState blk + combineJumpInfo kis ji = + let mRewoundHistory = + HeaderStateHistory.rewind + (AF.castPoint $ AF.headPoint $ jTheirFragment ji) + (jTheirHeaderStateHistory ji) + -- We assume the history is always possible to rewind. The case + -- where this wouldn't be true is if the original candidate + -- fragment provided by the dynamo contained headers that have + -- no corresponding header state. + rewoundHistory = + fromMaybe (error "offerJump: cannot rewind history") mRewoundHistory + -- If the tip of jTheirFragment does not match the tip of + -- jTheirHeaderStateHistory, then the history needs rewinding. + historyNeedsRewinding = + (/= AF.headPoint (jTheirFragment ji)) $ + castPoint $ + either headerStatePoint headerStatePoint $ + AF.head $ + HeaderStateHistory.unHeaderStateHistory $ + jTheirHeaderStateHistory ji + -- Recompute the intersection only if a suffix of the candidate + -- fragment was trimmed. 
+ intersection + | historyNeedsRewinding = case AF.intersectionPoint (jOurFragment ji) (jTheirFragment ji) of + Just po -> castPoint po + Nothing -> error "offerJump: the jumpInfo should have a valid intersection" + | otherwise = jMostRecentIntersection ji + in KnownIntersectionState + { mostRecentIntersection = intersection + , ourFrag = jOurFragment ji + , theirFrag = jTheirFragment ji + , theirHeaderStateHistory = rewoundHistory + , kBestBlockNo = max (fromWithOrigin 0 $ AF.headBlockNo $ jTheirFragment ji) (kBestBlockNo kis) + } requestNext :: KnownIntersectionState blk @@ -1101,7 +1284,8 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = theirTipBlockNo onMsgAwaitReply = idlingStart idling >> - lbPause loPBucket + lbPause loPBucket >> + Jumping.jgOnAwaitReply jumping in case (n, decision) of (Zero, (Request, mkPipelineDecision')) -> @@ -1195,6 +1379,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = checkKnownInvalid cfgEnv dynEnv intEnv hdr + Jumping.jgOnRollForward jumping (blockPoint hdr) atomically (setLatestSlot dynEnv (NotOrigin slotNo)) checkTime cfgEnv dynEnv intEnv kis arrival slotNo >>= \case @@ -1214,6 +1399,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = kis''' <- checkLoP cfgEnv dynEnv hdr kis'' atomically $ do + updateJumpInfoSTM jumping kis''' setCandidate (theirFrag kis''') atomically $ traceWith headerMetricsTracer (slotNo, arrivalTime) @@ -1303,12 +1489,28 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = , kBestBlockNo } atomically $ do + updateJumpInfoSTM jumping kis' setCandidate theirFrag' setLatestSlot dynEnv (pointSlot rollBackPoint) + Jumping.jgOnRollBackward jumping (pointSlot rollBackPoint) + continueWithState kis' $ nextStep mkPipelineDecision n theirTip +-- | Let ChainSync jumping know about an update to the 'KnownIntersectionState'. +updateJumpInfoSTM :: + Jumping.Jumping m blk + -> KnownIntersectionState blk + -> STM m () +updateJumpInfoSTM jumping kis@KnownIntersectionState{ourFrag} = + Jumping.jgUpdateJumpInfo jumping JumpInfo + { jMostRecentIntersection = mostRecentIntersection kis + , jOurFragment = ourFrag + , jTheirFragment = theirFrag kis + , jTheirHeaderStateHistory = theirHeaderStateHistory kis + } + {------------------------------------------------------------------------------- Header checks -------------------------------------------------------------------------------} @@ -1894,6 +2096,9 @@ data ChainSyncClientException = | EmptyBucket -- ^ The peer lost its race against the bucket. + | + InvalidJumpResponse + -- ^ When the peer responded incorrectly to a jump request. | DensityTooLow -- ^ The peer has been deemed unworthy by the GDD @@ -1925,6 +2130,9 @@ instance Eq ChainSyncClientException where (==) EmptyBucket EmptyBucket = True + (==) + InvalidJumpResponse InvalidJumpResponse + = True (==) DensityTooLow DensityTooLow = True @@ -1934,6 +2142,7 @@ instance Eq ChainSyncClientException where InvalidBlock{} == _ = False InFutureHeaderExceedsClockSkew{} == _ = False EmptyBucket == _ = False + InvalidJumpResponse == _ = False DensityTooLow == _ = False instance Exception ChainSyncClientException @@ -1976,6 +2185,19 @@ data TraceChainSyncClientEvent blk = -- ^ Whether we added a token to the LoP bucket of the peer. Also carries -- the considered header and the best block number known prior to this -- header. + | + TraceOfferJump (Point blk) + -- ^ ChainSync Jumping offering a point to jump to. + | + TraceJumpResult (Jumping.JumpResult blk) + -- ^ ChainSync Jumping -- reply. 
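+    -- (i.e. whether the jump was accepted or rejected; see 'Jumping.JumpResult').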
+  |
+    TraceJumpingWaitingForNextInstruction
+    -- ^ ChainSync Jumping -- the ChainSync client is requesting the next
+    -- instruction.
+  |
+    TraceJumpingInstructionIs (Jumping.Instruction blk)
+    -- ^ ChainSync Jumping -- the ChainSync client got its next instruction.
 
 deriving instance (
   BlockSupportsProtocol blk
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
new file mode 100644
index 0000000000..0bae5773f5
--- /dev/null
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
@@ -0,0 +1,823 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE UndecidableInstances #-}
+
+-- | ChainSync jumping (CSJ) is an optimization for the ChainSync protocol that
+-- allows nodes to sync without downloading headers from all of the honest
+-- peers. This load is undesirable as it slows down all the peers involved.
+--
+-- The idea is to download the headers of a chain from a single peer (the
+-- dynamo) and then periodically ask the other peers (the jumpers) whether
+-- they agree with the dynamo's chain.
+--
+-- When the jumpers disagree with the dynamo, the jumper with the oldest
+-- intersection is asked to compete with the dynamo in the GDD logic. If the
+-- dynamo is disconnected, a new dynamo is elected and the objector is demoted
+-- to a jumper.
+--
+-- If the objector is disconnected, the syncing process continues with the
+-- dynamo and the remaining jumpers.
+--
+-- The main property of the algorithm is that it never downloads headers from
+-- more than two plausibly honest peers at a time (a dynamo and an objector).
+-- All other peers are either waiting their turn to compete with the dynamo,
+-- or are in agreement with it, or are disengaged (see next section).
+--
+-- The algorithm might still download headers redundantly from peers that do
+-- historical rollbacks. These rollbacks, however, constitute dishonest
+-- behavior, and CSJ does not concern itself with avoiding load to dishonest
+-- peers. Avoiding the load induced by dishonest peers on the syncing node
+-- would additionally require disconnecting peers that do historical
+-- rollbacks. This is not done by CSJ.
+--
+-- Interactions with the Genesis Density Disconnection logic
+-- ---------------------------------------------------------
+--
+-- It is possible that neither the dynamo nor the objector is disconnected.
+-- This could happen if:
+-- 1. They both serve the same chain, or
+-- 2. They both claim to have no more headers.
+--
+-- To avoid (1), CSJ checks that the objector disagrees with the dynamo at the
+-- point it claimed to disagree as a jumper. If the objector agrees with the
+-- dynamo, it is disengaged. A disengaged peer is not asked to jump or act as
+-- dynamo or objector. Instead, it continues to offer headers for the rest of
+-- the syncing. When the objector is disengaged, a new objector is elected
+-- among the dissenting jumpers. 
If there are no dissenting jumpers left, the +-- syncing continues with the dynamo and the remaining jumpers. +-- +-- To prevent the dynamo from agreeing with the objector instead, the dynamo is +-- not allowed to rollback before the last jump it requested. If the dynamo +-- tries to rollback before the last jump, it is disengaged and a new dynamo is +-- elected. +-- +-- To avoid (2) CSJ disengages a peer as soon as it claims to have no more +-- headers. Syncing continues with a new elected dynamo or objector depending on +-- the disengaged peer's role. +-- +-- CSJ finishes and is turned off when all peers have been disengaged. +-- +-- Interactions with the ChainSync client +-- -------------------------------------- +-- +-- The ChainSync client interacts with CSJ through some callbacks that determine +-- when the client should pause, download headers, or ask about agreement with +-- a given point (jumping). See the 'Jumping' type for more details. +-- +-- Interactions with the Limit on Patience +-- --------------------------------------- +-- +-- Jumpers don't leak the Limit on Patience (LoP) bucket until they are promoted +-- to dynamos or objectors. And the leaking is stopped as soon as they are +-- disengaged or demoted. +-- +-- If a jumper refrains from answering to jumps, they will be disconnected with +-- the 'intersectTimeout' (in 'ChainSyncTimeout'). +-- +-- A jumper answering just before the timeout will not delay the syncing +-- process by a large amount. If they agree with the dynamo, the dynamo will be +-- busy downloading headers and validating blocks while the jumper answers. If +-- the jumper disagrees with the dynamo, CSJ will look for the precise +-- intersection with the dynamo's chain. This could take a few minutes, but it +-- is a path that will end up in one of the dynamo and the jumper being +-- disconnected or disengaged. +-- +-- +-- Overview of the state transitions +-- --------------------------------- +-- +-- See 'ChainSyncJumpingState' for the implementation of the states. +-- +-- > j ╔════════╗ +-- > ╭────────── ║ Dynamo ║ ◀─────────╮ +-- > │ ╚════════╝ │f +-- > ▼ ▲ │ +-- > ┌────────────┐ │ k ┌──────────┐ +-- > │ Disengaged │ ◀───────────│────────── │ Objector │ +-- > └────────────┘ ╭─────│────────── └──────────┘ +-- > │ │ ▲ ▲ │ +-- > g│ │e b │ │ │ +-- > │ │ ╭─────╯ i│ │c +-- > ╭╌╌╌╌╌╌╌▼╌╌╌╌╌╌╌╌╌╌╌╌╌│╌╌╌╌╌╌╌╌╌╌│╌▼╌╌╌╮ +-- > ┆ ╔═══════╗ a ┌──────┐ d ┌─────┐ | +-- > ┆ ║ Happy ║ ───▶ │ LFI* │ ───▶ │ FI* │ | +-- > ┆ ╚═══════╝ ◀─╮ └──────┘ └─────┘ | +-- > ┆ Jumper ╰─────┴────────────╯h | +-- > ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯ +-- +-- *: LookingForIntersection and FoundIntersection, abbreviated for this +-- drawing only; this abbreviation will not be used elsewhere. +-- +-- A new peer starts as the dynamo if there is no other peer or as a Happy +-- jumper otherwise. The dynamo periodically requests jumps from happy +-- jumpers who, in the ideal case, accept them. +-- +-- In the event that a jumper rejects a jump, it goes from Happy to LFI* (a). +-- From there starts a back-and-forth of intersection search messages until +-- the exact point of disagreement with the dynamo is found. +-- +-- Once the exact point of disagreement is found, and if there is no objector +-- yet, the jumper becomes the objector (b). If there is an objector, then we +-- compare the intersections of the objector and the jumper. If the jumper's +-- intersection is strictly older, then the jumper replaces the objector (b+c). +-- Otherwise, the jumper is marked as FI* (d). 
+--
+-- If the dynamo disconnects or is disengaged, one peer is elected as the new
+-- dynamo (e|f) and all other peers revert to being happy jumpers (g+h).
+--
+-- If the objector disconnects or is disengaged, and there are FI* jumpers, then
+-- the one with the oldest intersection with the dynamo gets elected (i).
+--
+-- If the dynamo rolls back to a point older than the last jump it requested, it
+-- is disengaged (j) and a new dynamo is elected (e|f).
+--
+-- If the objector agrees with the dynamo, it is disengaged (k). If there are
+-- FI* jumpers, then one of them gets elected as the new objector (i).
+--
+-- If the dynamo or the objector claims to have no more headers, it is
+-- disengaged (j|k).
+--
+module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping (
+    Context
+  , ContextWith (..)
+  , Instruction (..)
+  , JumpInstruction (..)
+  , JumpResult (..)
+  , Jumping (..)
+  , makeContext
+  , mkJumping
+  , noJumping
+  , registerClient
+  , unregisterClient
+  ) where
+
+import           Cardano.Slotting.Slot (SlotNo (..), WithOrigin (..))
+import           Control.Monad (forM, forM_, when)
+import           Data.List (sortOn)
+import           Data.Map.Strict (Map)
+import qualified Data.Map.Strict as Map
+import           Data.Maybe (catMaybes, fromMaybe)
+import           GHC.Generics (Generic)
+import           Ouroboros.Consensus.Block (HasHeader (getHeaderFields), Header,
+                     Point (..), castPoint, pointSlot, succWithOrigin)
+import           Ouroboros.Consensus.Ledger.SupportsProtocol
+                     (LedgerSupportsProtocol)
+import           Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State
+                     (ChainSyncClientHandle (..),
+                     ChainSyncJumpingJumperState (..),
+                     ChainSyncJumpingState (..), ChainSyncState (..),
+                     DisengagedInitState (..), DynamoInitState (..),
+                     JumpInfo (..), JumperInitState (..),
+                     ObjectorInitState (..))
+import           Ouroboros.Consensus.Util.IOLike hiding (handle)
+import qualified Ouroboros.Network.AnchoredFragment as AF
+
+-- | Hooks for ChainSync jumping.
+data Jumping m blk = Jumping
+  { -- | Get the next instruction to execute, which can be either to run normal
+    -- ChainSync or to jump to a given point. When the peer is a jumper and
+    -- there is no jump request, 'jgNextInstruction' blocks until a jump request
+    -- is made.
+    jgNextInstruction :: !(m (Instruction blk)),
+
+    -- | To be called whenever the peer claims to have no more headers.
+    jgOnAwaitReply :: !(m ()),
+
+    -- | To be called whenever a header is received from the peer
+    -- before it is validated.
+    jgOnRollForward :: !(Point (Header blk) -> m ()),
+
+    -- | To be called whenever a peer rolls back.
+    jgOnRollBackward :: !(WithOrigin SlotNo -> m ()),
+
+    -- | Process the result of a jump, either accepted or rejected.
+    --
+    -- The jump result is used to decide on the next jumps or whether to elect
+    -- an objector.
+    jgProcessJumpResult :: !(JumpResult blk -> m ()),
+
+    -- | To be called to update the last known jump possible, namely to the
+    -- tip of the peer's candidate fragment. The ChainSync clients for all
+    -- peers should call this function in case they are or become dynamos.
+    --
+    -- JumpInfo is meant to be a snapshot of the @KnownIntersectionState@ of
+    -- the ChainSync client. See 'JumpInfo' for more details. 
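+    --
+    -- The ChainSync client is expected to call this whenever its
+    -- @KnownIntersectionState@ changes (cf. @updateJumpInfoSTM@ in the
+    -- ChainSync client).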
+ jgUpdateJumpInfo :: !(JumpInfo blk -> STM m ()) + } + deriving stock (Generic) + +deriving anyclass instance + ( IOLike m, + HasHeader blk, + NoThunks (Header blk) + ) => + NoThunks (Jumping m blk) + +-- | No-op implementation of CSJ +noJumping :: (MonadSTM m) => Jumping m blk +noJumping = + Jumping + { jgNextInstruction = pure RunNormally + , jgOnAwaitReply = pure () + , jgOnRollForward = const $ pure () + , jgOnRollBackward = const $ pure () + , jgProcessJumpResult = const $ pure () + , jgUpdateJumpInfo = const $ pure () + } + +-- | Create the callbacks for a given peer. +mkJumping :: + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + Jumping m blk +mkJumping peerContext = Jumping + { jgNextInstruction = atomically $ nextInstruction peerContext + , jgOnAwaitReply = atomically $ onAwaitReply peerContext + , jgOnRollForward = atomically . onRollForward peerContext + , jgOnRollBackward = atomically . onRollBackward peerContext + , jgProcessJumpResult = atomically . processJumpResult peerContext + , jgUpdateJumpInfo = updateJumpInfo peerContext + } + +-- | A context for ChainSync jumping +-- +-- Invariants: +-- +-- - If 'handlesVar' is not empty, then there is exactly one dynamo in it. +-- - There is at most one objector in 'handlesVar'. +-- - If there exist 'FoundIntersection' jumpers in 'handlesVar', then there +-- is an objector and the intersection of the objector with the dynamo is +-- at least as old as the oldest intersection of the `FoundIntersection` jumpers +-- with the dynamo. +data ContextWith peerField handleField m peer blk = Context + { peer :: !peerField, + handle :: !handleField, + handlesVar :: !(StrictTVar m (Map peer (ChainSyncClientHandle m blk))), + jumpSize :: !SlotNo + } + +-- | A non-specific, generic context for ChainSync jumping. +type Context = ContextWith () () + +-- | A peer-specific context for ChainSync jumping. This is a 'ContextWith' +-- pointing on the handler of the peer in question. +-- +-- Invariant: The binding from 'peer' to 'handle' is present in 'handlesVar'. +type PeerContext m peer blk = ContextWith peer (ChainSyncClientHandle m blk) m peer blk + +makeContext :: + MonadSTM m => + StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + SlotNo -> + -- ^ The size of jumps, in number of slots. + STM m (Context m peer blk) +makeContext h jumpSize = do + pure $ Context () () h jumpSize + +-- | Get a generic context from a peer context by stripping away the +-- peer-specific fields. +stripContext :: PeerContext m peer blk -> Context m peer blk +stripContext context = context {peer = (), handle = ()} + +-- | Instruction from the jumping governor, either to run normal ChainSync, or +-- to jump to follow a dynamo with the given fragment, or to restart ChainSync. +data Instruction blk + = RunNormally + | Restart + | -- | Jump to the tip of the given fragment. + JumpInstruction !(JumpInstruction blk) + deriving (Generic) + +deriving instance (HasHeader (Header blk), Eq (Header blk)) => Eq (Instruction blk) +deriving instance (HasHeader (Header blk), Show (Header blk)) => Show (Instruction blk) +deriving anyclass instance + ( HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (Instruction blk) + +data JumpInstruction blk + = JumpTo !(JumpInfo blk) + | -- | Used to set the intersection of the servers of starting objectors. + -- Otherwise, the ChainSync server wouldn't know which headers to start + -- serving. 
+ JumpToGoodPoint !(JumpInfo blk) + deriving (Generic) + +deriving instance (HasHeader (Header blk), Eq (Header blk)) => Eq (JumpInstruction blk) +instance (HasHeader (Header blk), Show (Header blk)) => Show (JumpInstruction blk) where + showsPrec p = \case + JumpTo jumpInfo -> + showParen (p > 10) $ showString "JumpTo " . shows (AF.headPoint $ jTheirFragment jumpInfo) + JumpToGoodPoint jumpInfo -> + showParen (p > 10) $ showString "JumpToGoodPoint " . shows (AF.headPoint $ jTheirFragment jumpInfo) + +deriving anyclass instance + ( HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (JumpInstruction blk) + +-- | The result of a jump request, either accepted or rejected. +data JumpResult blk + = AcceptedJump !(JumpInstruction blk) + | RejectedJump !(JumpInstruction blk) + deriving (Generic) + +deriving instance (HasHeader (Header blk), Eq (Header blk)) => Eq (JumpResult blk) +deriving instance (HasHeader (Header blk), Show (Header blk)) => Show (JumpResult blk) + +deriving anyclass instance + ( HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (JumpResult blk) + +-- | Compute the next instruction for the given peer. In the majority of cases, +-- this consists in reading the peer's handle, having the dynamo and objector +-- run normally and the jumpers wait for the next jump. As such, this function +-- mostly only reads from and writes to the handle of the peer. For the dynamo, every once in a +-- while, we need to indicate to the jumpers that they need to jump, and this +-- requires writing to a TVar for every jumper. +nextInstruction :: + ( MonadSTM m ) => + PeerContext m peer blk -> + STM m (Instruction blk) +nextInstruction context = + readTVar (cschJumping (handle context)) >>= \case + Disengaged DisengagedDone -> pure RunNormally + Disengaged Disengaging -> do + writeTVar (cschJumping (handle context)) (Disengaged DisengagedDone) + pure Restart + Dynamo (DynamoStarting goodJumpInfo) lastJumpSlot -> do + writeTVar (cschJumping (handle context)) $ + Dynamo DynamoStarted lastJumpSlot + pure $ JumpInstruction $ JumpToGoodPoint goodJumpInfo + Dynamo DynamoStarted _ -> + pure RunNormally + Objector Starting goodJump badPoint -> do + writeTVar (cschJumping (handle context)) $ + Objector Started goodJump badPoint + pure $ JumpInstruction $ JumpToGoodPoint goodJump + Objector Started _ _ -> pure RunNormally + Jumper nextJumpVar jumperState -> do + readTVar nextJumpVar >>= \case + Nothing -> retry + Just jumpInfo -> do + writeTVar nextJumpVar Nothing + case jumperState of + Happy FreshJumper mGoodJumpInfo -> + writeTVar (cschJumping (handle context)) $ + Jumper nextJumpVar $ Happy StartedJumper mGoodJumpInfo + _ -> pure () + pure $ JumpInstruction $ JumpTo jumpInfo + +-- | This function is called when we receive a 'MsgRollForward' message before +-- validating it. +-- +-- We request jumpers to jump here if the next header received by the dynamo is +-- at least jump size slots after the last jump. +-- +-- We also check that the Objector disagrees with the header sent at its +-- rejected jump. If it agrees to it, we disengage it. +-- +onRollForward :: forall m peer blk. 
+ ( MonadSTM m, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + Point (Header blk) -> + STM m () +onRollForward context point = + readTVar (cschJumping (handle context)) >>= \case + Objector _ _ badPoint + | badPoint == castPoint point -> do + disengage (handle context) + electNewObjector (stripContext context) + | otherwise -> pure () + Disengaged{} -> pure () + Jumper{} -> pure () + Dynamo _ lastJumpSlot + | let jumpBoundaryPlus1 = jumpSize context + succWithOrigin lastJumpSlot + , succWithOrigin (pointSlot point) > jumpBoundaryPlus1 -> do + mJumpInfo <- readTVar (cschJumpInfo (handle context)) + setJumps mJumpInfo + | otherwise -> pure () + where + setJumps Nothing = error "onRollForward: Dynamo without jump info" + setJumps (Just jumpInfo) = do + writeTVar (cschJumping (handle context)) $ + Dynamo DynamoStarted $ pointSlot $ AF.headPoint $ jTheirFragment jumpInfo + handles <- readTVar (handlesVar context) + forM_ (Map.elems handles) $ \h -> + readTVar (cschJumping h) >>= \case + Jumper nextJumpVar Happy{} -> writeTVar nextJumpVar (Just jumpInfo) + _ -> pure () + +-- | This function is called when we receive a 'MsgRollBackward' message. +-- +-- Here we check if the peer is trying to roll back to a point before the last +-- jump. If so, we disengage the peer. This prevents adversaries from sending +-- as objectors the same chain as the dynamo. +-- +onRollBackward :: forall m peer blk. + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + WithOrigin SlotNo -> + STM m () +onRollBackward context slot = + readTVar (cschJumping (handle context)) >>= \case + Objector _ _ badPoint + | slot < pointSlot badPoint -> do + disengage (handle context) + electNewObjector (stripContext context) + | otherwise -> pure () + Disengaged{} -> pure () + Jumper{} -> pure () + Dynamo _ lastJumpSlot + | slot < lastJumpSlot -> do + disengage (handle context) + electNewDynamo (stripContext context) + | otherwise -> pure () + +-- | This function is called when we receive a 'MsgAwaitReply' message. +-- +-- If this is the dynamo, we need to elect a new dynamo as no more headers +-- are available. +onAwaitReply :: + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + STM m () +onAwaitReply context = + readTVar (cschJumping (handle context)) >>= \case + Dynamo{} -> do + disengage (handle context) + electNewDynamo (stripContext context) + Objector{} -> do + disengage (handle context) + electNewObjector (stripContext context) + Jumper{} -> + -- A jumper might be receiving a 'MsgAwaitReply' message if it was + -- previously an objector and a new dynamo was elected. + disengage (handle context) + Disengaged{} -> + pure () + +-- | Process the result of a jump. In the happy case, this only consists in +-- updating the peer's handle to take the new candidate fragment and the new +-- last jump point into account. When disagreeing with the dynamo, though, we +-- enter a phase of several jumps to pinpoint exactly where the disagreement +-- occurs. Once this phase is finished, we trigger the election of a new +-- objector, which might update many TVars. +processJumpResult :: forall m peer blk. 
+ ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + PeerContext m peer blk -> + JumpResult blk -> + STM m () +processJumpResult context jumpResult = + readTVar (cschJumping (handle context)) >>= \case + Dynamo{} -> + case jumpResult of + AcceptedJump (JumpToGoodPoint jumpInfo) -> + updateChainSyncState (handle context) jumpInfo + RejectedJump JumpToGoodPoint{} -> do + startDisengaging (handle context) + electNewDynamo (stripContext context) + + -- Not interesting in the dynamo state + AcceptedJump JumpTo{} -> pure () + RejectedJump JumpTo{} -> pure () + + Disengaged{} -> pure () + Objector{} -> + case jumpResult of + AcceptedJump (JumpToGoodPoint jumpInfo) -> + updateChainSyncState (handle context) jumpInfo + RejectedJump JumpToGoodPoint{} -> do + -- If the objector rejects a good point, it is a sign of a rollback + -- to earlier than the last jump. + startDisengaging (handle context) + electNewObjector (stripContext context) + + -- Not interesting in the objector state + AcceptedJump JumpTo{} -> pure () + RejectedJump JumpTo{} -> pure () + + Jumper nextJumpVar jumperState -> + case jumpResult of + AcceptedJump (JumpTo goodJumpInfo) -> do + -- The jump was accepted; we set the jumper's candidate fragment to + -- the dynamo's candidate fragment up to the accepted point. + -- + -- The candidate fragments of jumpers don't grow otherwise, as only the + -- objector and the dynamo request further headers. + updateChainSyncState (handle context) goodJumpInfo + writeTVar (cschJumpInfo (handle context)) $ Just goodJumpInfo + case jumperState of + LookingForIntersection _goodJumpInfo badJumpInfo -> + -- @AF.headPoint fragment@ is in @badFragment@, as the jumper + -- looking for an intersection is the only client asking for its + -- jumps. + lookForIntersection nextJumpVar goodJumpInfo badJumpInfo + Happy StartedJumper _mGoodJumpInfo -> + writeTVar (cschJumping (handle context)) $ + Jumper nextJumpVar $ Happy StartedJumper $ Just goodJumpInfo + Happy FreshJumper _mGoodJumpInfo -> + pure () + FoundIntersection{} -> + -- Only happy jumpers are asked to jump by the dynamo, and only + -- jumpers looking for an intersection are asked to jump by + -- themselves. + error "processJumpResult: Jumpers in state FoundIntersection shouldn't be further jumping." + + RejectedJump (JumpTo badJumpInfo) -> + -- The tip of @goodFragment@ is in @jTheirFragment jumpInfo@ or is + -- an ancestor of it. If the jump was requested by the dynamo, this + -- holds because the dynamo is not allowed to rollback before the + -- jumps that it requests. + -- + -- If the jump was requested by the jumper, this holds because the + -- jumper is looking for an intersection, and such jumper only asks + -- for jumps that meet this condition. + case jumperState of + LookingForIntersection goodJumpInfo _ -> + lookForIntersection nextJumpVar goodJumpInfo badJumpInfo + Happy StartedJumper mGoodJumpInfo -> + lookForIntersection nextJumpVar (mkGoodJumpInfo mGoodJumpInfo badJumpInfo) badJumpInfo + Happy FreshJumper _ -> + pure () + FoundIntersection{} -> + error "processJumpResult (rejected): Jumpers in state FoundIntersection shouldn't be further jumping." + + -- These aren't interesting in the case of jumpers. 
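+          -- ('JumpToGoodPoint' is only ever offered to starting dynamos and
+          -- objectors; see 'nextInstruction'.)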
+ AcceptedJump JumpToGoodPoint{} -> pure () + RejectedJump JumpToGoodPoint{} -> pure () + where + -- Avoid redundant constraint "HasHeader blk" reported by some ghc's + _ = getHeaderFields @blk + + updateChainSyncState :: ChainSyncClientHandle m blk -> JumpInfo blk -> STM m () + updateChainSyncState handle jump = do + let fragment = jTheirFragment jump + modifyTVar (cschState handle) $ \csState -> + csState {csCandidate = fragment, csLatestSlot = Just (AF.headSlot fragment) } + writeTVar (cschJumpInfo handle) $ Just jump + + mkGoodJumpInfo :: Maybe (JumpInfo blk) -> JumpInfo blk -> JumpInfo blk + mkGoodJumpInfo mGoodJumpInfo badJumpInfo = do + let badFragment = jTheirFragment badJumpInfo + -- use the jump info of the rejected jump if the good jump info is + -- not available (i.e. there were no accepted jumps) + badFragmentStart = AF.takeOldest 0 badFragment + in fromMaybe (badJumpInfo {jTheirFragment = badFragmentStart}) mGoodJumpInfo + + -- | Given a good point (where we know we agree with the dynamo) and a bad + -- fragment (where we know the tip disagrees with the dynamo), either decide + -- that we know the intersection for sure (if the bad point is the successor + -- of the good point) or program a jump somewhere in the middle to refine + -- those points. + -- + -- PRECONDITION: The good point is in the candidate fragment of + -- @badJumpInfo@ or it is an ancestor of it. + lookForIntersection nextJumpVar goodJumpInfo badJumpInfo = do + let badFragment = jTheirFragment badJumpInfo + -- If the good point is not in the bad fragment, the anchor of the bad + -- fragment should be a good point too. + searchFragment = + maybe badFragment snd $ + AF.splitAfterPoint badFragment (AF.headPoint $ jTheirFragment goodJumpInfo) + let len = AF.length searchFragment + if len <= 1 then do + -- If the fragment only contains the bad tip, we know the + -- intersection is the good point. + -- Clear any subsequent jumps requested by the dynamo. + writeTVar nextJumpVar Nothing + maybeElectNewObjector nextJumpVar goodJumpInfo (AF.headPoint badFragment) + else do + let middlePoint = len `div` 2 + theirFragment = AF.dropNewest middlePoint badFragment + writeTVar nextJumpVar $ Just + badJumpInfo { jTheirFragment = theirFragment } + writeTVar (cschJumping (handle context)) $ + Jumper nextJumpVar (LookingForIntersection goodJumpInfo badJumpInfo) + + maybeElectNewObjector nextJumpVar goodJumpInfo badPoint = do + findObjector (stripContext context) >>= \case + Nothing -> + -- There is no objector yet. Promote the jumper to objector. + writeTVar (cschJumping (handle context)) (Objector Starting goodJumpInfo badPoint) + Just (oInitState, oGoodJump, oPoint, oHandle) + | pointSlot oPoint <= pointSlot badPoint -> + -- The objector's intersection is still old enough. Keep it. + writeTVar (cschJumping (handle context)) $ + Jumper nextJumpVar (FoundIntersection Starting goodJumpInfo badPoint) + | otherwise -> do + -- Found an earlier intersection. Demote the old objector and + -- promote the jumper to objector. 
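+              -- The demoted objector keeps its 'FoundIntersection' state, so
+              -- 'electNewObjector' can still consider it later.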
+              newJumper Nothing (FoundIntersection oInitState oGoodJump oPoint) >>=
+                writeTVar (cschJumping oHandle)
+              writeTVar (cschJumping (handle context)) (Objector Starting goodJumpInfo badPoint)
+
+updateJumpInfo ::
+  (MonadSTM m) =>
+  PeerContext m peer blk ->
+  JumpInfo blk ->
+  STM m ()
+updateJumpInfo context jumpInfo =
+  readTVar (cschJumping (handle context)) >>= \case
+    Disengaged{} -> pure ()
+    _ -> writeTVar (cschJumpInfo (handle context)) $ Just jumpInfo
+
+-- | Find the dynamo in a TVar containing a map of handles. Returns the handle
+-- of the dynamo, or 'Nothing' if there is none.
+getDynamo ::
+  (MonadSTM m) =>
+  StrictTVar m (Map peer (ChainSyncClientHandle m blk)) ->
+  STM m (Maybe (ChainSyncClientHandle m blk))
+getDynamo handlesVar = do
+  handles <- Map.elems <$> readTVar handlesVar
+  findM (\handle -> isDynamo <$> readTVar (cschJumping handle)) handles
+  where
+    isDynamo Dynamo{} = True
+    isDynamo _        = False
+
+-- | Disengage a peer, meaning that it will no longer be asked to jump or
+-- act as dynamo or objector.
+disengage :: MonadSTM m => ChainSyncClientHandle m blk -> STM m ()
+disengage = disengageWith DisengagedDone
+
+-- | Like 'disengage', but additionally restart ChainSync
+startDisengaging :: MonadSTM m => ChainSyncClientHandle m blk -> STM m ()
+startDisengaging = disengageWith Disengaging
+
+disengageWith ::
+  MonadSTM m =>
+  DisengagedInitState ->
+  ChainSyncClientHandle m blk ->
+  STM m ()
+disengageWith initState handle = do
+  writeTVar (cschJumping handle) (Disengaged initState)
+  writeTVar (cschJumpInfo handle) Nothing
+
+
+-- | Convenience function that, given some optional jump info and a jumper
+-- state, makes a fresh 'Jumper' state.
+newJumper ::
+  ( MonadSTM m,
+    LedgerSupportsProtocol blk
+  ) =>
+  Maybe (JumpInfo blk) ->
+  ChainSyncJumpingJumperState blk ->
+  STM m (ChainSyncJumpingState m blk)
+newJumper jumpInfo jumperState = do
+  nextJumpVar <- newTVar jumpInfo
+  pure $ Jumper nextJumpVar jumperState
+
+-- | Register a new ChainSync client with a context, returning a 'PeerContext'
+-- for that peer. If there is no dynamo, the peer starts as dynamo; otherwise,
+-- it starts as a jumper.
+registerClient ::
+  ( Ord peer,
+    LedgerSupportsProtocol blk,
+    IOLike m
+  ) =>
+  Context m peer blk ->
+  peer ->
+  StrictTVar m (ChainSyncState blk) ->
+  -- | A function to make a client handle from a jumping state.
+  (StrictTVar m (ChainSyncJumpingState m blk) -> ChainSyncClientHandle m blk) ->
+  STM m (PeerContext m peer blk)
+registerClient context peer csState mkHandle = do
+  csjState <- getDynamo (handlesVar context) >>= \case
+    Nothing -> do
+      fragment <- csCandidate <$> readTVar csState
+      pure $ Dynamo DynamoStarted $ pointSlot $ AF.anchorPoint fragment
+    Just handle -> do
+      mJustInfo <- readTVar (cschJumpInfo handle)
+      newJumper mJustInfo (Happy FreshJumper Nothing)
+  cschJumping <- newTVar csjState
+  let handle = mkHandle cschJumping
+  modifyTVar (handlesVar context) $ Map.insert peer handle
+  pure $ context {peer, handle}
+
+-- | Unregister a client from a 'PeerContext'; this might trigger the election
+-- of a new dynamo or objector if the peer was one of these two. 
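+--
+-- A sketch of the intended pairing with 'registerClient', mirroring
+-- @withCSJCallbacks@ in the ChainSync client (@ctx@, @peer@, @csState@,
+-- @mkHandle@ and @f@ are placeholder names):
+--
+-- > bracket (atomically $ registerClient ctx peer csState mkHandle)
+-- >         (atomically . unregisterClient)
+-- >         (f . mkJumping)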
+unregisterClient ::
+  ( MonadSTM m,
+    Ord peer,
+    LedgerSupportsProtocol blk
+  ) =>
+  PeerContext m peer blk ->
+  STM m ()
+unregisterClient context = do
+  modifyTVar (handlesVar context) $ Map.delete (peer context)
+  let context' = stripContext context
+  readTVar (cschJumping (handle context)) >>= \case
+    Disengaged{} -> pure ()
+    Jumper{} -> pure ()
+    Objector{} -> electNewObjector context'
+    Dynamo{} -> electNewDynamo context'
+
+-- | Elect an arbitrary non-disengaged peer as the new dynamo and demote all
+-- other peers to jumpers.
+electNewDynamo ::
+  ( MonadSTM m,
+    Eq peer,
+    LedgerSupportsProtocol blk
+  ) =>
+  Context m peer blk ->
+  STM m ()
+electNewDynamo context = do
+  peerStates <- Map.toList <$> readTVar (handlesVar context)
+  mDynamo <- findNonDisengaged peerStates
+  case mDynamo of
+    Nothing -> pure ()
+    Just (dynId, dynamo) -> do
+      fragment <- csCandidate <$> readTVar (cschState dynamo)
+      mJumpInfo <- readTVar (cschJumpInfo dynamo)
+      -- If there is no jump info, the dynamo must be just starting and
+      -- there is no need to set the intersection of the ChainSync server.
+      let dynamoInitState = maybe DynamoStarted DynamoStarting mJumpInfo
+      writeTVar (cschJumping dynamo) $
+        Dynamo dynamoInitState $ pointSlot $ AF.headPoint fragment
+      -- Demote all other peers to jumpers
+      forM_ peerStates $ \(peer, st) ->
+        when (peer /= dynId) $ do
+          jumpingState <- readTVar (cschJumping st)
+          when (not (isDisengaged jumpingState)) $
+            newJumper mJumpInfo (Happy FreshJumper Nothing)
+              >>= writeTVar (cschJumping st)
+  where
+    findNonDisengaged =
+      findM $ \(_, st) -> not . isDisengaged <$> readTVar (cschJumping st)
+    isDisengaged Disengaged{} = True
+    isDisengaged _            = False
+
+findM :: Monad m => (a -> m Bool) -> [a] -> m (Maybe a)
+findM _ [] = pure Nothing
+findM p (x : xs) = p x >>= \case
+  True -> pure (Just x)
+  False -> findM p xs
+
+-- | Find the objector in a context, if there is one.
+findObjector ::
+  (MonadSTM m) =>
+  Context m peer blk ->
+  STM m (Maybe (ObjectorInitState, JumpInfo blk, Point (Header blk), ChainSyncClientHandle m blk))
+findObjector context = do
+  readTVar (handlesVar context) >>= go . Map.toList
+  where
+    go [] = pure Nothing
+    go ((_, handle):xs) =
+      readTVar (cschJumping handle) >>= \case
+        Objector initState goodJump badPoint ->
+          pure $ Just (initState, goodJump, badPoint, handle)
+        _ -> go xs
+
+-- | Look at all dissenting jumpers and promote the one with the oldest
+-- intersection with the dynamo as the new objector.
+electNewObjector ::
+  (MonadSTM m) =>
+  Context m peer blk ->
+  STM m ()
+electNewObjector context = do
+  peerStates <- Map.toList <$> readTVar (handlesVar context)
+  dissentingJumpers <- collectDissentingJumpers peerStates
+  let sortedJumpers = sortOn (pointSlot . 
fst) dissentingJumpers + case sortedJumpers of + (badPoint, (initState, goodJumpInfo, handle)):_ -> + writeTVar (cschJumping handle) $ Objector initState goodJumpInfo badPoint + _ -> + pure () + where + collectDissentingJumpers peerStates = + fmap catMaybes $ + forM peerStates $ \(_, handle) -> + readTVar (cschJumping handle) >>= \case + Jumper _ (FoundIntersection initState goodJumpInfo badPoint) -> + pure $ Just (badPoint, (initState, goodJumpInfo, handle)) + _ -> + pure Nothing diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs index c82e1afd24..189ee361e5 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs @@ -2,19 +2,35 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeApplications #-} {-# LANGUAGE UndecidableInstances #-} module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( ChainSyncClientHandle (..) + , ChainSyncJumpingJumperState (..) + , ChainSyncJumpingState (..) , ChainSyncState (..) + , DisengagedInitState (..) + , DynamoInitState (..) + , JumpInfo (..) + , JumperInitState (..) + , ObjectorInitState (..) ) where import Cardano.Slotting.Slot (SlotNo, WithOrigin) +import Data.Function (on) +import Data.Typeable (Proxy (..), typeRep) import GHC.Generics (Generic) -import Ouroboros.Consensus.Block (HasHeader, Header) -import Ouroboros.Consensus.Util.IOLike (IOLike, NoThunks, StrictTVar) -import Ouroboros.Network.AnchoredFragment (AnchoredFragment) +import Ouroboros.Consensus.Block (HasHeader, Header, Point) +import Ouroboros.Consensus.HeaderStateHistory (HeaderStateHistory) +import Ouroboros.Consensus.Ledger.SupportsProtocol + (LedgerSupportsProtocol) +import Ouroboros.Consensus.Util.IOLike (IOLike, NoThunks (..), + StrictTVar) +import Ouroboros.Network.AnchoredFragment (AnchoredFragment, + headPoint) -- | A ChainSync client's state that's used by other components, like the GDD or -- the jumping governor. @@ -53,15 +69,153 @@ deriving anyclass instance ( -- the GDD governor. data ChainSyncClientHandle m blk = ChainSyncClientHandle { -- | Disconnects from the peer when the GDD considers it adversarial - cschGDDKill :: !(m ()) + cschGDDKill :: !(m ()) -- | Data shared between the client and external components like GDD. - , cschState :: !(StrictTVar m (ChainSyncState blk)) + , cschState :: !(StrictTVar m (ChainSyncState blk)) + + -- | The state of the peer with respect to ChainSync jumping. + , cschJumping :: !(StrictTVar m (ChainSyncJumpingState m blk)) + + -- | ChainSync state needed to jump to the tip of the candidate fragment of + -- the peer. + , cschJumpInfo :: !(StrictTVar m (Maybe (JumpInfo blk))) } deriving stock (Generic) deriving anyclass instance ( IOLike m, HasHeader blk, + LedgerSupportsProtocol blk, NoThunks (Header blk) ) => NoThunks (ChainSyncClientHandle m blk) + +data DynamoInitState blk + = -- | The dynamo has not yet started jumping and we first need to jump to the + -- given jump info to set the intersection of the ChainSync server. 
+    DynamoStarting !(JumpInfo blk)
+  | DynamoStarted
+  deriving (Generic)
+
+deriving anyclass instance
+  ( HasHeader blk,
+    LedgerSupportsProtocol blk,
+    NoThunks (Header blk)
+  ) => NoThunks (DynamoInitState blk)
+
+data ObjectorInitState
+  = -- | The objector still needs to set the intersection of the ChainSync
+    -- server before resuming retrieval of headers.
+    Starting
+  | Started
+  deriving (Generic, Show, NoThunks)
+
+data DisengagedInitState
+  = -- | The node is being disengaged and for that we need to restart the
+    -- ChainSync protocol.
+    Disengaging
+  | DisengagedDone
+  deriving (Generic, Show, NoThunks)
+
+data JumperInitState
+  = -- | The jumper hasn't been requested to jump yet
+    FreshJumper
+  | StartedJumper
+  deriving (Generic, Show, NoThunks)
+
+-- | State of a peer with respect to ChainSync jumping.
+data ChainSyncJumpingState m blk
+  = -- | The dynamo, of which there is exactly one unless there are no peers,
+    -- runs the normal ChainSync protocol and is morally supposed to give us
+    -- _the_ chain. This might not be true and the dynamo might not be
+    -- honest, but the goal of the algorithm is to eventually have an honest,
+    -- alert peer as dynamo.
+    Dynamo
+      (DynamoInitState blk)
+      -- | The last slot at which we triggered jumps for the jumpers.
+      !(WithOrigin SlotNo)
+  | -- | The objector, of which there is at most one, also runs normal
+    -- ChainSync. It is a former jumper that disagreed with the dynamo. When
+    -- that happened, we spun it up to let normal ChainSync and Genesis decide
+    -- which one to disconnect from.
+    Objector
+      ObjectorInitState
+      -- | The youngest point where the objector agrees with the dynamo.
+      !(JumpInfo blk)
+      -- | The point where the objector dissented from the dynamo when it was a
+      -- jumper.
+      !(Point (Header blk))
+  | -- | Headers continue to be downloaded from 'Disengaged' peers. They
+    -- are not requested to jump, nor elected as dynamos or objectors.
+    Disengaged DisengagedInitState
+  | -- | There can be arbitrarily many jumpers. They are queried regularly to
+    -- see if they agree with the chain that the dynamo is serving; otherwise,
+    -- they become candidates to be the objector. See
+    -- 'ChainSyncJumpingJumperState' for more details.
+    Jumper
+      -- | A TVar containing the next jump to be executed.
+      !(StrictTVar m (Maybe (JumpInfo blk)))
+      -- | More precisely, the state of the jumper.
+      !(ChainSyncJumpingJumperState blk)
+  deriving (Generic)
+
+deriving anyclass instance
+  ( IOLike m,
+    HasHeader blk,
+    LedgerSupportsProtocol blk,
+    NoThunks (Header blk)
+  ) => NoThunks (ChainSyncJumpingState m blk)
+
+-- | The ChainSync state required for jumps
+--
+-- The jump info is mostly a snapshot of the @KnownIntersectionState@ of the
+-- dynamo, with the difference that 'jTheirFragment' might be a proper prefix of
+-- the original candidate fragment.
+--
+-- This can happen if we need to look for an intersection when the jumper
+-- rejects a jump.
+data JumpInfo blk = JumpInfo
+  { jMostRecentIntersection :: !(Point blk)
+  , jOurFragment :: !(AnchoredFragment (Header blk))
+  , jTheirFragment :: !(AnchoredFragment (Header blk))
+  , jTheirHeaderStateHistory :: !(HeaderStateHistory blk)
+  }
+  deriving (Generic)
+
+instance (HasHeader (Header blk)) => Eq (JumpInfo blk) where
+  (==) = (==) `on` headPoint . jTheirFragment
+
+instance LedgerSupportsProtocol blk => NoThunks (JumpInfo blk) where
+  showTypeOf _ = show $ typeRep (Proxy @(JumpInfo blk))
+
+-- | The specific state of a jumper peer. 
This state is to be understood as “to +-- the best of our knowledge”, that is “last time we asked them”. For instance, +-- a jumper might be marked as 'Happy' even though its chain has been differing +-- from the dynamo's for hundreds of blocks, if we haven't asked them to jump +-- since then. +data ChainSyncJumpingJumperState blk + = -- | The jumper is happy with the dynamo, and we hold the jump info of the + -- last accepted jump. + Happy JumperInitState !(Maybe (JumpInfo blk)) + | -- | The jumper disagrees with the dynamo and we are searching where exactly + -- that happens. All we know is a point where the jumper agrees with the + -- dynamo and a point where the jumper disagrees with the dynamo, carried by + -- this constructor. + -- + -- INVARIANT: The tip of the fragment in the good jump info (first argument) + -- is in the fragment of the bad jump info or is an ancestor of it. + LookingForIntersection !(JumpInfo blk) !(JumpInfo blk) + | -- | The jumper disagrees with the dynamo and we have determined the latest + -- point where dynamo and jumper agree. We store here the jump info of the + -- latest accepted jump and the point of the earliest rejected jump. + -- + -- The init state indicates the initialization to use for the objector in + -- case this jumper is promoted. + FoundIntersection ObjectorInitState !(JumpInfo blk) !(Point (Header blk)) + deriving (Generic) + +deriving anyclass instance + ( HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (Header blk) + ) => NoThunks (ChainSyncJumpingJumperState blk) diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs index c10542d05b..ff05b517fb 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs @@ -80,13 +80,13 @@ import qualified Ouroboros.Consensus.HeaderStateHistory as HeaderStateHistory import Ouroboros.Consensus.Ledger.Abstract import Ouroboros.Consensus.Ledger.Extended hiding (ledgerState) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainDbView (..), ChainSyncClientException, - ChainSyncClientResult (..), ChainSyncLoPBucketConfig (..), - ChainSyncState (..), ChainSyncStateView (..), - ConfigEnv (..), Consensus, DynamicEnv (..), Our (..), - Their (..), TraceChainSyncClientEvent (..), - bracketChainSyncClient, chainSyncClient, chainSyncStateFor, - viewChainSyncState) + (CSJConfig (..), ChainDbView (..), + ChainSyncClientException, ChainSyncClientResult (..), + ChainSyncLoPBucketConfig (..), ChainSyncState (..), + ChainSyncStateView (..), ConfigEnv (..), Consensus, + DynamicEnv (..), Our (..), Their (..), + TraceChainSyncClientEvent (..), bracketChainSyncClient, + chainSyncClient, chainSyncStateFor, viewChainSyncState) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import Ouroboros.Consensus.Node.NetworkProtocolVersion (NodeToNodeVersion) @@ -404,11 +404,14 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) lopBucketConfig :: ChainSyncLoPBucketConfig lopBucketConfig = ChainSyncLoPBucketDisabled + csjConfig :: CSJConfig + csjConfig = CSJDisabled + client :: ChainSyncStateView m TestBlock -> Consensus ChainSyncClientPipelined TestBlock m - client ChainSyncStateView {csvSetCandidate, csvSetLatestSlot, csvIdling, csvLoPBucket} = + client ChainSyncStateView {csvSetCandidate, 
csvSetLatestSlot, csvIdling, csvLoPBucket, csvJumping} = chainSyncClient ConfigEnv { chainDbView @@ -426,6 +429,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) , idling = csvIdling , loPBucket = csvLoPBucket , setLatestSlot = csvSetLatestSlot + , jumping = csvJumping } -- Set up the server @@ -499,6 +503,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) serverId maxBound lopBucketConfig + csjConfig $ \csState -> do atomically $ do handles <- readTVar varHandles From da7a6dbd1a27516a0d20a0fcc277be9475b1cff8 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Wed, 24 Apr 2024 19:45:31 +0200 Subject: [PATCH 12/20] Make it possible to inspect all traces in stateView MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nicolas “Niols” Jeannerod --- .../ouroboros-consensus-diffusion.cabal | 1 + .../Test/Consensus/PeerSimulator/Handlers.hs | 2 +- .../Test/Consensus/PeerSimulator/Resources.hs | 2 +- .../Test/Consensus/PeerSimulator/Run.hs | 18 ++++-- .../ScheduledBlockFetchServer.hs | 2 +- .../PeerSimulator/ScheduledChainSyncServer.hs | 2 +- .../Consensus/PeerSimulator/StateDiagram.hs | 6 +- .../Test/Consensus/PeerSimulator/StateView.hs | 20 +++++-- .../Test/Consensus/PeerSimulator/Trace.hs | 2 +- .../Test/Consensus/PointSchedule.hs | 57 ++----------------- .../Test/Consensus/PointSchedule/NodeState.hs | 57 +++++++++++++++++++ 11 files changed, 101 insertions(+), 68 deletions(-) create mode 100644 ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/NodeState.hs diff --git a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal index 83aaa24e2b..e67b517cae 100644 --- a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal +++ b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal @@ -258,6 +258,7 @@ test-suite consensus-test Test.Consensus.PeerSimulator.Tests.Timeouts Test.Consensus.PeerSimulator.Trace Test.Consensus.PointSchedule + Test.Consensus.PointSchedule.NodeState Test.Consensus.PointSchedule.Peers Test.Consensus.PointSchedule.Shrinking Test.Consensus.PointSchedule.Shrinking.Tests diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Handlers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Handlers.hs index b7f1248bc2..0ac53a11c8 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Handlers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Handlers.hs @@ -46,7 +46,7 @@ import Test.Consensus.PeerSimulator.ScheduledChainSyncServer import Test.Consensus.PeerSimulator.Trace (TraceScheduledBlockFetchServerEvent (..), TraceScheduledChainSyncServerEvent (..)) -import Test.Consensus.PointSchedule +import Test.Consensus.PointSchedule.NodeState import Test.Util.Orphans.IOLike () import Test.Util.TestBlock (TestBlock, TestHash (TestHash)) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs index 423660b03f..c4fe394a60 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs @@ -40,7 +40,7 @@ import 
Test.Consensus.PeerSimulator.ScheduledBlockFetchServer runScheduledBlockFetchServer) import Test.Consensus.PeerSimulator.ScheduledChainSyncServer import Test.Consensus.PeerSimulator.Trace (TraceEvent) -import Test.Consensus.PointSchedule +import Test.Consensus.PointSchedule.NodeState import Test.Consensus.PointSchedule.Peers (PeerId) import Test.Util.Orphans.IOLike () import Test.Util.TestBlock (TestBlock) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 3488fb6c79..b7c04a17d3 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -53,9 +53,10 @@ import Test.Consensus.PeerSimulator.StateDiagram (peerSimStateDiagramSTMTracerDebug) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PeerSimulator.Trace -import Test.Consensus.PointSchedule (BlockFetchTimeout, CSJParams (..), - GenesisTest (..), GenesisTestFull, LoPBucketParams (..), - NodeState, PeersSchedule, peersStatesRelative) +import Test.Consensus.PointSchedule (BlockFetchTimeout, + CSJParams (..), GenesisTest (..), GenesisTestFull, + LoPBucketParams (..), PeersSchedule, peersStatesRelative) +import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (Peer (..), PeerId, getPeerIds) import Test.Util.ChainDB @@ -335,7 +336,7 @@ startNode schedulerConfig genesisTest interval = do (csClient, csServer) <- startChainSyncConnectionThread peerRegistry - lrTracer + tracer lrConfig chainDbView fetchClientRegistry @@ -350,7 +351,7 @@ startNode schedulerConfig genesisTest interval = do (bfClient, bfServer) <- startBlockFetchConnectionThread peerRegistry - lrTracer + tracer lnStateViewTracers fetchClientRegistry (pure Continue) @@ -391,6 +392,13 @@ startNode schedulerConfig genesisTest interval = do , gtCSJParams = CSJParams { csjpJumpSize } } = genesisTest + StateViewTracers{svtTraceTracer} = lnStateViewTracers + + -- FIXME: This type of configuration should move to `Trace.mkTracer`. + tracer = if scTrace schedulerConfig + then Tracer (\evt -> traceWith lrTracer evt >> traceWith svtTraceTracer evt) + else svtTraceTracer + chainSyncTimeouts_ = if scEnableChainSyncTimeouts schedulerConfig then gtChainSyncTimeouts diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledBlockFetchServer.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledBlockFetchServer.hs index d0092ed7d3..d3535f895d 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledBlockFetchServer.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledBlockFetchServer.hs @@ -18,7 +18,7 @@ import Ouroboros.Network.Protocol.BlockFetch.Server import Test.Consensus.PeerSimulator.ScheduledServer (ScheduledServer (..), awaitOnlineState, runHandler) import Test.Consensus.PeerSimulator.Trace -import Test.Consensus.PointSchedule (NodeState) +import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (PeerId) -- | Return values for the 'handlerSendBlocks'. 
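The tracer wiring in 'startNode' above sends every event both to 'lrTracer' and
to 'svtTraceTracer'. Should that wiring move into 'Trace.mkTracer' as the FIXME
suggests, a combinator along the following lines would be enough (a minimal
sketch; 'teeTracer' is a hypothetical name, not part of this series):

    import Control.Tracer (Tracer (..), traceWith)

    -- Forward every traced event to both tracers, in order.
    teeTracer :: Monad m => Tracer m ev -> Tracer m ev -> Tracer m ev
    teeTracer t1 t2 = Tracer $ \ev -> traceWith t1 ev >> traceWith t2 ev

With it, the conditional in 'startNode' would reduce to choosing between
'teeTracer lrTracer svtTraceTracer' and 'svtTraceTracer' alone.
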
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledChainSyncServer.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledChainSyncServer.hs index 1a173a5d8d..9120bb60f9 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledChainSyncServer.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ScheduledChainSyncServer.hs @@ -27,7 +27,7 @@ import Test.Consensus.PeerSimulator.ScheduledServer import Test.Consensus.PeerSimulator.Trace (TraceEvent (TraceScheduledChainSyncServerEvent), TraceScheduledChainSyncServerEvent (..)) -import Test.Consensus.PointSchedule (NodeState) +import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (PeerId) -- | Pure representation of the messages produced by the handler for the @StNext@ diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateDiagram.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateDiagram.hs index 715cc9c464..937f4830ce 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateDiagram.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateDiagram.hs @@ -54,8 +54,8 @@ import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (HeaderHash) import Test.Consensus.BlockTree (BlockTree (btBranches, btTrunk), BlockTreeBranch (btbSuffix), prettyBlockTree) -import qualified Test.Consensus.PointSchedule as PS -import Test.Consensus.PointSchedule (NodeState, genesisNodeState) +import Test.Consensus.PointSchedule.NodeState (NodeState (..), + genesisNodeState) import Test.Consensus.PointSchedule.Peers (PeerId (..)) import Test.Util.TestBlock (TestBlock, TestHash (TestHash)) @@ -478,7 +478,7 @@ addPoints :: Map PeerId (NodeState TestBlock) -> TreeSlots -> TreeSlots addPoints peerPoints treeSlots = foldl' step treeSlots (Map.toList peerPoints) where - step z (pid, ap) = addTipPoint pid (PS.nsTip ap) z + step z (pid, ap) = addTipPoint pid (nsTip ap) z ---------------------------------------------------------------------------------------------------- -- Cells diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs index f8c1fbff7b..9b56b6a0a4 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/StateView.hs @@ -36,6 +36,7 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import Ouroboros.Network.Block (StandardHash, Tip) import Ouroboros.Network.Protocol.BlockFetch.Type (BlockFetch) import Ouroboros.Network.Protocol.ChainSync.Type (ChainSync) +import Test.Consensus.PeerSimulator.Trace (TraceEvent) import Test.Consensus.PointSchedule.Peers (PeerId) import Test.Util.TersePrinting (terseBlock, terseHFragment, terseMaybe) @@ -143,7 +144,9 @@ data StateView blk = StateView { svPeerSimulatorResults :: [PeerSimulatorResult blk], -- | This field holds the most recent point in the selection (incl. anchor) -- for which we have a full block (not just a header). - svTipBlock :: Maybe blk + svTipBlock :: Maybe blk, + -- | List of all TraceEvent that have been sent during the simulation. 
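+    -- They are recorded by 'svtTraceTracer' and retrieved through
+    -- 'snapshotStateView'.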
+ svTrace :: [TraceEvent blk] } instance Condense (StateView TestBlock) where @@ -164,6 +167,8 @@ collectDisconnectedPeers stateView = nubOrd $ data StateViewTracers blk m = StateViewTracers { svtPeerSimulatorResultsTracer :: Tracer m (PeerSimulatorResult blk) , svtGetPeerSimulatorResults :: m [PeerSimulatorResult blk] + , svtTraceTracer :: Tracer m (TraceEvent blk) + , svtGetTracerTrace :: m [TraceEvent blk] } -- | Helper to get exceptions from a StateView. @@ -196,7 +201,13 @@ defaultStateViewTracers :: m (StateViewTracers blk m) defaultStateViewTracers = do (svtPeerSimulatorResultsTracer, svtGetPeerSimulatorResults) <- recordingTracerTVar - pure StateViewTracers {svtPeerSimulatorResultsTracer, svtGetPeerSimulatorResults} + (svtTraceTracer, svtGetTracerTrace) <- recordingTracerTVar + pure StateViewTracers + { svtPeerSimulatorResultsTracer + , svtGetPeerSimulatorResults + , svtTraceTracer + , svtGetTracerTrace + } -- | Call 'defaultStateViewTracers' and add the provided results. stateViewTracersWithInitial :: @@ -216,8 +227,9 @@ snapshotStateView :: StateViewTracers blk m -> ChainDB m blk -> m (StateView blk) -snapshotStateView StateViewTracers{svtGetPeerSimulatorResults} chainDb = do +snapshotStateView StateViewTracers{svtGetPeerSimulatorResults, svtGetTracerTrace} chainDb = do svPeerSimulatorResults <- svtGetPeerSimulatorResults + svTrace <- svtGetTracerTrace svSelectedChain <- atomically $ ChainDB.getCurrentChain chainDb svTipBlock <- ChainDB.getTipBlock chainDb - pure StateView {svSelectedChain, svPeerSimulatorResults, svTipBlock} + pure StateView {svSelectedChain, svPeerSimulatorResults, svTipBlock, svTrace} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index 583f4541e5..13f80ff793 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -47,7 +47,7 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment, headPoint) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (SlotNo (SlotNo), Tip, castPoint) -import Test.Consensus.PointSchedule (NodeState) +import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (Peer (Peer), PeerId) import Test.Util.TersePrinting (terseAnchor, terseBlock, terseFragment, terseHFragment, terseHeader, tersePoint, diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs index 76af1f7c33..9ed560762c 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs @@ -28,7 +28,6 @@ module Test.Consensus.PointSchedule ( , GenesisTestFull , GenesisWindow (..) , LoPBucketParams (..) - , NodeState (..) , PeerSchedule , PeersSchedule , RunGenesisTestResult (..) 
@@ -36,7 +35,6 @@ module Test.Consensus.PointSchedule ( , ensureScheduleDuration , genesisNodeState , longRangeAttack - , nsTipTip , peerSchedulesBlocks , peerStates , peersStates @@ -65,18 +63,18 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol import Ouroboros.Consensus.Network.NodeToNode (ChainSyncTimeout (..)) import Ouroboros.Consensus.Protocol.Abstract (SecurityParam (SecurityParam), maxRollbacks) -import Ouroboros.Consensus.Util.Condense (Condense (..), - CondenseList (..), PaddingDirection (..), - condenseListWithPadding, padListWith) +import Ouroboros.Consensus.Util.Condense (CondenseList (..), + PaddingDirection (..), condenseListWithPadding) import qualified Ouroboros.Network.AnchoredFragment as AF -import Ouroboros.Network.Block (SlotNo (..), Tip (..), blockSlot, - tipFromHeader) +import Ouroboros.Network.Block (SlotNo (..), blockSlot) import Ouroboros.Network.Point (withOrigin) import qualified System.Random.Stateful as Random import System.Random.Stateful (STGenM, StatefulGen, runSTGen_) import Test.Consensus.BlockTree (BlockTree (..), BlockTreeBranch (..), allFragments, prettyBlockTree) import Test.Consensus.PeerSimulator.StateView (StateView) +import Test.Consensus.PointSchedule.NodeState (NodeState (..), + genesisNodeState) import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..), mkPeers, peersList) import Test.Consensus.PointSchedule.SinglePeer @@ -88,53 +86,10 @@ import Test.Consensus.PointSchedule.SinglePeer.Indices import Test.Ouroboros.Consensus.ChainGenerator.Params (Delta (Delta)) import Test.QuickCheck (Gen, arbitrary) import Test.QuickCheck.Random (QCGen) -import Test.Util.TersePrinting (terseBlock, terseFragment, - terseWithOrigin) +import Test.Util.TersePrinting (terseFragment) import Test.Util.TestBlock (TestBlock) import Text.Printf (printf) ----------------------------------------------------------------------------------------------------- --- Data types ----------------------------------------------------------------------------------------------------- - --- | The state of a peer at a given point in time. -data NodeState blk = - NodeState { - nsTip :: WithOrigin blk, - nsHeader :: WithOrigin blk, - nsBlock :: WithOrigin blk - } - deriving (Eq, Show) - -nsTipTip :: AF.HasHeader blk => NodeState blk -> Tip blk -nsTipTip = withOrigin TipGenesis tipFromHeader . nsTip - -instance Condense (NodeState TestBlock) where - condense NodeState {nsTip, nsHeader, nsBlock} = - "TP " ++ terseWithOrigin terseBlock nsTip ++ - " | HP " ++ terseWithOrigin terseBlock nsHeader ++ - " | BP " ++ terseWithOrigin terseBlock nsBlock - -instance CondenseList (NodeState TestBlock) where - condenseList points = - zipWith3 - (\tip header block -> - "TP " ++ tip ++ - " | HP " ++ header ++ - " | BP " ++ block - ) - (padListWith PadRight $ map (terseWithOrigin terseBlock . nsTip) points) - (padListWith PadRight $ map (terseWithOrigin terseBlock . nsHeader) points) - (padListWith PadRight $ map (terseWithOrigin terseBlock . nsBlock) points) - -genesisNodeState :: NodeState blk -genesisNodeState = - NodeState { - nsTip = Origin, - nsHeader = Origin, - nsBlock = Origin - } - prettyPeersSchedule :: forall blk. 
(CondenseList (NodeState blk)) => diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/NodeState.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/NodeState.hs new file mode 100644 index 0000000000..33cbbdd8dd --- /dev/null +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/NodeState.hs @@ -0,0 +1,57 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE NamedFieldPuns #-} + +module Test.Consensus.PointSchedule.NodeState ( + NodeState (..) + , genesisNodeState + , nsTipTip + ) where + +import Ouroboros.Consensus.Block.Abstract (WithOrigin (..)) +import Ouroboros.Consensus.Util.Condense (Condense (..), + CondenseList (..), PaddingDirection (..), padListWith) +import qualified Ouroboros.Network.AnchoredFragment as AF +import Ouroboros.Network.Block (Tip (..), tipFromHeader) +import Ouroboros.Network.Point (withOrigin) +import Test.Util.TersePrinting (terseBlock, terseWithOrigin) +import Test.Util.TestBlock (TestBlock) + +-- | The state of a peer at a given point in time. +data NodeState blk = + NodeState { + nsTip :: WithOrigin blk, + nsHeader :: WithOrigin blk, + nsBlock :: WithOrigin blk + } + deriving (Eq, Show) + +nsTipTip :: AF.HasHeader blk => NodeState blk -> Tip blk +nsTipTip = withOrigin TipGenesis tipFromHeader . nsTip + +instance Condense (NodeState TestBlock) where + condense NodeState {nsTip, nsHeader, nsBlock} = + "TP " ++ terseWithOrigin terseBlock nsTip ++ + " | HP " ++ terseWithOrigin terseBlock nsHeader ++ + " | BP " ++ terseWithOrigin terseBlock nsBlock + +instance CondenseList (NodeState TestBlock) where + condenseList points = + zipWith3 + (\tip header block -> + "TP " ++ tip ++ + " | HP " ++ header ++ + " | BP " ++ block + ) + (padListWith PadRight $ map (terseWithOrigin terseBlock . nsTip) points) + (padListWith PadRight $ map (terseWithOrigin terseBlock . nsHeader) points) + (padListWith PadRight $ map (terseWithOrigin terseBlock . 
nsBlock) points) + +genesisNodeState :: NodeState blk +genesisNodeState = + NodeState { + nsTip = Origin, + nsHeader = Origin, + nsBlock = Origin + } + + From 3220e7d8c550dc260b6a1c89c0cb8a8a4bd2622b Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Tue, 23 Apr 2024 18:09:18 +0200 Subject: [PATCH 13/20] CSJ happy path test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nicolas “Niols” Jeannerod Co-authored-by: Torsten Schmits --- .../ouroboros-consensus-diffusion.cabal | 1 + .../Test/Consensus/Genesis/Setup/GenChains.hs | 2 +- .../Test/Consensus/Genesis/Tests.hs | 4 +- .../Test/Consensus/Genesis/Tests/CSJ.hs | 134 ++++++++++++++++++ .../MiniProtocol/ChainSync/Client/Jumping.hs | 4 +- 5 files changed, 142 insertions(+), 3 deletions(-) create mode 100644 ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs diff --git a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal index e67b517cae..26c461887c 100644 --- a/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal +++ b/ouroboros-consensus-diffusion/ouroboros-consensus-diffusion.cabal @@ -229,6 +229,7 @@ test-suite consensus-test Test.Consensus.Genesis.Setup.Classifiers Test.Consensus.Genesis.Setup.GenChains Test.Consensus.Genesis.Tests + Test.Consensus.Genesis.Tests.CSJ Test.Consensus.Genesis.Tests.DensityDisconnect Test.Consensus.Genesis.Tests.LoE Test.Consensus.Genesis.Tests.LoP diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs index 9b6702c114..70bbceb8fa 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs @@ -125,7 +125,7 @@ genChains genNumForks = do gtSlotLength, gtChainSyncTimeouts = chainSyncTimeouts gtSlotLength asc, gtBlockFetchTimeouts = blockFetchTimeouts, - gtLoPBucketParams = LoPBucketParams { lbpCapacity = 10_000, lbpRate = 1_000 }, + gtLoPBucketParams = LoPBucketParams { lbpCapacity = 100_000, lbpRate = 1_000 }, -- ^ REVIEW: Do we want to generate those randomly? For now, the chosen -- values carry no special meaning. Someone needs to think about what values -- would make for interesting tests. 
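For scale: assuming the LoP bucket drains at 'lbpRate' tokens per second and the
peer is disconnected once the bucket runs empty (the leaky-bucket reading of
these parameters), a full bucket now lasts lbpCapacity / lbpRate =
100_000 / 1_000 = 100 seconds instead of 10, giving the randomly generated
schedules in the CSJ tests considerably more slack before the LoP triggers.
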
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests.hs index 6c21341d67..6c8776bb15 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests.hs @@ -1,5 +1,6 @@ module Test.Consensus.Genesis.Tests (tests) where +import qualified Test.Consensus.Genesis.Tests.CSJ as CSJ import qualified Test.Consensus.Genesis.Tests.DensityDisconnect as GDD import qualified Test.Consensus.Genesis.Tests.LoE as LoE import qualified Test.Consensus.Genesis.Tests.LongRangeAttack as LongRangeAttack @@ -9,7 +10,8 @@ import Test.Tasty tests :: TestTree tests = testGroup "Genesis tests" - [ GDD.tests + [ CSJ.tests + , GDD.tests , LongRangeAttack.tests , LoE.tests , LoP.tests diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs new file mode 100644 index 0000000000..16b0aada02 --- /dev/null +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs @@ -0,0 +1,134 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} + +module Test.Consensus.Genesis.Tests.CSJ (tests) where + +import Control.Monad (replicateM) +import Data.Containers.ListUtils (nubOrd) +import Data.Functor (($>)) +import Data.List (nub) +import Data.Maybe (mapMaybe) +import Ouroboros.Consensus.Block (blockSlot, succWithOrigin) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (TraceChainSyncClientEvent (..)) +import Ouroboros.Consensus.Util.Condense (PaddingDirection (..), + condenseListWithPadding) +import qualified Ouroboros.Network.AnchoredFragment as AF +import Test.Consensus.BlockTree (BlockTree (..)) +import Test.Consensus.Genesis.Setup +import Test.Consensus.Genesis.Tests.Uniform (genUniformSchedulePoints) +import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..), + defaultSchedulerConfig) +import Test.Consensus.PeerSimulator.StateView (StateView (..)) +import Test.Consensus.PeerSimulator.Trace (TraceEvent (..)) +import Test.Consensus.PointSchedule +import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..), + mkPeers) +import Test.Tasty +import Test.Tasty.QuickCheck +import Test.Util.Orphans.IOLike () +import Test.Util.TestBlock (Header, TestBlock) +import Test.Util.TestEnv (adjustQuickCheckMaxSize) + +tests :: TestTree +tests = + adjustQuickCheckMaxSize (`div` 5) $ + testGroup + "CSJ" + [ testGroup "Happy Path" + [ testProperty "synchronous" $ prop_happyPath True + , testProperty "asynchronous" $ prop_happyPath False + ] + ] + +-- | Test of the “happy path” scenario of ChainSync Jumping (CSJ). +-- +-- This test features one chain (ie. a block tree that is only trunk) and only +-- honest peers and syncs the chain in question with CSJ enabled. What we expect +-- to observe is that one of the honest peers becomes the dynamo while the +-- others become jumpers. Because the jumpers will agree to all the jumps, the +-- whole syncing should happen with CSJ without objectors. +-- +-- The final property is that headers should only ever be downloaded once and +-- only from one peer (the dynamo). This is true except when almost caught-up: +-- when the dynamo is caught-up, it gets disengaged and one of the jumpers takes +-- its place and starts serving headers. 
This might lead to duplication of
+-- headers, but only in a window of @jumpSize@ slots near the tip of the chain.
+--
+-- The boolean differentiates between “synchronous” and “asynchronous”
+-- scenarios. In a synchronous scenario, all the honest peers have the same
+-- schedule: they serve the chain exactly in the same way. In the asynchronous
+-- scenario, a random schedule is generated for each peer (but they still serve
+-- the same chain).
+prop_happyPath :: Bool -> Property
+prop_happyPath synchronized =
+  forAllGenesisTest
+    ( do
+        gt <- genChains $ pure 0
+        honest <- genHonestSchedule gt
+        numOthers <- choose (1, 3)
+        otherHonests <- if synchronized
+          then pure $ replicate numOthers honest
+          else replicateM numOthers (genHonestSchedule gt)
+        pure $ gt $> mkPeers honest otherHonests
+    )
+    ( defaultSchedulerConfig
+      { scEnableCSJ = True
+      , scEnableLoE = True
+      , scEnableLoP = True
+      }
+    )
+    ( -- NOTE: Shrinking makes the tests fail because the peers reject jumps
+      -- because their TP is G. This makes them into objectors and they then
+      -- start serving headers.
+      \_ _ -> []
+    )
+    ( \gt StateView{svTrace} ->
+        let
+          headerDownloadEvents =
+            mapMaybe
+              (\case
+                TraceChainSyncClientEvent pid (TraceDownloadedHeader hdr)
+                  | not (isNewerThanJumpSizeFromTip gt hdr)
+                  -> Just (pid, hdr)
+                _ -> Nothing
+              )
+              svTrace
+          receivedHeadersOnlyOnce = length (nub $ snd <$> headerDownloadEvents) == length headerDownloadEvents
+          -- NOTE: If all the headers are newer than jumpSize from the tip, then
+          -- 'headerDownloadEvents' is empty and the following condition would be
+          -- violated if we used @==@.
+          receivedHeadersFromOnlyOnePeer = length (nubOrd $ fst <$> headerDownloadEvents) <= 1
+        in
+          tabulate ""
+            [ if headerDownloadEvents == []
+                then "All headers may be downloaded twice"
+                else "There exist headers that have to be downloaded exactly once"
+            ] $
+          counterexample
+          ("Downloaded headers (except jumpSize slots near the tip):\n" ++
+            ( unlines $ fmap (" " ++) $ zipWith
+                (\peer header -> peer ++ " | " ++ header)
+                (condenseListWithPadding PadRight $ fst <$> headerDownloadEvents)
+                (condenseListWithPadding PadRight $ snd <$> headerDownloadEvents)
+            )
+          )
+          (receivedHeadersOnlyOnce && receivedHeadersFromOnlyOnePeer)
+    )
+  where
+    -- | This might seem wasteful, as we discard generated adversarial schedules.
+    -- It actually isn't, since we call it on trees that have no branches besides
+    -- the trunk, so no adversaries are generated.
+    genHonestSchedule :: GenesisTest TestBlock () -> Gen (PeerSchedule TestBlock)
+    genHonestSchedule gt = do
+      ps <- genUniformSchedulePoints gt
+      pure $ value $ honest ps
+
+    isNewerThanJumpSizeFromTip :: GenesisTestFull TestBlock -> Header TestBlock -> Bool
+    isNewerThanJumpSizeFromTip gt hdr =
+      let jumpSize = csjpJumpSize $ gtCSJParams gt
+          tipSlot = AF.headSlot $ btTrunk $ gtBlockTree gt
+          hdrSlot = blockSlot hdr
+      in
+        -- Sanity check: add @1 +@ after @>@ and watch the World burn.
+        hdrSlot + jumpSize >= succWithOrigin tipSlot
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
index 0bae5773f5..2dc88be698 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
@@ -390,7 +390,9 @@ nextInstruction context =
 -- validating it. 
-- -- We request jumpers to jump here if the next header received by the dynamo is --- at least jump size slots after the last jump. +-- at least jump size slots after the last jump. Note that, since this function +-- runs before validating the next header, it will not be part of the fragment +-- considered for the jump. -- -- We also check that the Objector disagrees with the header sent at its -- rejected jump. If it agrees to it, we disengage it. From dbbc3ab929698ea909f0db5d437e4fc7f47418fb Mon Sep 17 00:00:00 2001 From: Torsten Schmits Date: Fri, 12 Apr 2024 18:48:51 +0200 Subject: [PATCH 14/20] Fix prop_densityDisconnectMonotonic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test logic failed to ever update the `killed` flag that indicated that a peer was selected for disconnection before, essentially making the test tautological. This revealed the need for another fix – the test must be terminated when the LOE fragment advances beyond the honest chain (because a branch forks off the last block). --- .../Genesis/Tests/DensityDisconnect.hs | 296 +++++++++++++----- .../Test/Consensus/PeerSimulator/Trace.hs | 53 ++-- .../Test/Consensus/PointSchedule/Peers.hs | 9 +- 3 files changed, 251 insertions(+), 107 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs index cba130d57c..87c85f54b9 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs @@ -1,30 +1,38 @@ {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} module Test.Consensus.Genesis.Tests.DensityDisconnect (tests) where -import Cardano.Slotting.Slot (WithOrigin (..), unSlotNo) +import Cardano.Slotting.Slot (SlotNo (unSlotNo), WithOrigin (..)) import Control.Exception (fromException) import Control.Monad.Class.MonadTime.SI (Time (..)) -import Data.Bifunctor (second) -import Data.Foldable (minimumBy, toList) +import Data.Bifunctor +import Data.Foldable (maximumBy, minimumBy, toList) import Data.Function (on) import Data.Functor (($>), (<&>)) import Data.List (intercalate) +import Data.List.NonEmpty (nonEmpty) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map +import Data.Maybe (fromMaybe) import Data.Semigroup (Endo (..)) -import Ouroboros.Consensus.Block (fromWithOrigin, withOrigin) +import Data.Set (Set, (\\)) +import qualified Data.Set as Set +import Ouroboros.Consensus.Block (Point (GenesisPoint), + WithOrigin (NotOrigin), blockSlot, fromWithOrigin, + withOrigin) import Ouroboros.Consensus.Block.Abstract (Header, getHeader) import Ouroboros.Consensus.Config.SecurityParam (SecurityParam (SecurityParam), maxRollbacks) -import Ouroboros.Consensus.Genesis.Governor (densityDisconnect, - sharedCandidatePrefix) +import Ouroboros.Consensus.Genesis.Governor (DensityBounds, + densityDisconnect, sharedCandidatePrefix) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (ChainSyncClientException (DensityTooLow), ChainSyncState (..)) +import Ouroboros.Consensus.Util.Condense (condense) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block 
(HasHeader, Tip (TipGenesis), @@ -38,6 +46,7 @@ import Test.Consensus.PeerSimulator.Run import Test.Consensus.PeerSimulator.StateView (PeerSimulatorComponent (..), StateView (..), exceptionsByComponent) +import Test.Consensus.PeerSimulator.Trace (prettyDensityBounds) import Test.Consensus.PointSchedule import Test.Consensus.PointSchedule.Peers import Test.Consensus.PointSchedule.Shrinking @@ -46,17 +55,18 @@ import Test.Consensus.PointSchedule.SinglePeer (SchedulePoint (..), scheduleBlockPoint, scheduleHeaderPoint, scheduleTipPoint) import qualified Test.QuickCheck as QC import Test.QuickCheck +import Test.QuickCheck.Extras (unsafeMapSuchThatJust) import Test.Tasty import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () -import Test.Util.TersePrinting (terseHFragment) +import Test.Util.TersePrinting (terseHFragment, terseHeader) import Test.Util.TestBlock (TestBlock) import Test.Util.TestEnv (adjustQuickCheckMaxSize, adjustQuickCheckTests) tests :: TestTree tests = - adjustQuickCheckTests (* 4) $ + adjustQuickCheckTests (* 4) $ adjustQuickCheckMaxSize (`div` 5) $ testGroup "gdd" [ testProperty "basic" prop_densityDisconnectStatic, @@ -146,19 +156,26 @@ data EvolvingPeer = suffix :: [Header TestBlock], tip :: Tip TestBlock, prefixSlots :: Int, - killed :: Bool + forkSlot :: WithOrigin SlotNo } deriving Show data EvolvingPeers = EvolvingPeers { - k :: SecurityParam, - sgen :: GenesisWindow, - peers :: Peers EvolvingPeer, - loeFrag :: AnchoredFragment (Header TestBlock) + k :: SecurityParam, + sgen :: GenesisWindow, + peers :: Peers EvolvingPeer, + loeFrag :: AnchoredFragment (Header TestBlock), + fullTree :: BlockTree TestBlock } deriving Show +data Evolution = + Evolution { + peers :: Peers EvolvingPeer, + killed :: Set PeerId + } + lastSlot :: AF.HasHeader b => AnchoredFragment b -> @@ -172,7 +189,8 @@ initCandidates GenesisTest {gtSecurityParam, gtGenesisWindow, gtBlockTree} = k = gtSecurityParam, sgen = gtGenesisWindow, peers, - loeFrag = AF.Empty AF.AnchorGenesis + loeFrag = AF.Empty AF.AnchorGenesis, + fullTree = gtBlockTree } where peers = mkPeers (peer trunk (AF.Empty (AF.headAnchor trunk)) (btTrunk gtBlockTree)) (branchPeer <$> branches) @@ -187,7 +205,7 @@ initCandidates GenesisTest {gtSecurityParam, gtGenesisWindow, gtBlockTree} = suffix = AF.toOldestFirst headers, tip = branchTip chain, prefixSlots = lastSlot forkPrefix, - killed = False + forkSlot = AF.lastSlot forkSuffix } where headers = toHeaders chain @@ -196,127 +214,241 @@ initCandidates GenesisTest {gtSecurityParam, gtGenesisWindow, gtBlockTree} = branches = btBranches gtBlockTree +data UpdateEvent = UpdateEvent { + -- | The peer whose candidate was extended in this step + target :: PeerId + -- | The header appended to the candidate of 'target' + , added :: Header TestBlock + -- | Peers that have been disconnected in the current step + , killed :: Set PeerId + -- | The GDD data + , bounds :: Map PeerId (DensityBounds TestBlock) + -- | The current chains + , tree :: BlockTree (Header TestBlock) + , loeFrag :: AnchoredFragment (Header TestBlock) + , curChain :: AnchoredFragment (Header TestBlock) + } + +snapshotTree :: Peers EvolvingPeer -> BlockTree (Header TestBlock) +snapshotTree Peers {honest, others} = + foldr addBranch' (mkTrunk (candidate (value honest))) (candidate . 
value <$> others)
+
+prettyUpdateEvent :: UpdateEvent -> [String]
+prettyUpdateEvent UpdateEvent {target, added, killed, bounds, tree, loeFrag, curChain} =
+  [
+    "Extended " ++ condense target ++ " with " ++ terseHeader added,
+    " disconnect: " ++ show killed,
+    " LoE frag: " ++ terseHFragment loeFrag,
+    " selection: " ++ terseHFragment curChain
+  ]
+  ++ prettyDensityBounds bounds
+  ++ "" : prettyBlockTree tree
+
 data MonotonicityResult =
   HonestKilled
   |
-  Nonmonotonic
+  Nonmonotonic UpdateEvent
   |
   Finished
-  deriving Show

--- | Check whether the honest peer was killed or a peer's new losing state violates monotonicity, i.e. if it was found
--- to be losing before, it shouldn't be found winning later.
+-- | Check whether the honest peer was killed or a peer's new losing state
+-- violates monotonicity, i.e. if it was found to be losing before, it shouldn't
+-- be found winning later.
 --
--- If that is the case, return @Left (False, peers)@ to indicate that the test is over and failed.
+-- If that is the case, return @Left (HonestKilled|Nonmonotonic, peers)@ to
+-- indicate that the test is over and failed.
 --
--- Otherwise, remove all adversaries that either have no more blocks or have more than @sgen@ slots after their fork
--- intersection.
+-- Otherwise, remove all adversaries that either have no more blocks or have
+-- more than @sgen@ slots after their fork intersection. The only motivation
+-- to shrink the adversary set is to ensure termination.
 --
--- If no adversaries remain, return @Left (True, peers)@ to indicate that the test is over and succeeded.
+-- If no adversaries remain, return @Left (Finished, peers)@ to indicate that
+-- the test is over and succeeded.
 --
 -- Otherwise, return @Right remaining@ to continue with the next step.
 updatePeers ::
   GenesisWindow ->
-  PeerId ->
-  [PeerId] ->
   Peers EvolvingPeer ->
-  Either (MonotonicityResult, Peers EvolvingPeer) (Peers EvolvingPeer)
-updatePeers (GenesisWindow sgen) target disconnect peers
-  | HonestPeer `elem` disconnect
+  -- | Peers that were disconnected previously
+  Set PeerId ->
+  UpdateEvent ->
+  Either (MonotonicityResult, Peers EvolvingPeer) Evolution
+updatePeers (GenesisWindow sgen) peers killedBefore event@UpdateEvent {target, killed = killedNow}
+  | HonestPeer `Set.member` killedNow
   = Left (HonestKilled, peers)
-  | killed peer
-  , not (target `elem` disconnect)
-  = Left (Nonmonotonic, peers)
+  | not (null violations)
+  = Left (Nonmonotonic event, peers)
   | null remaining
   = Left (Finished, peers)
   | otherwise
-  = Right peers {others = remaining}
+  = Right evo
   where
-    Peer {value = peer} = getPeer target peers
-
-    remaining = Map.filter (not . discardPeer) (others peers)
-
-    discardPeer Peer {value = EvolvingPeer {candidate, suffix, prefixSlots}} =
-      null suffix || lastSlot candidate - prefixSlots > fromIntegral sgen
-
--- | Find the earliest intersection, used to compute the selection.
+    -- The peers that were killed in an earlier step but not in the current one
+    violations = killedBefore \\ killedNow
+
+    -- The new state if no violations were detected
+    evo@Evolution {peers = Peers {others = remaining}}
+      | targetExhausted
+      -- If the target is done, reset the set of killed peers, since other peers
+      -- may have lost only against the target.
+      -- Remove the target from the active peers.
+      = Evolution {peers = peers {others = Map.delete target (others peers)}, killed = mempty}
+      | otherwise
+      -- Otherwise replace the killed peers with the current set
+      = Evolution {peers, killed = killedNow}
+
+    -- Whether the extended peer is uninteresting for GDD from now on
+    targetExhausted =
+      -- Its fragment cannot be extended anymore, or
+      null suffix ||
+      -- Its candidate is longer than a Genesis window
+      lastSlot candidate - prefixSlots > fromIntegral sgen
+
+    Peer {value = EvolvingPeer {candidate, suffix, prefixSlots}} = getPeer target peers
+
+-- | Find the peer whose candidate has the earliest intersection.
+-- If no peer has reached its fork suffix yet, return the one with the highest slot.
+--
+-- The selection will then be computed by taking up to k blocks after the immutable tip
+-- on this peer's candidate fragment.
 firstBranch :: Peers EvolvingPeer -> Peer EvolvingPeer
-firstBranch peers =
-  minimumBy (compare `on` predicate) (toList (others peers))
+firstBranch Peers {honest, others} =
+  fromMaybe newest $
+    minimumBy (compare `on` forkAnchor) <$> nonEmpty (filter hasForked (toList others))
+  where
+    newest = maximumBy (compare `on` (AF.headSlot . candidate . value)) (honest : toList others)
+    forkAnchor = fromWithOrigin 0 . AF.anchorToSlotNo . AF.anchor . forkSuffix . value
+    hasForked Peer {value = EvolvingPeer {candidate, forkSlot}} =
+      AF.headSlot candidate >= forkSlot
+
+-- | Determine the immutable tip by computing the latest point before the fork intersection
+-- for all peers, and then taking the earliest among the results.
+immutableTip :: Peers EvolvingPeer -> AF.Point (Header TestBlock)
+immutableTip peers =
+  minimum (lastHonest <$> toList (others peers))
   where
-    predicate = fromWithOrigin 0 . AF.anchorToSlotNo . AF.anchor . forkSuffix . value
+    lastHonest Peer {value = EvolvingPeer {candidate, forkSlot = NotOrigin forkSlot}} =
+      AF.headPoint $
+      AF.dropWhileNewest (\ b -> blockSlot b >= forkSlot) candidate
+    lastHonest _ = GenesisPoint
 
 -- | Take one block off the peer's suffix and append it to the candidate fragment.
 --
 -- Since we don't remove the honest peer when it's exhausted, this may be called with an empty suffix.
-movePeer :: EvolvingPeer -> EvolvingPeer
+movePeer :: EvolvingPeer -> (EvolvingPeer, Maybe (Header TestBlock))
 movePeer = \case
-  peer@EvolvingPeer {candidate, suffix = h : t} -> peer {candidate = candidate AF.:> h, suffix = t}
-  peer -> peer
+  peer@EvolvingPeer {candidate, suffix = h : t} ->
+    (peer {candidate = candidate AF.:> h, suffix = t}, Just h)
+  peer -> (peer, Nothing)
 
--- | Repeatedly run the GDD, each time updating a random peer to advance by one block.
--- The selection is set to the first k blocks of the first fork.
--- The tips are the last blocks of each full branch.
--- The returned 'Bool' indicates whether the honest peer won and no monotonicity violations were detected.
+-- | Repeatedly run the GDD, each time updating the candidate fragment of a
+-- random peer to advance by one header, until all peers have been discarded
+-- (not the same as disconnected!) according to 'updatePeers'.
+--
+-- The selection is set to the first k blocks of the first fork, the
+-- anchor being the intersection.
+--
+-- The latest slots are the youngest headers of the candidate fragments.
+--
+-- The returned 'MonotonicityResult' indicates whether the honest peer won and
+-- no monotonicity violations were detected (once a peer is disconnected, it
+-- must stay disconnected).
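+--
+-- The full per-step event log is also returned, so that failing runs can
+-- report a detailed counterexample.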
evolveBranches :: EvolvingPeers -> - Gen (MonotonicityResult, EvolvingPeers) -evolveBranches EvolvingPeers {k, sgen, peers = initialPeers} = - step initialPeers + Gen (MonotonicityResult, EvolvingPeers, [UpdateEvent]) +evolveBranches EvolvingPeers {k, sgen, peers = initialPeers, fullTree} = + step [] Evolution {peers = initialPeers, killed = mempty} where - step ps = do - target <- elements ids + step events Evolution {peers = ps, killed = killedBefore} = do + (target, nextPeers, added) <- unsafeMapSuchThatJust $ do + -- Select a random peer + pid <- elements ids + pure $ do + -- Add a block to the candidate. If the peer has no more blocks, + -- this returns 'Nothing' and the generator retries. + (nextPeers, added) <- sequence (updatePeer movePeer pid ps) + pure (pid, nextPeers, added) let - curChain = selection (value (firstBranch ps)) - next = updatePeer movePeer target ps - candidates = candidate . value <$> toMap next - states = - candidates <&> \ csCandidate -> - ChainSyncState { - csCandidate, - csIdling = False, - csLatestSlot = Just (AF.headSlot csCandidate) - } - (loeFrag, suffixes) = sharedCandidatePrefix curChain candidates - disconnect = fst (densityDisconnect sgen k states suffixes loeFrag) - either (pure . second (result loeFrag)) step (updatePeers sgen target disconnect next) + -- Compute the selection. + curChain = selection (immutableTip ps) (firstBranch ps) + candidates = candidate . value <$> toMap nextPeers + states = + candidates <&> \ csCandidate -> + ChainSyncState { + csCandidate, + csIdling = False, + csLatestSlot = Just (AF.headSlot csCandidate) + } + -- Run GDD. + (loeFrag, suffixes) = sharedCandidatePrefix curChain candidates + (killedNow, bounds) = first Set.fromList $ densityDisconnect sgen k states suffixes loeFrag + event = UpdateEvent { + target, + added, + killed = killedNow, + bounds, + tree = snapshotTree nextPeers, + loeFrag, + curChain + } + newEvents = event : events + -- Check the termination condition and remove exhausted peers. + updated = updatePeers sgen nextPeers killedBefore event + either (pure . result newEvents loeFrag) (step newEvents) updated where - result f final = EvolvingPeers {k, sgen, peers = final, loeFrag = f} + result evs f (res, final) = (res, EvolvingPeers {k, sgen, peers = final, loeFrag = f, fullTree}, reverse evs) - selection branch = - AF.takeOldest (AF.length (forkPrefix branch) + fromIntegral k') (forkSuffix branch) + -- Take k blocks after the immutable tip on the first fork. + selection imm Peer {value = EvolvingPeer {candidate}} = + case AF.splitAfterPoint candidate imm of + Just (_, s) -> AF.takeOldest (fromIntegral k') s + Nothing -> error "immutable tip not on candidate" ids = toList (getPeerIds ps) SecurityParam k' = k - _tips = tip <$> toMap' initialPeers peerInfo :: EvolvingPeers -> [String] -peerInfo EvolvingPeers {k = SecurityParam k, sgen = GenesisWindow sgen, peers = Peers {honest, others}, loeFrag} = +peerInfo EvolvingPeers {k = SecurityParam k, sgen = GenesisWindow sgen, loeFrag} = [ "k: " <> show k, "sgen: " <> show sgen, - "loeFrag: " <> terseHFragment loeFrag, - intercalate "\n" (prettyBlockTree tree) + "loeFrag: " <> terseHFragment loeFrag ] - where - tree = foldr addBranch' (mkTrunk (candidate (value honest))) (candidate . value <$> others) +-- | Tests that when GDD disconnects a peer, it continues to disconnect it when +-- its candidate fragment is extended. 
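+-- The 'debug' flag below additionally attaches the complete event log and
+-- block tree to failing counterexamples.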
prop_densityDisconnectMonotonic :: Property prop_densityDisconnectMonotonic = - forAllBlind gen $ \ (result, final) -> + forAllBlind gen $ \ (result, final, events) -> appEndo (foldMap (Endo . counterexample) (peerInfo final)) $ - check result + check final events result where - check = \case - HonestKilled -> counterexample "Honest peer was killed" False - Nonmonotonic -> counterexample "Peer went from losing to winning" False + check final events = \case + HonestKilled -> withEvents $ counterexample "Honest peer was killed" False + Nonmonotonic event -> do + let msg = "Peer went from losing to remaining" + withEvents $ counterexample (catLines (msg : prettyUpdateEvent event)) False Finished -> property True + where + withEvents | debug = counterexample (catLines debugInfo) + | otherwise = id + + debugInfo = + "Event log:" : ((++ [""]) . prettyUpdateEvent =<< events) ++ + ["k: " ++ show k'] ++ + ("Full tree:" : prettyBlockTree (fullTree final) ++ [""]) + + EvolvingPeers {k = SecurityParam k'} = final + + catLines = intercalate "\n" gen = do gt <- genChains (QC.choose (1, 4)) evolveBranches (initCandidates gt) + debug = True + -- | Tests that a GDD disconnection re-triggers chain selection, i.e. when the current -- selection is blocked by LoE, and the leashing adversary reveals it is not dense enough, -- it gets disconnected and then the selection progresses. diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index 13f80ff793..18eddfcced 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -15,6 +15,7 @@ module Test.Consensus.PeerSimulator.Trace ( , TraceScheduledServerHandlerEvent (..) , TraceSchedulerEvent (..) , mkGDDTracerTestBlock + , prettyDensityBounds , traceLinesWith , tracerTestBlock ) where @@ -447,28 +448,9 @@ traceBlockFetchClientTerminationEventTestBlockWith pid tracer = \case where trace = traceUnitWith tracer ("BlockFetchClient " ++ condense pid) --- * Other utilities -terseGDDEvent :: TraceGDDEvent PeerId TestBlock -> String -terseGDDEvent = \case - TraceGDDEvent {sgen = GenesisWindow sgen, curChain, bounds, candidates, candidateSuffixes, losingPeers, loeHead} -> - unlines $ [ - "GDD | Window: " ++ window sgen loeHead, - " Selection: " ++ terseHFragment curChain, - " Candidates:" - ] ++ - showPeers (tersePoint . castPoint . AF.headPoint <$> candidates) ++ - [ - " Candidate suffixes (bounds):" - ] ++ - showPeers (terseHFragment . clippedFragment <$> bounds) ++ - [" Density bounds:"] ++ - showPeers (showBounds <$> bounds) ++ - [" New candidate tips:"] ++ - showPeers (tersePoint . castPoint <$> Map.map AF.headPoint candidateSuffixes) ++ - [ - " Losing peers: " ++ show losingPeers, - " Setting loeFrag: " ++ terseAnchor (AF.castAnchor loeHead) - ] +prettyDensityBounds :: Map.Map PeerId (DensityBounds TestBlock) -> [String] +prettyDensityBounds bounds = + showPeers (showBounds <$> bounds) where showBounds DensityBounds {clippedFragment, offersMoreThanK, lowerBound, upperBound, hasBlockAfter, latestSlot, idling} = show lowerBound ++ "/" ++ show upperBound ++ "[" ++ more ++ "], " ++ @@ -493,6 +475,33 @@ terseGDDEvent = \case showIdling | idling = ", idling" | otherwise = "" + showPeers :: Map.Map PeerId String -> [String] + showPeers = fmap (\ (peer, v) -> " " ++ condense peer ++ ": " ++ v) . 
Map.toList
+
+-- * Other utilities

terseGDDEvent :: TraceGDDEvent PeerId TestBlock -> String
terseGDDEvent = \case
  TraceGDDEvent {sgen = GenesisWindow sgen, curChain, bounds, candidates, candidateSuffixes, losingPeers, loeHead} ->
    unlines $ [
      "GDD | Window: " ++ window sgen loeHead,
      " Selection: " ++ terseHFragment curChain,
      " Candidates:"
    ] ++
    showPeers (tersePoint . castPoint . AF.headPoint <$> candidates) ++
    [
      " Candidate suffixes (bounds):"
    ] ++
    showPeers (terseHFragment . clippedFragment <$> bounds) ++
    [" Density bounds:"] ++
    prettyDensityBounds bounds ++
    [" New candidate tips:"] ++
    showPeers (tersePoint . castPoint <$> Map.map AF.headPoint candidateSuffixes) ++
    [
      " Losing peers: " ++ show losingPeers,
      " Setting loeFrag: " ++ terseAnchor (AF.castAnchor loeHead)
    ]
+  where
+    window sgen loeHead =
      show winStart ++ " -> " ++ show winEnd
      where
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs
index 74a97ceecb..9d0a084c08 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs
@@ -132,12 +132,15 @@ getPeer pid peers
   | otherwise
   = others peers Map.! pid
 
-updatePeer :: (a -> a) -> PeerId -> Peers a -> Peers a
+updatePeer :: (a -> (a, b)) -> PeerId -> Peers a -> (Peers a, b)
 updatePeer f pid Peers {honest, others}
   | HonestPeer <- pid
-  = Peers {honest = f <$> honest, others}
+  , let (a, b) = f (value honest)
+  = (Peers {honest = a <$ honest, others}, b)
   | otherwise
-  = Peers {honest, others = Map.adjust (fmap f) pid others}
+  , let p = others Map.! pid
+        (a, b) = f (value p)
+  = (Peers {honest, others = Map.adjust (a <$) pid others}, b)
 
 -- | Convert 'Peers' to a list of 'Peer'. 
 peersList :: Peers a -> NonEmpty (Peer a)

From 915238e24599714ddc423e287345e1f2ec8103e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Thu, 2 May 2024 11:30:31 +0000
Subject: [PATCH 15/20] Implement recordingTracerM to avoid affecting the
 scheduling of execution

---
 .../Test/Consensus/Genesis/Setup.hs |  4 +--
 .../Test/Util/Tracer.hs             | 26 +++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
index ec293f6771..ae6eb10095 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
@@ -35,7 +35,7 @@ import Test.QuickCheck
 import Test.Util.Orphans.IOLike ()
 import Test.Util.QuickCheck (forAllGenRunShrinkCheck)
 import Test.Util.TestBlock (TestBlock)
-import Test.Util.Tracer (recordingTracerTVar)
+import Test.Util.Tracer (recordingTracerM)
 import Text.Printf (printf)
 
@@ -56,7 +56,7 @@ runGenesisTest ::
   RunGenesisTestResult
 runGenesisTest schedulerConfig genesisTest =
   runSimStrictShutdownOrThrow $ do
-    (recordingTracer, getTrace) <- recordingTracerTVar
+    (recordingTracer, getTrace) <- recordingTracerM
     let tracer = if scDebug schedulerConfig then debugTracer else recordingTracer
 
     traceLinesWith tracer $ prettyGenesisTest prettyPeersSchedule genesisTest
diff --git a/ouroboros-consensus/src/unstable-consensus-testlib/Test/Util/Tracer.hs b/ouroboros-consensus/src/unstable-consensus-testlib/Test/Util/Tracer.hs
index 69d6dffe3c..e2218e6e93 100644
--- a/ouroboros-consensus/src/unstable-consensus-testlib/Test/Util/Tracer.hs
+++ b/ouroboros-consensus/src/unstable-consensus-testlib/Test/Util/Tracer.hs
@@ -1,11 +1,14 @@
+{-# LANGUAGE ScopedTypeVariables #-}
 module Test.Util.Tracer (
     recordingTracerIORef
+  , recordingTracerM
   , recordingTracerTVar
   ) where
 
 import Control.Tracer
 import Data.IORef
 import Ouroboros.Consensus.Util.IOLike
+import System.IO.Unsafe (unsafePerformIO)
 
 -- | Create a 'Tracer' that stores all events in an 'IORef' that is atomically
 -- updated. The second return value lets you obtain the events recorded so far
@@ -24,3 +27,26 @@ recordingTracerTVar = uncheckedNewTVarM [] >>= \ref -> return
     ( Tracer $ \ev -> atomically $ modifyTVar ref (ev:)
     , atomically $ reverse <$> readTVar ref
     )
+
+-- | Like 'recordingTracerIORef', but lifts IO to an arbitrary monad.
+-- This is useful to record events without changing the scheduling during a
+-- test.
+recordingTracerM :: forall m ev. Monad m => m (Tracer m ev, m [ev])
+recordingTracerM = do
+    (tr, get) <- liftIOtoM recordingTracerIORef
+    pure (natTracer liftIOtoM tr, liftIOtoM get)
+  where
+    liftIOtoM :: IO a -> m a
+    liftIOtoM m = do
+      -- The fictitious state is only used to force unsafePerformIO to run @m@
+      -- every time @liftIOtoM m@ is evaluated.
+      s <- getStateM
+      pure $! snd $ unsafePerformIO $ do
+        r <- m
+        pure (s, r)
+
+    -- We mark this function as NOINLINE to ensure the compiler cannot conclude
+    -- that two calls of @getStateM@ yield the same value.
+ {-# NOINLINE getStateM #-} + getStateM :: m Int + getStateM = pure 0 From 2cf3771bee5d28f876e50b9cd95918909ac522c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Tue, 30 Apr 2024 18:06:17 +0000 Subject: [PATCH 16/20] Add changelog --- ...0240430_180423_niols_milestone_12_chain_sync_jumping.md | 3 +++ ...0240430_180423_niols_milestone_12_chain_sync_jumping.md | 7 +++++++ 2 files changed, 10 insertions(+) create mode 100644 ouroboros-consensus-diffusion/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md create mode 100644 ouroboros-consensus/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md diff --git a/ouroboros-consensus-diffusion/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md b/ouroboros-consensus-diffusion/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md new file mode 100644 index 0000000000..edc421fd17 --- /dev/null +++ b/ouroboros-consensus-diffusion/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md @@ -0,0 +1,3 @@ +### Breaking + +- Implemented a first version of CSJ (ChainSync Jumping). (disabled by default) diff --git a/ouroboros-consensus/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md b/ouroboros-consensus/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md new file mode 100644 index 0000000000..de8c14c7ac --- /dev/null +++ b/ouroboros-consensus/changelog.d/20240430_180423_niols_milestone_12_chain_sync_jumping.md @@ -0,0 +1,7 @@ +### Non-Breaking + +- Fixed GDD implementation. (still disabled by default) + +### Breaking + +- Implemented a first version of CSJ (ChainSync Jumping). (disabled by default) From 886a42caf45563d2ce80ee299a622a0bd239d6e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 6 May 2024 14:46:05 +0000 Subject: [PATCH 17/20] Edits to CSJ documentation. --- .../Consensus/MiniProtocol/ChainSync/Client.hs | 4 +++- .../MiniProtocol/ChainSync/Client/Jumping.hs | 17 ++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index f6ae1f1270..cbc1ae0f4e 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -1150,8 +1150,10 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = Terminate -> terminateAfterDrain n $ AskedToTerminate _continue -> do -- Wait until next jumping instruction, which can be either to - -- jump or to run normal ChainSync. + -- jumap, to run normal ChainSync, or to restart the ChainSync + -- client. -- Pause LoP while waiting, we'll resume it if we get `RunNormally` + -- or `Restart`. 
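
As a rough illustration of the three instructions mentioned in the comment
above (a hypothetical sketch, not part of the patch; it assumes
`Instruction (..)` can be imported from the Jumping module whose hunks
follow):

{-# LANGUAGE LambdaCase #-}

-- Illustrative only: names the action the ChainSync client takes for each
-- jumping instruction once `jgNextInstruction` returns and LoP is resumed.
describeInstruction :: Instruction blk -> String
describeInstruction = \case
  RunNormally       -> "resume the LoP bucket; continue normal ChainSync"
  Restart           -> "resume the LoP bucket; restart the ChainSync client"
  JumpInstruction _ -> "offer the jump point to the peer"
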
               traceWith tracer TraceJumpingWaitingForNextInstruction
               lbPause loPBucket
               instruction <- Jumping.jgNextInstruction jumping
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
index 2dc88be698..e4d619540a 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
@@ -80,7 +80,7 @@
 --
 -- Jumpers don't leak the Limit on Patience (LoP) bucket until they are promoted
 -- to dynamos or objectors. And the leaking is stopped as soon as they are
--- disengaged or demoted.
+-- demoted.
 --
 -- If a jumper refrains from answering to jumps, they will be disconnected with
 -- the 'intersectTimeout' (in 'ChainSyncTimeout').
@@ -186,9 +186,9 @@ import qualified Ouroboros.Network.AnchoredFragment as AF
 -- | Hooks for ChainSync jumping.
 data Jumping m blk = Jumping
   { -- | Get the next instruction to execute, which can be either to run normal
-    -- ChainSync or to jump to a given point. When the peer is a jumper and
-    -- there is no jump request, 'jgNextInstruction' blocks until a jump request
-    -- is made.
+    -- ChainSync, to jump to a given point, or to restart ChainSync. When the
+    -- peer is a jumper and there is no jump request, 'jgNextInstruction' blocks
+    -- until a jump request is made.
     jgNextInstruction :: !(m (Instruction blk)),
 
     -- | To be called whenever the peer claims to have no more headers.
@@ -297,6 +297,9 @@ stripContext context = context {peer = (), handle = ()}
 -- to jump to follow a dynamo with the given fragment, or to restart ChainSync.
 data Instruction blk
   = RunNormally
+  -- | The restart instruction restarts the ChainSync protocol. This is
+  -- necessary when disengaging a peer for which we know no point at which
+  -- we could set the intersection of the ChainSync server.
   | Restart
   | -- | Jump to the tip of the given fragment.
     JumpInstruction !(JumpInstruction blk)
@@ -312,9 +315,9 @@ deriving anyclass instance
 data JumpInstruction blk
   = JumpTo !(JumpInfo blk)
-  | -- | Used to set the intersection of the servers of starting objectors.
-    -- Otherwise, the ChainSync server wouldn't know which headers to start
-    -- serving.
+  | -- | Used to set the intersection of the ChainSync servers of starting
+    -- objectors and dynamos. Otherwise, the ChainSync server wouldn't know
+    -- which headers to start serving.
JumpToGoodPoint !(JumpInfo blk) deriving (Generic) From 48318d366d7adbb2b9e9d75cb65355b13cf396a2 Mon Sep 17 00:00:00 2001 From: Nicolas Jeannerod Date: Fri, 17 May 2024 13:42:13 +0000 Subject: [PATCH 18/20] Apply suggestions by @jasagredo Co-authored-by: Javier Sagredo --- .../Test/Consensus/Genesis/Setup/Classifiers.hs | 2 +- .../test/consensus-test/Test/Consensus/PointSchedule.hs | 2 +- .../Test/Consensus/PointSchedule/Shrinking.hs | 6 +++--- ouroboros-consensus/bench/ChainSync-client-bench/Main.hs | 2 +- .../Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs | 6 +++--- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs index 1f6f2e7267..559b6f1712 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/Classifiers.hs @@ -202,7 +202,7 @@ data ScheduleClassifiers = adversaryRollback :: Bool, -- | The honest peer did a rollback honestRollback :: Bool, - -- | All adversaries have an empty schedule: the only way to disconnect them are + -- | All adversaries have an empty schedule: the only way to disconnect them are -- network timeouts. allAdversariesEmpty :: Bool, -- | All adversaries have trivial schedules: they only have an initial state, and diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs index 9ed560762c..a883a4fc84 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs @@ -267,7 +267,7 @@ uniformPoints BlockTree {btTrunk, btBranches} g = do minusClamp :: (Ord a, Num a) => a -> a -> a minusClamp a b | a <= b = 0 - | otherwise = a - b + | otherwise = a - b zipPadN :: forall a . [[a]] -> [[Maybe a]] zipPadN = diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs index e424560fbe..8332e62021 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs @@ -74,8 +74,8 @@ shrinkOtherPeers shrink Peers{honest, others} = shrinkList (traverse (traverse shrink)) $ Map.toList others -- | Shrinks an honest peer by removing ticks. --- Because we are manipulating `PeerSchedule` at that point, there is no proper --- notion of a tick. Instead, we remove points of the honest `PeerSchedule`, +-- Because we are manipulating 'PeerSchedule' at that point, there is no proper +-- notion of a tick. Instead, we remove points of the honest 'PeerSchedule', -- and move all other points sooner, including those on the adversarial schedule. -- We check that this operation neither changes the final state of the honest peer, -- nor that it removes points from the adversarial schedules. @@ -99,7 +99,7 @@ shrinkHonestPeer Peers{honest, others} = do (zip (value honest) (drop 1 $ value honest)) -- | Speeds up an honest schedule after `at` time, by `speedUpBy`. 
--- This "speeding up" is done by removing `speedUpBy` to all points after `at`, +-- This “speeding up” is done by subtracting @speedUpBy@ to all points after @at@, -- and removing those points if they fall before `at`. We check that the operation -- doesn't change the final state of the peer, i.e. it doesn't remove all TP, HP, and BP -- in the sped up part. diff --git a/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs b/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs index aad24b713b..e1e69b3da1 100644 --- a/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs +++ b/ouroboros-consensus/bench/ChainSync-client-bench/Main.hs @@ -138,7 +138,7 @@ oneBenchRun , CSClient.setLatestSlot = \_ -> pure () , CSClient.idling = CSClient.noIdling , CSClient.loPBucket = CSClient.noLoPBucket - , CSClient.jumping = CSClient.noJumping + , CSClient.jumping = CSClient.noJumping } server :: ChainSyncServer H (Point B) (Tip B) IO () diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index cbc1ae0f4e..6d23553141 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -173,14 +173,14 @@ data ChainSyncLoPBucketConfig -- | Configuration of ChainSync Jumping data CSJConfig = - -- Disable ChainSync Jumping. All clients will fully synchronize with + -- | Disable ChainSync Jumping. All clients will fully synchronize with -- the chain of its peer. CSJDisabled | -- | Enable ChainSync Jumping CSJEnabled CSJEnabledConfig -data CSJEnabledConfig = CSJEnabledConfig { +newtype CSJEnabledConfig = CSJEnabledConfig { -- | The _ideal_ size for ChainSync jumps. Note that the algorithm -- is best-effort: there might not be exactly `csjcJumpSize` slots between two -- jumps, depending on the chain. @@ -1150,7 +1150,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = Terminate -> terminateAfterDrain n $ AskedToTerminate _continue -> do -- Wait until next jumping instruction, which can be either to - -- jumap, to run normal ChainSync, or to restart the ChainSync + -- jump, to run normal ChainSync, or to restart the ChainSync -- client. -- Pause LoP while waiting, we'll resume it if we get `RunNormally` -- or `Restart`. From 517f072eb31071917c02ac9c38e3561c3c334027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 20 May 2024 15:37:03 -0300 Subject: [PATCH 19/20] Introduce objectors in the documentation of the Jumping module --- .../Consensus/MiniProtocol/ChainSync/Client/Jumping.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index e4d619540a..e1281a8416 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -21,9 +21,9 @@ -- they agree with the dynamo's chain. -- -- When the jumpers disagree with the dynamo, the jumper with the oldest --- intersection is asked to compete with the dynamo in the GDD logic. 
If the --- dynamo is disconnected, a new dynamo is elected and the objector is demoted --- to a jumper. +-- intersection is asked to compete with the dynamo in the GDD logic (becoming +-- an objector). If the dynamo is disconnected, a new dynamo is elected and the +-- objector is demoted to a jumper. -- -- If the objector is disconnected, the syncing process continues with the -- dynamo and the remaining jumpers. From ce3b2c6bb1daf3818a55623ce3372c0f9c3a9382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Tue, 21 May 2024 12:10:15 +0000 Subject: [PATCH 20/20] Document `headerDownloadEvents` --- .../test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs index 16b0aada02..25da0a1dce 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs @@ -85,6 +85,9 @@ prop_happyPath synchronized = ) ( \gt StateView{svTrace} -> let + -- The list of 'TraceDownloadedHeader' events that are not newer than + -- jumpSize from the tip of the chain. These are the ones that we + -- expect to see only once per header if CSJ works properly. headerDownloadEvents = mapMaybe (\case @@ -102,7 +105,7 @@ prop_happyPath synchronized = in tabulate "" [ if headerDownloadEvents == [] - then "All headers may be downloaded twice" + then "All headers may be downloaded twice (uninteresting test)" else "There exist headers that have to be downloaded exactly once" ] $ counterexample
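
For intuition, the uniqueness property that `headerDownloadEvents` is meant to
witness can be sketched as follows (a hypothetical helper over an assumed list
of header points, not the test's actual code): collect the points of the
downloaded headers and require that none occurs more than once.

import qualified Data.Map.Strict as Map

-- Illustrative only: True iff no header point occurs more than once in the
-- list of download events.
downloadedAtMostOnce :: Ord pt => [pt] -> Bool
downloadedAtMostOnce pts =
  all (<= 1) (Map.fromListWith (+) [ (p, 1 :: Int) | p <- pts ])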