diff --git a/ouroboros-network-api/src/Ouroboros/Network/PeerSelection/LedgerPeers/Type.hs b/ouroboros-network-api/src/Ouroboros/Network/PeerSelection/LedgerPeers/Type.hs index 6805c7b2ea..b43801120c 100644 --- a/ouroboros-network-api/src/Ouroboros/Network/PeerSelection/LedgerPeers/Type.hs +++ b/ouroboros-network-api/src/Ouroboros/Network/PeerSelection/LedgerPeers/Type.hs @@ -253,7 +253,7 @@ data IsLedgerPeer = IsLedgerPeer data IsBigLedgerPeer = IsBigLedgerPeer | IsNotBigLedgerPeer - deriving Eq + deriving (Eq, Show) -- | Return ledger state information and ledger peers. -- diff --git a/ouroboros-network/CHANGELOG.md b/ouroboros-network/CHANGELOG.md index 62efee83d2..b090302ac6 100644 --- a/ouroboros-network/CHANGELOG.md +++ b/ouroboros-network/CHANGELOG.md @@ -39,6 +39,9 @@ - Renamed `Applications` to `DiffusionApplications` - `runM` function now receives `ExtraParameters` as an argument - Configurable Mux Egress Poll Interval +- Added `pchPromotedHotVar` to `PeerConnectionHandle` to track when a peer has been promoted to hot +- Added tag `PeerHotDuration` to `PeerSelectionActionsTrace` to indicate how long a remote + peer has been in hot mode until it was either demoted or closed. ## 0.21.2.0 -- 2025-06-02 diff --git a/ouroboros-network/cardano-diffusion/Cardano/Network/Diffusion.hs b/ouroboros-network/cardano-diffusion/Cardano/Network/Diffusion.hs index 8042494323..4ebdcb9ed8 100644 --- a/ouroboros-network/cardano-diffusion/Cardano/Network/Diffusion.hs +++ b/ouroboros-network/cardano-diffusion/Cardano/Network/Diffusion.hs @@ -41,6 +41,7 @@ import Ouroboros.Network.NodeToNode (NodeToNodeVersionData (..), RemoteAddress, import Ouroboros.Network.NodeToNode qualified as NodeToNode import Ouroboros.Network.PeerSelection.LedgerPeers.Type (LedgerPeersConsensusInterface (..)) +import Ouroboros.Network.PeerSelection.PeerStateActions import Ouroboros.Network.Protocol.Handshake -- | Main entry point for Cardano data diffusion service. It allows to: @@ -136,7 +137,8 @@ run CardanoNodeArguments { (Diffusion.dcPeerSharing config) readUseBootstrapPeers (Cardano.getLedgerStateJudgement (lpExtraAPI ledgerPeersAPI)) - churnMetrics, + churnMetrics + getPromotedHotTime, daPeerSelectionGovernorArgs = Cardano.Types.cardanoPeerSelectionGovernorArgs Cardano.ExtraPeerSelectionActions { diff --git a/ouroboros-network/cardano-diffusion/Cardano/Network/Diffusion/Handlers.hs b/ouroboros-network/cardano-diffusion/Cardano/Network/Diffusion/Handlers.hs index abe2eae369..663bc22cf8 100644 --- a/ouroboros-network/cardano-diffusion/Cardano/Network/Diffusion/Handlers.hs +++ b/ouroboros-network/cardano-diffusion/Cardano/Network/Diffusion/Handlers.hs @@ -8,6 +8,8 @@ module Cardano.Network.Diffusion.Handlers where +import Control.Monad.Class.MonadTime.SI + import Cardano.Network.PeerSelection.Bootstrap (UseBootstrapPeers) import Cardano.Network.PeerSelection.Governor.PeerSelectionState qualified as Cardano import Cardano.Network.Types (LedgerStateJudgement) @@ -19,7 +21,6 @@ import Ouroboros.Network.PeerSelection.LedgerPeers.Type (UseLedgerPeers) import Ouroboros.Network.PeerSelection.PeerMetric import Ouroboros.Network.PeerSelection.PeerSharing (PeerSharing) #ifdef POSIX -import Control.Monad.Class.MonadTime.SI import Control.Tracer (traceWith) import Ouroboros.Network.ConnectionManager.Core (Trace (..)) import Ouroboros.Network.PeerSelection.Governor.Types @@ -40,6 +41,7 @@ sigUSR1Handler -> STM IO UseBootstrapPeers -> STM IO LedgerStateJudgement -> PeerMetrics IO ntnAddr + -> (peerconn -> STM IO (Maybe Time)) -> ConnectionManager muxMode socket ntnAddr handle handleError IO -> StrictTVar IO (PeerSelectionState @@ -49,7 +51,7 @@ sigUSR1Handler -> IO () #ifdef POSIX sigUSR1Handler tracersExtra getUseLedgerPeers ownPeerSharing getBootstrapPeers - getLedgerStateJudgement metrics connectionManager dbgStateVar = do + getLedgerStateJudgement metrics getPromotedHotTime connectionManager dbgStateVar = do _ <- Signals.installHandler Signals.sigUSR1 (Signals.Catch @@ -66,7 +68,7 @@ sigUSR1Handler tracersExtra getUseLedgerPeers ownPeerSharing getBootstrapPeers useBootstrapPeers <*> readTVar dbgStateVar - let dbgState = makeDebugPeerSelectionState ps up bp lsj am + dbgState <- makeDebugPeerSelectionState ps up bp lsj am getPromotedHotTime now traceWith (dtConnectionManagerTracer tracersExtra) (TrState state) @@ -77,5 +79,5 @@ sigUSR1Handler tracersExtra getUseLedgerPeers ownPeerSharing getBootstrapPeers Nothing return () #else -sigUSR1Handler _ _ _ _ _ _ _ _ = pure () +sigUSR1Handler _ _ _ _ _ _ _ _ _ = pure () #endif diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Types.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Types.hs index 7d806467ce..7e0cb3f75d 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Types.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Types.hs @@ -769,18 +769,24 @@ data DebugPeerSelectionState extraState extraFlags extraPeers peeraddr = dpssUpstreamyness :: !(Map peeraddr Int), dpssFetchynessBlocks :: !(Map peeraddr Int), dpssAssociationMode :: !AssociationMode, + dpssHotDurations :: !(Map peeraddr (IsBigLedgerPeer, DiffTime)), dpssExtraState :: !extraState } deriving Show makeDebugPeerSelectionState - :: PeerSelectionState extraState extraFlags extraPeers peeraddr peerconn + :: (Ord peeraddr, MonadSTM m) + => PeerSelectionState extraState extraFlags extraPeers peeraddr peerconn -> Map peeraddr Int -> Map peeraddr Int -> extraDebugState -> AssociationMode - -> DebugPeerSelectionState extraDebugState extraFlags extraPeers peeraddr -makeDebugPeerSelectionState PeerSelectionState {..} up bp es am = - DebugPeerSelectionState { + -> (peerconn -> STM m (Maybe Time)) + -> Time + -> m (DebugPeerSelectionState extraDebugState extraFlags extraPeers peeraddr) +makeDebugPeerSelectionState PeerSelectionState {..} up bp es am getPromotedHotTime now = do + let activeMap = EstablishedPeers.toMap establishedPeers `Map.restrictKeys` activePeers + dpssHotDurations <- Map.traverseMaybeWithKey getDiffTimes activeMap + return DebugPeerSelectionState { dpssTargets = targets , dpssLocalRootPeers = localRootPeers , dpssPublicRootPeers = publicRootPeers @@ -802,8 +808,19 @@ makeDebugPeerSelectionState PeerSelectionState {..} up bp es am = , dpssUpstreamyness = up , dpssFetchynessBlocks = bp , dpssAssociationMode = am + , dpssHotDurations , dpssExtraState = es } + where + getDiffTimes peeraddr peerconn = do + t1 <- atomically $ getPromotedHotTime peerconn + case t1 of + Nothing -> return Nothing + Just t1' -> + let !dt = now `diffTime` t1' + in if Set.member peeraddr (PublicRootPeers.getBigLedgerPeers publicRootPeers) + then return . Just $ (IsBigLedgerPeer, dt) + else return . Just $ (IsNotBigLedgerPeer, dt) -- | Public 'PeerSelectionState' that can be accessed by Peer Sharing -- mechanisms without any problem. diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/PeerStateActions.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/PeerStateActions.hs index f9a91386fb..6942d5f976 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/PeerStateActions.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/PeerStateActions.hs @@ -1,7 +1,9 @@ +{-# LANGUAGE BlockArguments #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE KindSignatures #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -18,6 +20,7 @@ module Ouroboros.Network.PeerSelection.PeerStateActions -- * Create PeerStateActions PeerStateActionsArguments (..) , PeerConnectionHandle + , getPromotedHotTime , withPeerStateActions , pchPeerSharing -- * Exceptions @@ -34,10 +37,11 @@ module Ouroboros.Network.PeerSelection.PeerStateActions import Control.Applicative (Alternative) import Control.Concurrent.Class.MonadSTM.Strict import Control.Exception (SomeAsyncException (..), assert) -import Control.Monad (when, (<=<)) +import Control.Monad (join, when, (<=<)) import Control.Monad.Class.MonadAsync import Control.Monad.Class.MonadFork import Control.Monad.Class.MonadThrow +import Control.Monad.Class.MonadTime.SI import Control.Monad.Class.MonadTimer.SI import Control.Concurrent.JobPool (Job (..), JobPool) @@ -428,13 +432,23 @@ awaitAllResults tok bundle = do -- together with their state 'StrictTVar's. -- data PeerConnectionHandle (muxMode :: Mux.Mode) responderCtx peerAddr versionData bytes m a b = PeerConnectionHandle { - pchConnectionId :: ConnectionId peerAddr, - pchPeerStatus :: StrictTVar m PeerStatus, - pchMux :: Mux.Mux muxMode m, - pchAppHandles :: TemperatureBundle (ApplicationHandle muxMode responderCtx peerAddr bytes m a b), - pchVersionData :: !versionData + pchConnectionId :: !(ConnectionId peerAddr), + pchPeerStatus :: !(StrictTVar m PeerStatus), + pchMux :: !(Mux.Mux muxMode m), + pchAppHandles :: !(TemperatureBundle (ApplicationHandle muxMode responderCtx peerAddr bytes m a b)), + pchVersionData :: !versionData, + pchPromotedHotVar :: !(StrictTVar m (Maybe Time)) } +-- | Retrieve the time the remote peer has been promoted to hot state +-- or Nothing if either the peer was not promoted or is being currently demoted +-- +getPromotedHotTime :: (MonadSTM m) + => PeerConnectionHandle muxMode responderCtx peerAddr versionData bytes m a b + -> STM m (Maybe Time) +getPromotedHotTime PeerConnectionHandle { pchPromotedHotVar } = + readTVar pchPromotedHotVar + mkInitiatorContext :: MonadSTM m => SingProtocolTemperature pt -> IsBigLedgerPeer @@ -620,14 +634,10 @@ withPeerStateActions PeerStateActionsArguments { then return False else writeTVar stateVar newState >> return True - isNotCoolingOrCold :: StrictTVar m PeerStatus -> STM m Bool - isNotCoolingOrCold stateVar = - (> PeerCooling) <$> readTVar stateVar - peerMonitoringLoop :: PeerConnectionHandle muxMode responderCtx peerAddr versionData ByteString m a b -> m () - peerMonitoringLoop pch@PeerConnectionHandle { pchConnectionId, pchPeerStatus, pchAppHandles } = do + peerMonitoringLoop pch@PeerConnectionHandle { pchConnectionId, pchPeerStatus, pchAppHandles, pchPromotedHotVar } = do -- A first-to-finish synchronisation on all the bundles; As a result -- this is a first-to-finish synchronisation between all the -- mini-protocols runs toward the given peer. @@ -669,10 +679,8 @@ withPeerStateActions PeerStateActionsArguments { Just (WithSomeProtocolTemperature (WithHot MiniProtocolError{})) -> do -- current `pchPeerStatus` must be 'HotPeer' - state <- atomically $ do - peerState <- readTVar pchPeerStatus - _ <- updateUnlessCoolingOrCold pchPeerStatus PeerCooling - return peerState + state <- atomically do + readTVar pchPeerStatus <* updateUnlessCoolingOrCold pchPeerStatus PeerCooling case state of PeerCold -> return () PeerCooling -> return () @@ -687,10 +695,9 @@ withPeerStateActions PeerStateActionsArguments { Just (WithSomeProtocolTemperature (WithEstablished MiniProtocolError{})) -> do -- update 'pchPeerStatus' and log (as the two other transition to -- cold state. - state <- atomically $ do - peerState <- readTVar pchPeerStatus - _ <- updateUnlessCoolingOrCold pchPeerStatus PeerCooling - pure peerState + state <- atomically do + readTVar pchPeerStatus <* updateUnlessCoolingOrCold pchPeerStatus PeerCooling + case state of PeerCold -> return () PeerCooling -> return () @@ -731,7 +738,13 @@ withPeerStateActions PeerStateActionsArguments { -- peerMonitingLoop exit -- - Nothing -> + Nothing -> do + pchPromotedHot <- atomically $ stateTVar pchPromotedHotVar (, Nothing) + case pchPromotedHot of + Just t1 -> do + dt <- diffTime <$> getMonotonicTime <*> pure t1 + traceWith spsTracer (PeerHotDuration pchConnectionId dt) + Nothing -> pure () traceWith spsTracer (PeerStatusChanged (CoolingToCold pchConnectionId)) establishPeerConnection :: JobPool () m (Maybe SomeException) @@ -768,7 +781,8 @@ withPeerStateActions PeerStateActionsArguments { writeTVar (projectBundle SingWarm controlMessageBundle) Continue writeTVar (projectBundle SingEstablished controlMessageBundle) Continue - awaitVarBundle <- atomically $ mkAwaitVars muxBundle + awaitVarBundle <- atomically $ mkAwaitVars muxBundle + pchPromotedHotVar <- newTVarIO Nothing let connHandle = PeerConnectionHandle { @@ -779,7 +793,8 @@ withPeerStateActions PeerStateActionsArguments { muxBundle controlMessageBundle awaitVarBundle, - pchVersionData = versionData + pchVersionData = versionData, + pchPromotedHotVar } startProtocols SingWarm isBigLedgerPeer connHandle @@ -796,9 +811,15 @@ withPeerStateActions PeerStateActionsArguments { Just SomeAsyncException {} -> Nothing Nothing -> Just e) (\e -> do - atomically $ do + promotedHot <- atomically $ do waitForOutboundDemotion spsConnectionManager connId writeTVar peerStateVar PeerCold + stateTVar pchPromotedHotVar (, Nothing) + case promotedHot of + Just t1 -> do + dt <- diffTime <$> getMonotonicTime <*> pure t1 + traceWith spsTracer (PeerHotDuration connId dt) + Nothing -> pure () traceWith spsTracer (PeerMonitoringError connId e) throwIO e) (peerMonitoringLoop connHandle $> Nothing)) @@ -913,106 +934,95 @@ withPeerStateActions PeerStateActionsArguments { connHandle@PeerConnectionHandle { pchConnectionId, pchPeerStatus, - pchAppHandles } = do - -- quiesce warm peer protocols and set hot ones in 'Continue' mode. - wasWarm <- atomically $ do - -- if the peer is cold we can't activate it. - notCold <- isNotCoolingOrCold pchPeerStatus - when notCold $ do - writeTVar (getControlVar SingHot pchAppHandles) Continue - writeTVar (getControlVar SingWarm pchAppHandles) Quiesce - return notCold - when (not wasWarm) $ do - traceWith spsTracer (PeerStatusChangeFailure - (WarmToHot pchConnectionId) - ActiveCold) - throwIO $ ColdActivationException pchConnectionId - - -- start hot peer protocols - startProtocols SingHot isBigLedgerPeer connHandle - - -- Only set the status to PeerHot if the peer isn't PeerCold. - -- This can happen asynchronously between the check above and now. - wasWarm' <- atomically $ updateUnlessCoolingOrCold pchPeerStatus PeerHot - if wasWarm' - then traceWith spsTracer (PeerStatusChanged (WarmToHot pchConnectionId)) - else do - traceWith spsTracer (PeerStatusChangeFailure - (WarmToHot pchConnectionId) - ActiveCold) - throwIO $ ColdActivationException pchConnectionId + pchAppHandles, + pchPromotedHotVar } = do + join . atomically $ do + peerStatus <- readTVar pchPeerStatus + case peerStatus of + PeerWarm -> do + writeTVar (getControlVar SingHot pchAppHandles) Continue + writeTVar (getControlVar SingWarm pchAppHandles) Quiesce + writeTVar pchPeerStatus PeerHot + return $ do + startProtocols SingHot isBigLedgerPeer connHandle + atomically . writeTVar pchPromotedHotVar . (Just $!) =<< getMonotonicTime + traceWith spsTracer (PeerStatusChanged (WarmToHot pchConnectionId)) + _otherwise -> return $ do + traceWith spsTracer (PeerStatusChangeFailure + (WarmToHot pchConnectionId) + (ActiveCold peerStatus)) + throwIO $ ColdActivationException pchConnectionId -- Take a hot peer and demote it to a warm one. + -- this can be raced by 'peerMonitoringLoop' and peer selection demotion activity deactivatePeerConnection :: PeerConnectionHandle muxMode responderCtx peerAddr versionData ByteString m a b -> m () deactivatePeerConnection PeerConnectionHandle { pchConnectionId, pchPeerStatus, pchMux, - pchAppHandles + pchAppHandles, + pchPromotedHotVar } = do - wasCold <- atomically $ do - notCold <- isNotCoolingOrCold pchPeerStatus - when notCold $ do - writeTVar (getControlVar SingHot pchAppHandles) Terminate - writeTVar (getControlVar SingWarm pchAppHandles) Continue - return (not notCold) - when wasCold $ do - -- The governor attempted to demote an already cold peer. - traceWith spsTracer (PeerStatusChangeFailure - (HotToWarm pchConnectionId) - ActiveCold) - throwIO $ ColdDeactivationException pchConnectionId - - - -- Hot protocols should stop within 'spsDeactivateTimeout'. - res <- - timeout spsDeactivateTimeout - (atomically $ awaitAllResults SingHot pchAppHandles) - case res of - Nothing -> do - Mux.stop pchMux - atomically (writeTVar pchPeerStatus PeerCooling) - traceWith spsTracer (PeerStatusChangeFailure - (HotToCooling pchConnectionId) - TimeoutError) - throwIO (DeactivationTimeout pchConnectionId) - - -- some of the hot mini-protocols errored - Just (SomeErrored errs) -> do - -- we don't need to notify the connection manager, we can instead - -- relay on mux property: if any of the mini-protocols errors, mux - -- throws an exception as well. - atomically (writeTVar pchPeerStatus PeerCooling) - traceWith spsTracer (PeerStatusChangeFailure - (HotToCooling pchConnectionId) - (ApplicationFailure errs)) - throwIO (MiniProtocolExceptions errs) - - -- all hot mini-protocols succeeded - Just (AllSucceeded results) -> do - -- we don't notify the connection manager as this connection is still - -- useful to the outbound governor (warm peer). - wasWarm <- atomically $ do - -- Only set the status to PeerWarm if the peer isn't cold - -- (can happen asynchronously). - notCold <- updateUnlessCoolingOrCold pchPeerStatus PeerWarm - when notCold $ do - -- We need to update hot protocols to indicate that they are not - -- running. Preserve the results returned by their previous - -- execution. - modifyTVar (getMiniProtocolsVar SingHot pchAppHandles) - (\_ -> Map.map (pure . NotRunning . Right) results) - return notCold - - if wasWarm - then traceWith spsTracer (PeerStatusChanged (HotToWarm pchConnectionId)) - else do - traceWith spsTracer (PeerStatusChangeFailure - (HotToWarm pchConnectionId) - ActiveCold) - throwIO $ ColdDeactivationException pchConnectionId + join . atomically $ do + peerStatus <- readTVar pchPeerStatus + case peerStatus of + PeerHot -> do + writeTVar (getControlVar SingHot pchAppHandles) Terminate + writeTVar (getControlVar SingWarm pchAppHandles) Continue + pchPromotedHot <- stateTVar pchPromotedHotVar (, Nothing) + return $ do + case pchPromotedHot of + Just t1 -> do + dt <- diffTime <$> getMonotonicTime <*> pure t1 + traceWith spsTracer (PeerHotDuration pchConnectionId dt) + Nothing -> pure () + + -- Hot protocols should stop within 'spsDeactivateTimeout'. + res <- + timeout spsDeactivateTimeout + $ join . atomically $ do + res <- awaitAllResults SingHot pchAppHandles + case res of + AllSucceeded results -> do + modifyTVar (getMiniProtocolsVar SingHot pchAppHandles) + (\_ -> Map.map (pure . NotRunning . Right) results) + stateTVar pchPeerStatus \case + PeerHot -> (traceWith spsTracer (PeerStatusChanged (HotToWarm pchConnectionId)) + , PeerWarm) + x -> (pure () , x) + SomeErrored errs -> + stateTVar pchPeerStatus \status -> + if status <= PeerCooling then + (throwIO (MiniProtocolExceptions errs), status) + else ( traceWith spsTracer (PeerStatusChangeFailure + (HotToCooling pchConnectionId) + (ApplicationFailure errs)) + >> throwIO (MiniProtocolExceptions errs) + , PeerCooling) + + case res of + Nothing -> do + Mux.stop pchMux + trace <- atomically $ updateUnlessCoolingOrCold pchPeerStatus PeerCooling + when trace do + traceWith spsTracer (PeerStatusChangeFailure + (HotToCooling pchConnectionId) + TimeoutError) + throwIO (DeactivationTimeout pchConnectionId) + Just _ -> return () + + -- we could genuinly hit this case due to a race between 'peerMonitoringLoop' + -- and peer selection demotion job + PeerWarm -> return $ pure () + + _otherwise -> + return $ do + traceWith spsTracer (PeerStatusChangeFailure + (HotToWarm pchConnectionId) + (ActiveCold peerStatus)) + throwIO $ ColdDeactivationException pchConnectionId closePeerConnection :: PeerConnectionHandle muxMode responderCtx peerAddr versionData ByteString m a b @@ -1024,57 +1034,56 @@ withPeerStateActions PeerStateActionsArguments { pchAppHandles, pchMux } = do - atomically $ do + peerStatus <- atomically $ do writeTVar (getControlVar SingWarm pchAppHandles) Terminate writeTVar (getControlVar SingEstablished pchAppHandles) Terminate writeTVar (getControlVar SingHot pchAppHandles) Terminate + readTVar pchPeerStatus <* updateUnlessCoolingOrCold pchPeerStatus PeerCooling + + case peerStatus of + ps@PeerCooling -> return ps + ps@PeerCold -> return ps + _otherwise -> do + res <- + timeout spsCloseConnectionTimeout + (atomically $ + (\a b c -> a <> b <> c) + -- note: we use last to finish on hot, warm and + -- established mini-protocols since 'closePeerConnection' + -- is also used by asynchronous demotions, not just + -- /warm → cold/ transition. + <$> awaitAllResults SingHot pchAppHandles + <*> awaitAllResults SingWarm pchAppHandles + <*> awaitAllResults SingEstablished pchAppHandles) + + PeerCooling <$ case res of + Nothing -> do + -- timeout fired + Mux.stop pchMux + traceWith spsTracer (PeerStatusChangeFailure + (WarmToCooling pchConnectionId) + TimeoutError) + + Just (SomeErrored errs) -> do + -- some mini-protocol errored + -- + -- we don't need to notify the connection manager, we can instead + -- rely on mux property: if any of the mini-protocols errors, mux + -- throws an exception as well. + traceWith spsTracer (PeerStatusChangeFailure + (WarmToCooling pchConnectionId) + (ApplicationFailure errs)) + throwIO (MiniProtocolExceptions errs) + + Just AllSucceeded {} -> do + -- all mini-protocols terminated cleanly + -- + -- 'unregisterOutboundConnection' could only fail to demote the peer if + -- connection manager would simultaneously promote it, but this is not + -- possible. + _ <- releaseOutboundConnection spsConnectionManager pchConnectionId + traceWith spsTracer (PeerStatusChanged (WarmToCooling pchConnectionId)) - res <- - timeout spsCloseConnectionTimeout - (atomically $ - (\a b c -> a <> b <> c) - -- note: we use last to finish on hot, warm and - -- established mini-protocols since 'closePeerConnection' - -- is also used by asynchronous demotions, not just - -- /warm → cold/ transition. - <$> awaitAllResults SingHot pchAppHandles - <*> awaitAllResults SingWarm pchAppHandles - <*> awaitAllResults SingEstablished pchAppHandles) - case res of - Nothing -> do - -- timeout fired - Mux.stop pchMux - wasWarm <- atomically (updateUnlessCoolingOrCold pchPeerStatus PeerCooling) - when wasWarm $ - traceWith spsTracer (PeerStatusChangeFailure - (WarmToCooling pchConnectionId) - TimeoutError) - readTVarIO pchPeerStatus - - Just (SomeErrored errs) -> do - -- some mini-protocol errored - -- - -- we don't need to notify the connection manager, we can instead - -- rely on mux property: if any of the mini-protocols errors, mux - -- throws an exception as well. - wasWarm <- atomically (updateUnlessCoolingOrCold pchPeerStatus PeerCooling) - when wasWarm $ - traceWith spsTracer (PeerStatusChangeFailure - (WarmToCooling pchConnectionId) - (ApplicationFailure errs)) - throwIO (MiniProtocolExceptions errs) - - Just AllSucceeded {} -> do - -- all mini-protocols terminated cleanly - -- - -- 'unregisterOutboundConnection' could only fail to demote the peer if - -- connection manager would simultaneously promote it, but this is not - -- possible. - wasWarm <- atomically (updateUnlessCoolingOrCold pchPeerStatus PeerCooling) - when wasWarm $ do - _ <- releaseOutboundConnection spsConnectionManager pchConnectionId - traceWith spsTracer (PeerStatusChanged (WarmToCooling pchConnectionId)) - readTVarIO pchPeerStatus -- -- Utilities @@ -1179,7 +1188,7 @@ data FailureType versionNumber = | HandleFailure !SomeException | MuxStoppedFailure | TimeoutError - | ActiveCold + | ActiveCold !PeerStatus | ApplicationFailure ![MiniProtocolException] deriving Show @@ -1207,4 +1216,5 @@ data PeerSelectionActionsTrace peerAddr vNumber = | PeerMonitoringError (ConnectionId peerAddr) SomeException | PeerMonitoringResult (ConnectionId peerAddr) (Maybe (WithSomeProtocolTemperature FirstToFinishResult)) | AcquireConnectionError SomeException + | PeerHotDuration (ConnectionId peerAddr) DiffTime deriving Show diff --git a/ouroboros-network/testlib/Test/Ouroboros/Network/Diffusion/Testnet/Cardano.hs b/ouroboros-network/testlib/Test/Ouroboros/Network/Diffusion/Testnet/Cardano.hs index f29c11470c..07d55fe2bc 100644 --- a/ouroboros-network/testlib/Test/Ouroboros/Network/Diffusion/Testnet/Cardano.hs +++ b/ouroboros-network/testlib/Test/Ouroboros/Network/Diffusion/Testnet/Cardano.hs @@ -1259,6 +1259,8 @@ prop_peer_selection_action_trace_coverage defaultBearerInfo diffScript = = "AcquireConnectionError: " ++ show (ioe_type ioe) | otherwise = "AcquireConnectionError: " ++ show e + peerSelectionActionsTraceMap (PeerHotDuration _id _dt) = + "PeerHotDuration" eventsSeenNames = map peerSelectionActionsTraceMap events @@ -3879,7 +3881,7 @@ prop_diffusion_peer_selection_actions_no_dodgy_traces ioSimTrace traceNumber = $ evs' numOfActiveColdErrors = length . filter (\case - (PeerStatusChangeFailure HotToWarm{} ActiveCold) + (PeerStatusChangeFailure HotToWarm{} ActiveCold{}) -> True _ -> False) $ evs' @@ -3902,7 +3904,7 @@ prop_diffusion_peer_selection_actions_no_dodgy_traces ioSimTrace traceNumber = in conjoin (zipWith (curry (\case ev@( WithTime _ (PeerStatusChangeFailure (HotToWarm _) TimeoutError) - , WithTime _ (PeerStatusChangeFailure (HotToWarm _) ActiveCold) + , WithTime _ (PeerStatusChangeFailure (HotToWarm _) ActiveCold{}) ) -> counterexample (show ev) $ counterexample (unlines $ map show peerSelectionActionsEvents) @@ -3969,7 +3971,8 @@ prop_diffusion_peer_selection_actions_no_dodgy_traces ioSimTrace traceNumber = WithTime _ (PeerStatusChangeFailure type_ _) -> getConnId type_ WithTime _ (PeerMonitoringError connId _) -> Just connId WithTime _ (PeerMonitoringResult connId _) -> Just connId - WithTime _ (AcquireConnectionError _) -> Nothing) + WithTime _ (AcquireConnectionError _) -> Nothing + WithTime _ (PeerHotDuration connId _) -> Just connId) $ peerSelectionActionsEvents )