From 2222a116d07e1c9b7b13de54fbf60a6faf2d3871 Mon Sep 17 00:00:00 2001 From: Mark Holt <135143369+mh0lt@users.noreply.github.com> Date: Mon, 10 Jun 2024 03:27:26 +0100 Subject: [PATCH] Fix potential p2p shutdown hangup (#10626) This is a fix for: https://github.com/ledgerwatch/erigon/issues/10192 This fixes is a deadlock in v4_udp.go where * Thread A waits on mutex.Lock() in resetTimeout() called after reading listUpdate channel. * Thread B waits on listUpdate <- plist.PushBack(p) called after locking mutex.Lock() This fix decouples the list operations which need locking from the channel operations which don't by storing the changes in local variables. These updates are used for resetting a timeout - which is not order dependent. --- p2p/discover/v4_udp.go | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/p2p/discover/v4_udp.go b/p2p/discover/v4_udp.go index 5491f541744..e3f2a300206 100644 --- a/p2p/discover/v4_udp.go +++ b/p2p/discover/v4_udp.go @@ -605,14 +605,15 @@ func (t *UDPv4) loop() { return case p := <-t.addReplyMatcher: - func() { - mutex.Lock() - defer mutex.Unlock() - p.deadline = time.Now().Add(t.replyTimeout) - listUpdate <- plist.PushBack(p) - }() + mutex.Lock() + p.deadline = time.Now().Add(t.replyTimeout) + back := plist.PushBack(p) + mutex.Unlock() + listUpdate <- back case r := <-t.gotreply: + var removals []*list.Element + func() { mutex.Lock() defer mutex.Unlock() @@ -628,7 +629,7 @@ func (t *UDPv4) loop() { if requestDone { p.errc <- nil plist.Remove(el) - listUpdate <- el + removals = append(removals, el) } // Reset the continuous timeout counter (time drift detection) contTimeouts = 0 @@ -637,6 +638,10 @@ func (t *UDPv4) loop() { r.matched <- matched }() + for _, el := range removals { + listUpdate <- el + } + case key := <-t.gotkey: go func() { if key, err := v4wire.DecodePubkey(crypto.S256(), key); err == nil {