From bc62df22237c85777d034d7767cce9e1326839a7 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 00:02:26 +0200 Subject: [PATCH 01/41] internal/testlog: fix level matching --- internal/testlog/testlog.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/testlog/testlog.go b/internal/testlog/testlog.go index 3740dd1f242c..ad61af9eac20 100644 --- a/internal/testlog/testlog.go +++ b/internal/testlog/testlog.go @@ -58,7 +58,7 @@ func (h *bufHandler) Handle(_ context.Context, r slog.Record) error { } func (h *bufHandler) Enabled(_ context.Context, lvl slog.Level) bool { - return lvl <= h.level + return lvl >= h.level } func (h *bufHandler) WithAttrs(attrs []slog.Attr) slog.Handler { From 8f4c72c579e25b9c191dbc1eeea0f3a6f63341f5 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 26 Mar 2024 19:35:48 +0100 Subject: [PATCH 02/41] p2p/discover: add debug API --- p2p/discover/node.go | 6 ++++++ p2p/discover/table.go | 19 ++++++++++--------- p2p/discover/v4_udp.go | 4 ++++ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/p2p/discover/node.go b/p2p/discover/node.go index 9ffe101ccff8..079fed8852b0 100644 --- a/p2p/discover/node.go +++ b/p2p/discover/node.go @@ -29,6 +29,12 @@ import ( "github.com/ethereum/go-ethereum/p2p/enode" ) +type BucketNode struct { + Node *enode.Node `json:"node"` + AddedAt time.Time `json:"added"` + Checks int `json:"checks"` +} + // node represents a host on the network. // The fields of Node may not be modified. type node struct { diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 2b7a28708b8d..46e6401448f9 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -150,18 +150,19 @@ func newMeteredTable(t transport, db *enode.DB, cfg Config) (*Table, error) { } // Nodes returns all nodes contained in the table. -func (tab *Table) Nodes() []*enode.Node { - if !tab.isInitDone() { - return nil - } - +func (tab *Table) Nodes() [][]BucketNode { tab.mutex.Lock() defer tab.mutex.Unlock() - var nodes []*enode.Node - for _, b := range &tab.buckets { - for _, n := range b.entries { - nodes = append(nodes, unwrapNode(n)) + nodes := make([][]BucketNode, len(tab.buckets)) + for i, b := range &tab.buckets { + nodes[i] = make([]BucketNode, len(b.entries)) + for j, n := range b.entries { + nodes[i][j] = BucketNode{ + Node: &n.Node, + Checks: int(n.livenessChecks), + AddedAt: n.addedAt, + } } } return nodes diff --git a/p2p/discover/v4_udp.go b/p2p/discover/v4_udp.go index 44b1f5305c45..c2de12133ca2 100644 --- a/p2p/discover/v4_udp.go +++ b/p2p/discover/v4_udp.go @@ -375,6 +375,10 @@ func (t *UDPv4) RequestENR(n *enode.Node) (*enode.Node, error) { return respN, nil } +func (t *UDPv4) TableBuckets() [][]BucketNode { + return t.tab.Nodes() +} + // pending adds a reply matcher to the pending reply queue. // see the documentation of type replyMatcher for a detailed explanation. 
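 // The matcher is removed from the queue once the expected reply has been
 // received or the request has timed out.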
func (t *UDPv4) pending(id enode.ID, ip net.IP, ptype byte, callback replyMatchFunc) *replyMatcher { From 37149d02df4cf333e02afee88d22c778102376f2 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 26 Mar 2024 19:35:56 +0100 Subject: [PATCH 03/41] cmd/devp2p: add discv4 RPC server --- cmd/devp2p/discv4cmd.go | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/cmd/devp2p/discv4cmd.go b/cmd/devp2p/discv4cmd.go index 45bcdcd3674b..203ed08425ad 100644 --- a/cmd/devp2p/discv4cmd.go +++ b/cmd/devp2p/discv4cmd.go @@ -20,6 +20,8 @@ import ( "errors" "fmt" "net" + "net/http" + "net/rpc" "strconv" "strings" "time" @@ -28,6 +30,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/internal/flags" + "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/p2p/discover" "github.com/ethereum/go-ethereum/p2p/enode" "github.com/ethereum/go-ethereum/params" @@ -131,6 +134,10 @@ var ( Usage: "Enode of the remote node under test", EnvVars: []string{"REMOTE_ENODE"}, } + httpAddrFlag = &cli.StringFlag{ + Name: "rpc", + Usage: "HTTP server listening address", + } ) var discoveryNodeFlags = []cli.Flag{ @@ -154,6 +161,27 @@ func discv4Ping(ctx *cli.Context) error { return nil } +func discv4Listen(ctx *cli.Context) error { + disc, _ := startV4(ctx) + defer disc.Close() + + fmt.Println(disc.Self()) + + httpAddr := ctx.String(httpAddrFlag.Name) + if httpAddr == "" { + // Non-HTTP mode. + select {} + } + + api := &discv4API{disc} + log.Info("Starting RPC API server", "addr", httpAddr) + srv := rpc.NewServer() + srv.RegisterName("discv4", api) + http.DefaultServeMux.Handle("/", srv) + httpsrv := http.Server{Addr: httpAddr, Handler: http.DefaultServeMux} + return httpsrv.ListenAndServe() +} + func discv4RequestRecord(ctx *cli.Context) error { n := getNodeArg(ctx) disc, _ := startV4(ctx) @@ -362,3 +390,23 @@ func parseBootnodes(ctx *cli.Context) ([]*enode.Node, error) { } return nodes, nil } + +type discv4API struct { + host *discover.UDPv4 +} + +func (api *discv4API) LookupRandom(n int) (ns []*enode.Node) { + it := api.host.RandomNodes() + for len(ns) < n && it.Next() { + ns = append(ns, it.Node()) + } + return ns +} + +func (api *discv4API) Buckets() [][]discover.BucketNode { + return api.host.TableBuckets() +} + +func (api *discv4API) Self() *enode.Node { + return api.host.Self() +} From 1a6a64ba49bd5b1d10879ad48bf4126fb002d066 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 26 Mar 2024 19:39:00 +0100 Subject: [PATCH 04/41] cmd/devp2p: add listen command --- cmd/devp2p/discv4cmd.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cmd/devp2p/discv4cmd.go b/cmd/devp2p/discv4cmd.go index 203ed08425ad..d149aecd5e78 100644 --- a/cmd/devp2p/discv4cmd.go +++ b/cmd/devp2p/discv4cmd.go @@ -48,6 +48,7 @@ var ( discv4ResolveJSONCommand, discv4CrawlCommand, discv4TestCommand, + discv4ListenCommand, }, } discv4PingCommand = &cli.Command{ @@ -78,6 +79,14 @@ var ( Flags: discoveryNodeFlags, ArgsUsage: "", } + discv4ListenCommand = &cli.Command{ + Name: "listen", + Usage: "Runs a discovery node", + Action: discv4Listen, + Flags: flags.Merge(discoveryNodeFlags, []cli.Flag{ + httpAddrFlag, + }), + } discv4CrawlCommand = &cli.Command{ Name: "crawl", Usage: "Updates a nodes.json file with random nodes found in the DHT", From f8c744894cd9adee4c8ac2e5af7ded47973a74e1 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 26 Mar 2024 19:41:14 +0100 Subject: [PATCH 05/41] cmd/devp2p: fix import 
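
The RPC server in the listen command was wired up against the standard
library's net/rpc package by mistake. net/rpc requires methods of the form
Method(args *T, reply *R) error and speaks Go's gob-based protocol, so the
discv4API methods would not have been served. go-ethereum's own rpc package
provides the same RegisterName call but exposes the methods as JSON-RPC
over HTTP.

With the corrected import, the API can be queried using the rpc package as
well. A minimal sketch, assuming the node was started with
"devp2p discv4 listen --rpc 127.0.0.1:8545" (the address is only an
example):

	client, err := rpc.Dial("http://127.0.0.1:8545")
	if err != nil {
		panic(err)
	}
	// Registered methods are served as <namespace>_<method>.
	var buckets [][]discover.BucketNode
	if err := client.Call(&buckets, "discv4_buckets"); err != nil {
		panic(err)
	}
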
--- cmd/devp2p/discv4cmd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/devp2p/discv4cmd.go b/cmd/devp2p/discv4cmd.go index d149aecd5e78..3b5400ca3a83 100644 --- a/cmd/devp2p/discv4cmd.go +++ b/cmd/devp2p/discv4cmd.go @@ -21,7 +21,6 @@ import ( "fmt" "net" "net/http" - "net/rpc" "strconv" "strings" "time" @@ -34,6 +33,7 @@ import ( "github.com/ethereum/go-ethereum/p2p/discover" "github.com/ethereum/go-ethereum/p2p/enode" "github.com/ethereum/go-ethereum/params" + "github.com/ethereum/go-ethereum/rpc" "github.com/urfave/cli/v2" ) From df8e793e1f2cbe968e9fc54b2bea43dca92292d3 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 00:02:49 +0200 Subject: [PATCH 06/41] p2p/discover: new revalidation --- p2p/discover/common.go | 35 ++ p2p/discover/lookup.go | 4 +- p2p/discover/node.go | 11 +- p2p/discover/table.go | 634 ++++++++++++++------------------ p2p/discover/table_reval.go | 204 ++++++++++ p2p/discover/table_test.go | 134 +++---- p2p/discover/table_util_test.go | 60 ++- p2p/discover/v4_udp.go | 2 +- p2p/discover/v5_udp.go | 2 +- 9 files changed, 627 insertions(+), 459 deletions(-) create mode 100644 p2p/discover/table_reval.go diff --git a/p2p/discover/common.go b/p2p/discover/common.go index 1f763904bb1f..8b024231b71c 100644 --- a/p2p/discover/common.go +++ b/p2p/discover/common.go @@ -18,7 +18,11 @@ package discover import ( "crypto/ecdsa" + crand "crypto/rand" + "encoding/binary" + "math/rand" "net" + "sync/atomic" "time" "github.com/ethereum/go-ethereum/common/mclock" @@ -92,3 +96,34 @@ type ReadPacket struct { Data []byte Addr *net.UDPAddr } + +type randomSource interface { + Intn(int) int + Int63n(int64) int64 +} + +// reseedingRandom is a random number generator that tracks when it was last re-seeded. +type reseedingRandom struct { + cur atomic.Pointer[rand.Rand] + lastSeed mclock.AbsTime +} + +func (r *reseedingRandom) nextReseedTime() mclock.AbsTime { + return r.lastSeed.Add(10 * time.Minute) +} + +func (r *reseedingRandom) seed(now mclock.AbsTime) { + var b [8]byte + crand.Read(b[:]) + seed := binary.BigEndian.Uint64(b[:]) + r.cur.Store(rand.New(rand.NewSource(int64(seed)))) + r.lastSeed = now +} + +func (r *reseedingRandom) Intn(n int) int { + return r.cur.Load().Intn(n) +} + +func (r *reseedingRandom) Int63n(n int64) int64 { + return r.cur.Load().Int63n(n) +} diff --git a/p2p/discover/lookup.go b/p2p/discover/lookup.go index b8d97b44e1cc..df6cddc8330e 100644 --- a/p2p/discover/lookup.go +++ b/p2p/discover/lookup.go @@ -152,9 +152,9 @@ func (it *lookup) query(n *node, reply chan<- []*node) { // Remove the node from the local table if it fails to return anything useful too // many times, but only if there are enough other nodes in the bucket. dropped := false - if fails >= maxFindnodeFailures && it.tab.bucketLen(n.ID()) >= bucketSize/2 { + if fails >= maxFindnodeFailures { dropped = true - it.tab.delete(n) + it.tab.trackFindFailure(n) } it.tab.log.Trace("FINDNODE failed", "id", n.ID(), "failcount", fails, "dropped", dropped, "err", err) } else if fails > 0 { diff --git a/p2p/discover/node.go b/p2p/discover/node.go index 079fed8852b0..17280a587824 100644 --- a/p2p/discover/node.go +++ b/p2p/discover/node.go @@ -38,9 +38,10 @@ type BucketNode struct { // node represents a host on the network. // The fields of Node may not be modified. 
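 // It wraps *enode.Node with the liveness bookkeeping kept by the table and
 // its revalidation process.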
type node struct {
-	enode.Node
-	addedAt        time.Time // time when the node was added to the table
-	livenessChecks uint      // how often liveness was checked
+	*enode.Node
+	addedAt         time.Time // time when the node was added to the table
+	livenessChecks  uint      // how often liveness was checked
+	isValidatedLive bool      // true if existence of node is considered validated right now
 }
 
 type encPubkey [64]byte
@@ -71,7 +72,7 @@ func (e encPubkey) id() enode.ID {
 }
 
 func wrapNode(n *enode.Node) *node {
-	return &node{Node: *n}
+	return &node{Node: n}
 }
 
 func wrapNodes(ns []*enode.Node) []*node {
@@ -83,7 +84,7 @@ func wrapNodes(ns []*enode.Node) []*node {
 }
 
 func unwrapNode(n *node) *enode.Node {
-	return &n.Node
+	return n.Node
 }
 
 func unwrapNodes(ns []*node) []*enode.Node {
diff --git a/p2p/discover/table.go b/p2p/discover/table.go
index 46e6401448f9..6e2bf653411e 100644
--- a/p2p/discover/table.go
+++ b/p2p/discover/table.go
@@ -24,16 +24,15 @@ package discover
 
 import (
 	"context"
-	crand "crypto/rand"
-	"encoding/binary"
 	"fmt"
-	mrand "math/rand"
 	"net"
+	"slices"
 	"sort"
 	"sync"
 	"time"
 
 	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/common/mclock"
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/metrics"
 	"github.com/ethereum/go-ethereum/p2p/enode"
@@ -55,21 +54,21 @@ const (
 	bucketIPLimit, bucketSubnet = 2, 24 // at most 2 addresses from the same /24
 	tableIPLimit, tableSubnet   = 10, 24
 
-	copyNodesInterval = 30 * time.Second
-	seedMinTableTime  = 5 * time.Minute
-	seedCount         = 30
-	seedMaxAge        = 5 * 24 * time.Hour
+	seedMinTableTime = 5 * time.Minute
+	seedCount        = 30
+	seedMaxAge       = 5 * 24 * time.Hour
 )
 
 // Table is the 'node table', a Kademlia-like index of neighbor nodes. The table keeps
 // itself up-to-date by verifying the liveness of neighbors and requesting their node
 // records when announcements of a new record version are received.
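 //
 // All mutation of the table content happens on the table's internal loop
 // goroutine: nodes are handed over through channels, while concurrent read
 // access is guarded by tab.mutex.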
type Table struct { - mutex sync.Mutex // protects buckets, bucket content, nursery, rand - buckets [nBuckets]*bucket // index of known nodes by distance - nursery []*node // bootstrap nodes - rand *mrand.Rand // source of randomness, periodically reseeded - ips netutil.DistinctNetSet + mutex sync.Mutex // protects buckets, bucket content, nursery, rand + buckets [nBuckets]*bucket // index of known nodes by distance + nursery []*node // bootstrap nodes + rand *reseedingRandom // source of randomness, periodically reseeded + ips netutil.DistinctNetSet + revalidation tableRevalidation db *enode.DB // database of known nodes net transport @@ -77,10 +76,14 @@ type Table struct { log log.Logger // loop channels - refreshReq chan chan struct{} - initDone chan struct{} - closeReq chan struct{} - closed chan struct{} + refreshReq chan chan struct{} + revalidateResp chan revalidationResponse + addNodeCh chan addNodeRequest + addNodeHandled chan struct{} + findFailureCh chan *node + initDone chan struct{} + closeReq chan struct{} + closed chan struct{} nodeAddedHook func(*bucket, *node) nodeRemovedHook func(*bucket, *node) @@ -104,22 +107,28 @@ type bucket struct { index int } +type addNodeRequest struct { + node *node + isLive bool +} + func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { cfg = cfg.withDefaults() tab := &Table{ - net: t, - db: db, - cfg: cfg, - log: cfg.Log, - refreshReq: make(chan chan struct{}), - initDone: make(chan struct{}), - closeReq: make(chan struct{}), - closed: make(chan struct{}), - rand: mrand.New(mrand.NewSource(0)), - ips: netutil.DistinctNetSet{Subnet: tableSubnet, Limit: tableIPLimit}, - } - if err := tab.setFallbackNodes(cfg.Bootnodes); err != nil { - return nil, err + net: t, + db: db, + cfg: cfg, + log: cfg.Log, + refreshReq: make(chan chan struct{}), + revalidateResp: make(chan revalidationResponse), + addNodeCh: make(chan addNodeRequest), + addNodeHandled: make(chan struct{}), + findFailureCh: make(chan *node), + initDone: make(chan struct{}), + closeReq: make(chan struct{}), + closed: make(chan struct{}), + rand: new(reseedingRandom), + ips: netutil.DistinctNetSet{Subnet: tableSubnet, Limit: tableIPLimit}, } for i := range tab.buckets { tab.buckets[i] = &bucket{ @@ -127,26 +136,23 @@ func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { ips: netutil.DistinctNetSet{Subnet: bucketSubnet, Limit: bucketIPLimit}, } } - tab.seedRand() + tab.rand.seed(cfg.Clock.Now()) + tab.revalidation.init(&cfg) + + // initial table content + if err := tab.setFallbackNodes(cfg.Bootnodes); err != nil { + return nil, err + } tab.loadSeedNodes() return tab, nil } -func newMeteredTable(t transport, db *enode.DB, cfg Config) (*Table, error) { - tab, err := newTable(t, db, cfg) - if err != nil { - return nil, err - } - if metrics.Enabled { - tab.nodeAddedHook = func(b *bucket, n *node) { - bucketsCounter[b.index].Inc(1) - } - tab.nodeRemovedHook = func(b *bucket, n *node) { - bucketsCounter[b.index].Dec(1) - } +func (tab *Table) trackFindFailure(n *node) { + select { + case tab.findFailureCh <- n: + case <-tab.closed: } - return tab, nil } // Nodes returns all nodes contained in the table. @@ -172,15 +178,6 @@ func (tab *Table) self() *enode.Node { return tab.net.Self() } -func (tab *Table) seedRand() { - var b [8]byte - crand.Read(b[:]) - - tab.mutex.Lock() - tab.rand.Seed(int64(binary.BigEndian.Uint64(b[:]))) - tab.mutex.Unlock() -} - // getNode returns the node with the given ID or nil if it isn't in the table. 
func (tab *Table) getNode(id enode.ID) *enode.Node { tab.mutex.Lock() @@ -240,52 +237,157 @@ func (tab *Table) refresh() <-chan struct{} { return done } -// loop schedules runs of doRefresh, doRevalidate and copyLiveNodes. +// findnodeByID returns the n nodes in the table that are closest to the given id. +// This is used by the FINDNODE/v4 handler. +// +// The preferLive parameter says whether the caller wants liveness-checked results. If +// preferLive is true and the table contains any verified nodes, the result will not +// contain unverified nodes. However, if there are no verified nodes at all, the result +// will contain unverified nodes. +func (tab *Table) findnodeByID(target enode.ID, nresults int, preferLive bool) *nodesByDistance { + tab.mutex.Lock() + defer tab.mutex.Unlock() + + // Scan all buckets. There might be a better way to do this, but there aren't that many + // buckets, so this solution should be fine. The worst-case complexity of this loop + // is O(tab.len() * nresults). + nodes := &nodesByDistance{target: target} + liveNodes := &nodesByDistance{target: target} + for _, b := range &tab.buckets { + for _, n := range b.entries { + nodes.push(n, nresults) + if preferLive && n.isValidatedLive { + liveNodes.push(n, nresults) + } + } + } + + if preferLive && len(liveNodes.entries) > 0 { + return liveNodes + } + return nodes +} + +// appendLiveNodes adds nodes at the given distance to the result slice. +// This is used by the FINDNODE/v5 handler. +func (tab *Table) appendLiveNodes(dist uint, result []*enode.Node) []*enode.Node { + if dist > 256 { + return result + } + if dist == 0 { + return append(result, tab.self()) + } + + tab.mutex.Lock() + defer tab.mutex.Unlock() + for _, n := range tab.bucketAtDistance(int(dist)).entries { + if n.isValidatedLive { + result = append(result, n.Node) + } + } + return result +} + +// len returns the number of nodes in the table. +func (tab *Table) len() (n int) { + tab.mutex.Lock() + defer tab.mutex.Unlock() + + for _, b := range &tab.buckets { + n += len(b.entries) + } + return n +} + +// addSeenNode adds a node which may not be live. If the bucket has space available, +// adding the node succeeds immediately. Otherwise, the node is added to the replacements +// list. +// +// The caller must not hold tab.mutex. +func (tab *Table) addSeenNode(n *node) { + req := addNodeRequest{node: n, isLive: false} + select { + case tab.addNodeCh <- req: + <-tab.addNodeHandled + case <-tab.closed: + } +} + +// addVerifiedNode adds a node whose existence has been verified recently. If the bucket +// has no space, the node is added to the replacements list. +// +// There is an additional safety measure: if the table is still initializing the node +// is not added. This prevents an attack where the table could be filled by just sending +// ping repeatedly. +// +// The caller must not hold tab.mutex. +func (tab *Table) addVerifiedNode(n *node) { + req := addNodeRequest{node: n, isLive: true} + select { + case tab.addNodeCh <- req: + <-tab.addNodeHandled + case <-tab.closed: + } +} + +// loop is the main loop of Table. 
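+// It serializes all mutations of the table: refresh scheduling, reseeding of
+// the random source, revalidation timing, add-node requests and find-failure
+// notifications are all handled by this single goroutine.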
func (tab *Table) loop() {
 	var (
-		revalidate     = time.NewTimer(tab.nextRevalidateTime())
-		refresh        = time.NewTimer(tab.nextRefreshTime())
-		copyNodes      = time.NewTicker(copyNodesInterval)
-		refreshDone    = make(chan struct{})           // where doRefresh reports completion
-		revalidateDone chan struct{}                   // where doRevalidate reports completion
-		waiting        = []chan struct{}{tab.initDone} // holds waiting callers while doRefresh runs
+		refresh         = time.NewTimer(tab.nextRefreshTime())
+		refreshDone     = make(chan struct{})           // where doRefresh reports completion
+		waiting         = []chan struct{}{tab.initDone} // holds waiting callers while doRefresh runs
+		revalTimer      = mclock.NewAlarm(tab.cfg.Clock)
+		reseedRandTimer = mclock.NewAlarm(tab.cfg.Clock)
 	)
 	defer refresh.Stop()
-	defer revalidate.Stop()
-	defer copyNodes.Stop()
+	defer revalTimer.Stop()
+	defer reseedRandTimer.Stop()
 
 	// Start initial refresh.
 	go tab.doRefresh(refreshDone)
 
loop:
 	for {
+		reseedRandTimer.Schedule(tab.rand.nextReseedTime())
+		revalTimer.Schedule(tab.revalidation.nextTime())
+
 		select {
+		case <-reseedRandTimer.C():
+			tab.rand.seed(tab.cfg.Clock.Now())
+
+		case <-revalTimer.C():
+			tab.revalidation.run(tab, tab.cfg.Clock.Now())
+
+		case r := <-tab.revalidateResp:
+			tab.revalidation.handleResponse(tab, r)
+
+		case addreq := <-tab.addNodeCh:
+			tab.handleAddNode(addreq)
+			tab.addNodeHandled <- struct{}{}
+
+		case <-tab.findFailureCh:
+			// TODO: handle failure by potentially dropping node
+
 		case <-refresh.C:
-			tab.seedRand()
 			if refreshDone == nil {
 				refreshDone = make(chan struct{})
 				go tab.doRefresh(refreshDone)
 			}
+
 		case req := <-tab.refreshReq:
 			waiting = append(waiting, req)
 			if refreshDone == nil {
 				refreshDone = make(chan struct{})
 				go tab.doRefresh(refreshDone)
 			}
+
 		case <-refreshDone:
 			for _, ch := range waiting {
 				close(ch)
 			}
 			waiting, refreshDone = nil, nil
 			refresh.Reset(tab.nextRefreshTime())
-		case <-revalidate.C:
-			revalidateDone = make(chan struct{})
-			go tab.doRevalidate(revalidateDone)
-		case <-revalidateDone:
-			revalidate.Reset(tab.nextRevalidateTime())
-			revalidateDone = nil
-		case <-copyNodes.C:
-			go tab.copyLiveNodes()
+
 		case <-tab.closeReq:
 			break loop
 		}
@@ -297,9 +399,6 @@ loop:
 	for _, ch := range waiting {
 		close(ch)
 	}
-	if revalidateDone != nil {
-		<-revalidateDone
-	}
 	close(tab.closed)
 }
 
@@ -340,165 +439,11 @@ func (tab *Table) loadSeedNodes() {
 	}
 }
 
-// doRevalidate checks that the last node in a random bucket is still live and replaces or
-// deletes the node if it isn't.
-func (tab *Table) doRevalidate(done chan<- struct{}) {
-	defer func() { done <- struct{}{} }()
-
-	last, bi := tab.nodeToRevalidate()
-	if last == nil {
-		// No non-empty bucket found.
-		return
-	}
-
-	// Ping the selected node and wait for a pong.
-	remoteSeq, err := tab.net.ping(unwrapNode(last))
-
-	// Also fetch record if the node replied and returned a higher sequence number.
-	if last.Seq() < remoteSeq {
-		n, err := tab.net.RequestENR(unwrapNode(last))
-		if err != nil {
-			tab.log.Debug("ENR request failed", "id", last.ID(), "addr", last.addr(), "err", err)
-		} else {
-			last = &node{Node: *n, addedAt: last.addedAt, livenessChecks: last.livenessChecks}
-		}
-	}
-
-	tab.mutex.Lock()
-	defer tab.mutex.Unlock()
-	b := tab.buckets[bi]
-	if err == nil {
-		// The node responded, move it to the front.
-		last.livenessChecks++
-		tab.log.Debug("Revalidated node", "b", bi, "id", last.ID(), "checks", last.livenessChecks)
-		tab.bumpInBucket(b, last)
-		return
-	}
-	// No reply received, pick a replacement or delete the node if there aren't
-	// any replacements.
- if r := tab.replace(b, last); r != nil { - tab.log.Debug("Replaced dead node", "b", bi, "id", last.ID(), "ip", last.IP(), "checks", last.livenessChecks, "r", r.ID(), "rip", r.IP()) - } else { - tab.log.Debug("Removed dead node", "b", bi, "id", last.ID(), "ip", last.IP(), "checks", last.livenessChecks) - } -} - -// nodeToRevalidate returns the last node in a random, non-empty bucket. -func (tab *Table) nodeToRevalidate() (n *node, bi int) { - tab.mutex.Lock() - defer tab.mutex.Unlock() - - for _, bi = range tab.rand.Perm(len(tab.buckets)) { - b := tab.buckets[bi] - if len(b.entries) > 0 { - last := b.entries[len(b.entries)-1] - return last, bi - } - } - return nil, 0 -} - -func (tab *Table) nextRevalidateTime() time.Duration { - tab.mutex.Lock() - defer tab.mutex.Unlock() - - return time.Duration(tab.rand.Int63n(int64(tab.cfg.PingInterval))) -} - func (tab *Table) nextRefreshTime() time.Duration { - tab.mutex.Lock() - defer tab.mutex.Unlock() - half := tab.cfg.RefreshInterval / 2 return half + time.Duration(tab.rand.Int63n(int64(half))) } -// copyLiveNodes adds nodes from the table to the database if they have been in the table -// longer than seedMinTableTime. -func (tab *Table) copyLiveNodes() { - tab.mutex.Lock() - defer tab.mutex.Unlock() - - now := time.Now() - for _, b := range &tab.buckets { - for _, n := range b.entries { - if n.livenessChecks > 0 && now.Sub(n.addedAt) >= seedMinTableTime { - tab.db.UpdateNode(unwrapNode(n)) - } - } - } -} - -// findnodeByID returns the n nodes in the table that are closest to the given id. -// This is used by the FINDNODE/v4 handler. -// -// The preferLive parameter says whether the caller wants liveness-checked results. If -// preferLive is true and the table contains any verified nodes, the result will not -// contain unverified nodes. However, if there are no verified nodes at all, the result -// will contain unverified nodes. -func (tab *Table) findnodeByID(target enode.ID, nresults int, preferLive bool) *nodesByDistance { - tab.mutex.Lock() - defer tab.mutex.Unlock() - - // Scan all buckets. There might be a better way to do this, but there aren't that many - // buckets, so this solution should be fine. The worst-case complexity of this loop - // is O(tab.len() * nresults). - nodes := &nodesByDistance{target: target} - liveNodes := &nodesByDistance{target: target} - for _, b := range &tab.buckets { - for _, n := range b.entries { - nodes.push(n, nresults) - if preferLive && n.livenessChecks > 0 { - liveNodes.push(n, nresults) - } - } - } - - if preferLive && len(liveNodes.entries) > 0 { - return liveNodes - } - return nodes -} - -// appendLiveNodes adds nodes at the given distance to the result slice. -func (tab *Table) appendLiveNodes(dist uint, result []*enode.Node) []*enode.Node { - if dist > 256 { - return result - } - if dist == 0 { - return append(result, tab.self()) - } - - tab.mutex.Lock() - defer tab.mutex.Unlock() - for _, n := range tab.bucketAtDistance(int(dist)).entries { - if n.livenessChecks >= 1 { - node := n.Node // avoid handing out pointer to struct field - result = append(result, &node) - } - } - return result -} - -// len returns the number of nodes in the table. -func (tab *Table) len() (n int) { - tab.mutex.Lock() - defer tab.mutex.Unlock() - - for _, b := range &tab.buckets { - n += len(b.entries) - } - return n -} - -// bucketLen returns the number of nodes in the bucket for the given ID. 
-func (tab *Table) bucketLen(id enode.ID) int { - tab.mutex.Lock() - defer tab.mutex.Unlock() - - return len(tab.bucket(id).entries) -} - // bucket returns the bucket for the given node ID hash. func (tab *Table) bucket(id enode.ID) *bucket { d := enode.LogDist(tab.self().ID(), id) @@ -512,95 +457,6 @@ func (tab *Table) bucketAtDistance(d int) *bucket { return tab.buckets[d-bucketMinDistance-1] } -// addSeenNode adds a node which may or may not be live to the end of a bucket. If the -// bucket has space available, adding the node succeeds immediately. Otherwise, the node is -// added to the replacements list. -// -// The caller must not hold tab.mutex. -func (tab *Table) addSeenNode(n *node) { - if n.ID() == tab.self().ID() { - return - } - - tab.mutex.Lock() - defer tab.mutex.Unlock() - b := tab.bucket(n.ID()) - if contains(b.entries, n.ID()) { - // Already in bucket, don't add. - return - } - if len(b.entries) >= bucketSize { - // Bucket full, maybe add as replacement. - tab.addReplacement(b, n) - return - } - if !tab.addIP(b, n.IP()) { - // Can't add: IP limit reached. - return - } - - // Add to end of bucket: - b.entries = append(b.entries, n) - b.replacements = deleteNode(b.replacements, n) - n.addedAt = time.Now() - - if tab.nodeAddedHook != nil { - tab.nodeAddedHook(b, n) - } -} - -// addVerifiedNode adds a node whose existence has been verified recently to the front of a -// bucket. If the node is already in the bucket, it is moved to the front. If the bucket -// has no space, the node is added to the replacements list. -// -// There is an additional safety measure: if the table is still initializing the node -// is not added. This prevents an attack where the table could be filled by just sending -// ping repeatedly. -// -// The caller must not hold tab.mutex. -func (tab *Table) addVerifiedNode(n *node) { - if !tab.isInitDone() { - return - } - if n.ID() == tab.self().ID() { - return - } - - tab.mutex.Lock() - defer tab.mutex.Unlock() - b := tab.bucket(n.ID()) - if tab.bumpInBucket(b, n) { - // Already in bucket, moved to front. - return - } - if len(b.entries) >= bucketSize { - // Bucket full, maybe add as replacement. - tab.addReplacement(b, n) - return - } - if !tab.addIP(b, n.IP()) { - // Can't add: IP limit reached. - return - } - - // Add to front of bucket. - b.entries, _ = pushNode(b.entries, n, bucketSize) - b.replacements = deleteNode(b.replacements, n) - n.addedAt = time.Now() - - if tab.nodeAddedHook != nil { - tab.nodeAddedHook(b, n) - } -} - -// delete removes an entry from the node table. It is used to evacuate dead nodes. -func (tab *Table) delete(node *node) { - tab.mutex.Lock() - defer tab.mutex.Unlock() - - tab.deleteInBucket(tab.bucket(node.ID()), node) -} - func (tab *Table) addIP(b *bucket, ip net.IP) bool { if len(ip) == 0 { return false // Nodes without IP cannot be added. @@ -628,11 +484,43 @@ func (tab *Table) removeIP(b *bucket, ip net.IP) { b.ips.Remove(ip) } +func (tab *Table) handleAddNode(req addNodeRequest) { + if req.node.ID() == tab.self().ID() { + return + } + if !req.isLive && !tab.isInitDone() { + return + } + + tab.mutex.Lock() + defer tab.mutex.Unlock() + b := tab.bucket(req.node.ID()) + if tab.bumpInBucket(b, req.node.Node) { + // Already in bucket, update record. + return + } + if len(b.entries) >= bucketSize { + // Bucket full, maybe add as replacement. + tab.addReplacement(b, req.node) + return + } + if !tab.addIP(b, req.node.IP()) { + // Can't add: IP limit reached. + return + } + + // Add to bucket. 
+ b.entries, _ = pushNode(b.entries, req.node, bucketSize) + b.replacements = deleteNode(b.replacements, req.node) + req.node.addedAt = time.Now() + tab.nodeAdded(b, req.node) +} + +// addReplacement adds n to the replacement cache of bucket b. func (tab *Table) addReplacement(b *bucket, n *node) { - for _, e := range b.replacements { - if e.ID() == n.ID() { - return // already in list - } + if contains(b.replacements, n.ID()) { + // TODO: update ENR + return } if !tab.addIP(b, n.IP()) { return @@ -644,60 +532,76 @@ func (tab *Table) addReplacement(b *bucket, n *node) { } } -// replace removes n from the replacement list and replaces 'last' with it if it is the -// last entry in the bucket. If 'last' isn't the last entry, it has either been replaced -// with someone else or became active. -func (tab *Table) replace(b *bucket, last *node) *node { - if len(b.entries) == 0 || b.entries[len(b.entries)-1].ID() != last.ID() { - // Entry has moved, don't replace it. +func (tab *Table) nodeAdded(b *bucket, n *node) { + tab.revalidation.nodeAdded(tab, n) + if tab.nodeAddedHook != nil { + tab.nodeAddedHook(b, n) + } + if metrics.Enabled { + bucketsCounter[b.index].Inc(1) + } +} + +func (tab *Table) nodeRemoved(b *bucket, n *node) { + tab.revalidation.nodeRemoved(n) + if tab.nodeRemovedHook != nil { + tab.nodeRemovedHook(b, n) + } + if metrics.Enabled { + bucketsCounter[b.index].Dec(1) + } +} + +// deleteInBucket removes node n from the table. +// If there are replacement nodes in the bucket, the node is replaced. +func (tab *Table) deleteInBucket(b *bucket, id enode.ID) *node { + index := slices.IndexFunc(b.entries, func(e *node) bool { return e.ID() == id }) + if index == -1 { + // Entry has been removed already, don't replace it. return nil } - // Still the last entry. + + // Remove the node. + n := b.entries[index] + b.entries = slices.Delete(b.entries, index, index+1) + tab.removeIP(b, n.IP()) + tab.nodeRemoved(b, n) + + // Add replacement. if len(b.replacements) == 0 { - tab.deleteInBucket(b, last) + tab.log.Debug("Removed dead node", "b", b.index, "id", n.ID(), "ip", n.IP(), "checks", n.livenessChecks) return nil } - r := b.replacements[tab.rand.Intn(len(b.replacements))] - b.replacements = deleteNode(b.replacements, r) - b.entries[len(b.entries)-1] = r - tab.removeIP(b, last.IP()) - return r -} - -// bumpInBucket moves the given node to the front of the bucket entry list -// if it is contained in that list. -func (tab *Table) bumpInBucket(b *bucket, n *node) bool { - for i := range b.entries { - if b.entries[i].ID() == n.ID() { - if !n.IP().Equal(b.entries[i].IP()) { - // Endpoint has changed, ensure that the new IP fits into table limits. - tab.removeIP(b, b.entries[i].IP()) - if !tab.addIP(b, n.IP()) { - // It doesn't, put the previous one back. - tab.addIP(b, b.entries[i].IP()) - return false - } - } - // Move it to the front. - copy(b.entries[1:], b.entries[:i]) - b.entries[0] = n - return true - } - } - return false + rindex := tab.rand.Intn(len(b.replacements)) + rep := b.replacements[rindex] + b.replacements = slices.Delete(b.replacements, rindex, rindex+1) + b.entries = append(b.entries, rep) + tab.nodeAdded(b, rep) + + tab.log.Debug("Replaced dead node", "b", b.index, "id", n.ID(), "ip", n.IP(), "checks", n.livenessChecks, "r", rep.ID(), "rip", rep.IP()) + return rep } -func (tab *Table) deleteInBucket(b *bucket, n *node) { - // Check if the node is actually in the bucket so the removed hook - // isn't called multiple times for the same node. 
- if !contains(b.entries, n.ID()) { - return +// bumpInBucket updates the node record of n in the bucket. +func (tab *Table) bumpInBucket(b *bucket, newRecord *enode.Node) bool { + i := slices.IndexFunc(b.entries, func(elem *node) bool { + return elem.ID() == newRecord.ID() + }) + if i == -1 { + return false } - b.entries = deleteNode(b.entries, n) - tab.removeIP(b, n.IP()) - if tab.nodeRemovedHook != nil { - tab.nodeRemovedHook(b, n) + + if !newRecord.IP().Equal(b.entries[i].IP()) { + // Endpoint has changed, ensure that the new IP fits into table limits. + tab.removeIP(b, b.entries[i].IP()) + if !tab.addIP(b, newRecord.IP()) { + // It doesn't, put the previous one back. + tab.addIP(b, b.entries[i].IP()) + return false + } } + b.entries[i].Node = newRecord + return true } func contains(ns []*node, id enode.ID) bool { diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go new file mode 100644 index 000000000000..d4bb92a844b9 --- /dev/null +++ b/p2p/discover/table_reval.go @@ -0,0 +1,204 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package discover + +import ( + "fmt" + "slices" + "time" + + "github.com/ethereum/go-ethereum/common/mclock" + "github.com/ethereum/go-ethereum/p2p/enode" +) + +const never = ^mclock.AbsTime(0) + +type tableRevalidation struct { + newNodes revalidationQueue + nodes revalidationQueue + activeReq map[enode.ID]struct{} +} + +type revalidationResponse struct { + n *node + didRespond bool + isNewNode bool + newRecord *enode.Node +} + +func (tr *tableRevalidation) init(cfg *Config) { + tr.activeReq = make(map[enode.ID]struct{}) + tr.newNodes.nextTime = never + tr.newNodes.interval = cfg.PingInterval + tr.nodes.nextTime = never + tr.nodes.interval = cfg.PingInterval +} + +// nodeAdded is called when the table receives a new node. +func (tr *tableRevalidation) nodeAdded(tab *Table, n *node) { + tr.newNodes.push(n, tab.rand) +} + +// nodeRemoved is called when a node was removed from the table. +func (tr *tableRevalidation) nodeRemoved(n *node) { + wasnew := tr.newNodes.remove(n) + if !wasnew { + tr.nodes.remove(n) + } +} + +// nextTime returns the next time run() should be invoked. +// The Table main loop uses this to schedule a timer. +func (tr *tableRevalidation) nextTime() mclock.AbsTime { + return min(tr.newNodes.nextTime, tr.nodes.nextTime) +} + +// run performs node revalidation. +func (tr *tableRevalidation) run(tab *Table, now mclock.AbsTime) { + if n := tr.newNodes.get(now, tab.rand, tr.activeReq); n != nil { + tr.startRequest(tab, n, true) + tr.newNodes.schedule(tab.rand) + } + if n := tr.nodes.get(now, tab.rand, tr.activeReq); n != nil { + tr.startRequest(tab, n, false) + tr.nodes.schedule(tab.rand) + } +} + +// startRequest spawns a revalidation request for node n. 
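+// Only one revalidation request may be active per node at a time; a duplicate
+// indicates a scheduling bug, hence the panic below. The result is delivered
+// back to the table loop on tab.revalidateResp.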
+func (tr *tableRevalidation) startRequest(tab *Table, n *node, newNode bool) {
+	if _, ok := tr.activeReq[n.ID()]; ok {
+		panic("duplicate startRequest")
+	}
+	tr.activeReq[n.ID()] = struct{}{}
+	resp := revalidationResponse{n: n, isNewNode: newNode}
+
+	go func() {
+		// Ping the selected node and wait for a pong response.
+		remoteSeq, err := tab.net.ping(unwrapNode(n))
+		resp.didRespond = err == nil
+
+		// Also fetch record if the node replied and returned a higher sequence number.
+		if remoteSeq > n.Seq() {
+			newrec, err := tab.net.RequestENR(unwrapNode(n))
+			if err != nil {
+				tab.log.Debug("ENR request failed", "id", n.ID(), "addr", n.addr(), "err", err)
+			} else {
+				resp.newRecord = newrec
+			}
+		}
+
+		select {
+		case tab.revalidateResp <- resp:
+		case <-tab.closed:
+		}
+	}()
+}
+
+// handleResponse processes the result of a revalidation request.
+func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationResponse) {
+	n := resp.n
+	b := tab.bucket(n.ID())
+	delete(tr.activeReq, n.ID())
+
+	tab.mutex.Lock()
+	defer tab.mutex.Unlock()
+
+	if !resp.didRespond {
+		// Revalidation failed.
+		n.livenessChecks /= 3
+		if n.livenessChecks == 0 || resp.isNewNode {
+			tab.deleteInBucket(b, n.ID())
+		}
+		return
+	}
+
+	// The node responded.
+	n.livenessChecks++
+	n.isValidatedLive = true
+	tab.log.Debug("Revalidated node", "b", b.index, "id", n.ID(), "checks", n.livenessChecks)
+	if resp.newRecord != nil {
+		updated := tab.bumpInBucket(b, resp.newRecord)
+		if updated {
+			// If the node changed its advertised endpoint, the updated ENR is not served
+			// until it has been revalidated.
+			n.isValidatedLive = false
+		}
+	}
+
+	// Move node over to main queue after first validation.
+	if resp.isNewNode {
+		tr.newNodes.remove(n)
+		tr.nodes.push(n, tab.rand)
+	}
+
+	// Store potential seeds in database.
+	if n.isValidatedLive && n.livenessChecks > 5 {
+		tab.db.UpdateNode(resp.n.Node)
+	}
+}
+
+// revalidationQueue holds a list of nodes and the next revalidation time.
+type revalidationQueue struct {
+	nodes    []*node
+	nextTime mclock.AbsTime
+	interval time.Duration
+}
+
+// get returns a random node from the queue. Nodes in the 'exclude' map are not returned.
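+// Selection is retried up to 3*len(rq.nodes) times, so get gives up and
+// returns nil rather than looping forever when every candidate is excluded.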
+func (rq *revalidationQueue) get(now mclock.AbsTime, rand randomSource, exclude map[enode.ID]struct{}) *node {
+	if now < rq.nextTime || len(rq.nodes) == 0 {
+		return nil
+	}
+	for i := 0; i < len(rq.nodes)*3; i++ {
+		n := rq.nodes[rand.Intn(len(rq.nodes))]
+		_, excluded := exclude[n.ID()]
+		if !excluded {
+			return n
+		}
+	}
+	return nil
+}
+
+func (rq *revalidationQueue) push(n *node, rand randomSource) {
+	rq.nodes = append(rq.nodes, n)
+	if rq.nextTime == never {
+		rq.schedule(rand)
+	}
+}
+
+func (rq *revalidationQueue) schedule(rand randomSource) {
+	rq.nextTime = mclock.AbsTime(rand.Int63n(int64(rq.interval)))
+}
+
+func (rq *revalidationQueue) remove(n *node) bool {
+	i := slices.Index(rq.nodes, n)
+	if i == -1 {
+		return false
+	}
+	rq.nodes = slices.Delete(rq.nodes, i, i+1)
+	if len(rq.nodes) == 0 {
+		rq.nextTime = never
+	}
+	return true
+}
+
+func printIDs(list []*node) {
+	for i, n := range list {
+		fmt.Println(" - ", i, n.ID())
+	}
+}
diff --git a/p2p/discover/table_test.go b/p2p/discover/table_test.go
index 3ba342225133..9796d576e586 100644
--- a/p2p/discover/table_test.go
+++ b/p2p/discover/table_test.go
@@ -20,14 +20,17 @@ import (
 	"crypto/ecdsa"
 	"fmt"
 	"math/rand"
 	"net"
 	"reflect"
 	"testing"
 	"testing/quick"
 	"time"
 
+	"github.com/ethereum/go-ethereum/common/mclock"
 	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/internal/testlog"
+	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/p2p/enode"
 	"github.com/ethereum/go-ethereum/p2p/enr"
 	"github.com/ethereum/go-ethereum/p2p/netutil"
@@ -49,29 +51,34 @@ func TestTable_pingReplace(t *testing.T) {
 }
 
 func testPingReplace(t *testing.T, newNodeIsResponding, lastInBucketIsResponding bool) {
+	simclock := new(mclock.Simulated)
 	transport := newPingRecorder()
-	tab, db := newTestTable(transport)
+	tab, db := newTestTable(transport, Config{
+		Clock: simclock,
+		Log:   testlog.Logger(t, log.LevelTrace),
+	})
 	defer db.Close()
 	defer tab.close()
 
 	<-tab.initDone
 
 	// Fill up the sender's bucket.
-	pingKey, _ := crypto.HexToECDSA("45a915e4d060149eb4365960e6a7a45f334393093061116b197e3240065ff2d8")
-	pingSender := wrapNode(enode.NewV4(&pingKey.PublicKey, net.IP{127, 0, 0, 1}, 99, 99))
-	last := fillBucket(tab, pingSender)
+	replacementNodeKey, _ := crypto.HexToECDSA("45a915e4d060149eb4365960e6a7a45f334393093061116b197e3240065ff2d8")
+	replacementNode := wrapNode(enode.NewV4(&replacementNodeKey.PublicKey, net.IP{127, 0, 0, 1}, 99, 99))
+	last := fillBucket(tab, replacementNode)
 
-	// Add the sender as if it just pinged us. Revalidate should replace the last node in
-	// its bucket if it is unresponsive. Revalidate again to ensure that
+	// Add the sender as if it just pinged us. The revalidation process should replace
+	// this node in the bucket if it is unresponsive.
 	transport.dead[last.ID()] = !lastInBucketIsResponding
-	transport.dead[pingSender.ID()] = !newNodeIsResponding
-	tab.addSeenNode(pingSender)
-	tab.doRevalidate(make(chan struct{}, 1))
-	tab.doRevalidate(make(chan struct{}, 1))
-
-	if !transport.pinged[last.ID()] {
-		// Oldest node in bucket is pinged to see whether it is still alive.
-		t.Error("table did not ping last node in bucket")
+	transport.dead[replacementNode.ID()] = !newNodeIsResponding
+	tab.addSeenNode(replacementNode)
+
+	// Wait until the last node was pinged.
+	waitForRevalidationPing(t, transport, tab, last.ID())
+
+	// If a replacement is expected, we also need to wait until the replacement node was pinged.
+	if !lastInBucketIsResponding {
+		waitForRevalidationPing(t, transport, tab, replacementNode.ID())
 	}
 
 	tab.mutex.Lock()
@@ -80,69 +87,41 @@ func testPingReplace(t *testing.T, newNodeIsResponding, lastInBucketIsResponding
 	if !lastInBucketIsResponding && !newNodeIsResponding {
 		wantSize--
 	}
-	if l := len(tab.bucket(pingSender.ID()).entries); l != wantSize {
+	bucket := tab.bucket(replacementNode.ID())
+	if l := len(bucket.entries); l != wantSize {
 		t.Errorf("wrong bucket size after bond: got %d, want %d", l, wantSize)
 	}
-	if found := contains(tab.bucket(pingSender.ID()).entries, last.ID()); found != lastInBucketIsResponding {
-		t.Errorf("last entry found: %t, want: %t", found, lastInBucketIsResponding)
+	if ok := contains(bucket.entries, last.ID()); ok != lastInBucketIsResponding {
+		t.Errorf("last entry found: %t, want: %t", ok, lastInBucketIsResponding)
 	}
 	wantNewEntry := newNodeIsResponding && !lastInBucketIsResponding
-	if found := contains(tab.bucket(pingSender.ID()).entries, pingSender.ID()); found != wantNewEntry {
-		t.Errorf("new entry found: %t, want: %t", found, wantNewEntry)
+	if ok := contains(bucket.entries, replacementNode.ID()); ok != wantNewEntry {
+		t.Errorf("new entry found: %t, want: %t", ok, wantNewEntry)
 	}
 }
 
-func TestBucket_bumpNoDuplicates(t *testing.T) {
-	t.Parallel()
-	cfg := &quick.Config{
-		MaxCount: 1000,
-		Rand:     rand.New(rand.NewSource(time.Now().Unix())),
-		Values: func(args []reflect.Value, rand *rand.Rand) {
-			// generate a random list of nodes. this will be the content of the bucket.
-			n := rand.Intn(bucketSize-1) + 1
-			nodes := make([]*node, n)
-			for i := range nodes {
-				nodes[i] = nodeAtDistance(enode.ID{}, 200, intIP(200))
-			}
-			args[0] = reflect.ValueOf(nodes)
-			// generate random bump positions.
-			bumps := make([]int, rand.Intn(100))
-			for i := range bumps {
-				bumps[i] = rand.Intn(len(nodes))
-			}
-			args[1] = reflect.ValueOf(bumps)
-		},
-	}
-
-	prop := func(nodes []*node, bumps []int) (ok bool) {
-		tab, db := newTestTable(newPingRecorder())
-		defer db.Close()
-		defer tab.close()
-
-		b := &bucket{entries: make([]*node, len(nodes))}
-		copy(b.entries, nodes)
-		for i, pos := range bumps {
-			tab.bumpInBucket(b, b.entries[pos])
-			if hasDuplicates(b.entries) {
-				t.Logf("bucket has duplicates after %d/%d bumps:", i+1, len(bumps))
-				for _, n := range b.entries {
-					t.Logf("  %p", n)
-				}
-				return false
-			}
-		}
-		checkIPLimitInvariant(t, tab)
-		return true
-	}
-	if err := quick.Check(prop, cfg); err != nil {
-		t.Error(err)
-	}
+// waitForRevalidationPing waits until a PING message is sent to a node with the given id.
+// If id is the zero ID, it waits until any node is pinged.
+func waitForRevalidationPing(t *testing.T, transport *pingRecorder, tab *Table, id enode.ID) *enode.Node {
+	simclock := tab.cfg.Clock.(*mclock.Simulated)
+	maxAttempts := tab.len() * 5
+	for i := 0; i < maxAttempts; i++ {
+		simclock.Run(tab.cfg.PingInterval)
+		p := transport.waitPing(500 * time.Millisecond)
+		if p == nil {
+			t.Fatal("Table did not send any revalidation ping")
+		}
+		if id == (enode.ID{}) || p.ID() == id {
+			return p
+		}
+	}
+	t.Fatalf("Table did not ping node %v (%d attempts)", id, maxAttempts)
+	return nil
 }
 
 // This checks that the table-wide IP limit is applied correctly.
 func TestTable_IPLimit(t *testing.T) {
 	transport := newPingRecorder()
-	tab, db := newTestTable(transport)
+	tab, db := newTestTable(transport, Config{})
 	defer db.Close()
 	defer tab.close()
 
@@ -159,7 +138,7 @@ func TestTable_IPLimit(t *testing.T) {
 
 	// This checks that the per-bucket IP limit is applied correctly. 
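 // At most bucketIPLimit nodes from the same /24 subnet may occupy one bucket.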
func TestTable_BucketIPLimit(t *testing.T) { transport := newPingRecorder() - tab, db := newTestTable(transport) + tab, db := newTestTable(transport, Config{}) defer db.Close() defer tab.close() @@ -196,7 +175,7 @@ func TestTable_findnodeByID(t *testing.T) { test := func(test *closeTest) bool { // for any node table, Target and N transport := newPingRecorder() - tab, db := newTestTable(transport) + tab, db := newTestTable(transport, Config{}) defer db.Close() defer tab.close() fillTable(tab, test.All, true) @@ -271,7 +250,7 @@ func (*closeTest) Generate(rand *rand.Rand, size int) reflect.Value { } func TestTable_addVerifiedNode(t *testing.T) { - tab, db := newTestTable(newPingRecorder()) + tab, db := newTestTable(newPingRecorder(), Config{}) <-tab.initDone defer db.Close() defer tab.close() @@ -281,11 +260,12 @@ func TestTable_addVerifiedNode(t *testing.T) { n2 := nodeAtDistance(tab.self().ID(), 256, net.IP{88, 77, 66, 2}) tab.addSeenNode(n1) tab.addSeenNode(n2) + bucket := tab.bucket(n1.ID()) // Verify bucket content: bcontent := []*node{n1, n2} - if !reflect.DeepEqual(tab.bucket(n1.ID()).entries, bcontent) { - t.Fatalf("wrong bucket content: %v", tab.bucket(n1.ID()).entries) + if !reflect.DeepEqual(unwrapNodes(bucket.entries), unwrapNodes(bcontent)) { + t.Fatalf("wrong bucket content: %v", bucket.entries) } // Add a changed version of n2. @@ -295,15 +275,15 @@ func TestTable_addVerifiedNode(t *testing.T) { tab.addVerifiedNode(newn2) // Check that bucket is updated correctly. - newBcontent := []*node{newn2, n1} - if !reflect.DeepEqual(tab.bucket(n1.ID()).entries, newBcontent) { - t.Fatalf("wrong bucket content after update: %v", tab.bucket(n1.ID()).entries) + newBcontent := []*node{n1, newn2} + if !reflect.DeepEqual(unwrapNodes(bucket.entries), unwrapNodes(newBcontent)) { + t.Fatalf("wrong bucket content after update: %v", bucket.entries) } checkIPLimitInvariant(t, tab) } func TestTable_addSeenNode(t *testing.T) { - tab, db := newTestTable(newPingRecorder()) + tab, db := newTestTable(newPingRecorder(), Config{}) <-tab.initDone defer db.Close() defer tab.close() @@ -337,7 +317,10 @@ func TestTable_addSeenNode(t *testing.T) { // announces a new sequence number, the new record should be pulled. 
func TestTable_revalidateSyncRecord(t *testing.T) { transport := newPingRecorder() - tab, db := newTestTable(transport) + tab, db := newTestTable(transport, Config{ + Clock: new(mclock.Simulated), + Log: testlog.Logger(t, log.LevelTrace), + }) <-tab.initDone defer db.Close() defer tab.close() @@ -354,7 +337,8 @@ func TestTable_revalidateSyncRecord(t *testing.T) { n2 := enode.SignNull(&r, id) transport.updateRecord(n2) - tab.doRevalidate(make(chan struct{}, 1)) + waitForRevalidationPing(t, transport, tab, n2.ID()) + intable := tab.getNode(id) if !reflect.DeepEqual(intable, n2) { t.Fatalf("table contains old record with seq %d, want seq %d", intable.Seq(), n2.Seq()) diff --git a/p2p/discover/table_util_test.go b/p2p/discover/table_util_test.go index f5d4d39bdbb7..5eda1479498d 100644 --- a/p2p/discover/table_util_test.go +++ b/p2p/discover/table_util_test.go @@ -26,6 +26,8 @@ import ( "net" "slices" "sync" + "sync/atomic" + "time" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/p2p/enode" @@ -40,8 +42,7 @@ func init() { nullNode = enode.SignNull(&r, enode.ID{}) } -func newTestTable(t transport) (*Table, *enode.DB) { - cfg := Config{} +func newTestTable(t transport, cfg Config) (*Table, *enode.DB) { db, _ := enode.OpenDB("") tab, _ := newTable(t, db, cfg) go tab.loop() @@ -102,7 +103,9 @@ func fillBucket(tab *Table, n *node) (last *node) { ld := enode.LogDist(tab.self().ID(), n.ID()) b := tab.bucket(n.ID()) for len(b.entries) < bucketSize { - b.entries = append(b.entries, nodeAtDistance(tab.self().ID(), ld, intIP(ld))) + node := nodeAtDistance(tab.self().ID(), ld, intIP(ld)) + b.entries = append(b.entries, node) + tab.nodeAdded(b, node) } return b.entries[bucketSize-1] } @@ -119,10 +122,12 @@ func fillTable(tab *Table, nodes []*node, setLive bool) { } type pingRecorder struct { - mu sync.Mutex - dead, pinged map[enode.ID]bool - records map[enode.ID]*enode.Node - n *enode.Node + mu sync.Mutex + cond *sync.Cond + dead map[enode.ID]bool + records map[enode.ID]*enode.Node + pinged []*enode.Node + n *enode.Node } func newPingRecorder() *pingRecorder { @@ -130,12 +135,13 @@ func newPingRecorder() *pingRecorder { r.Set(enr.IP{0, 0, 0, 0}) n := enode.SignNull(&r, enode.ID{}) - return &pingRecorder{ + t := &pingRecorder{ dead: make(map[enode.ID]bool), - pinged: make(map[enode.ID]bool), records: make(map[enode.ID]*enode.Node), n: n, } + t.cond = sync.NewCond(&t.mu) + return t } // updateRecord updates a node record. Future calls to ping and @@ -151,12 +157,46 @@ func (t *pingRecorder) Self() *enode.Node { return nullNode } func (t *pingRecorder) lookupSelf() []*enode.Node { return nil } func (t *pingRecorder) lookupRandom() []*enode.Node { return nil } +func (t *pingRecorder) wasPinged(id enode.ID) bool { + t.mu.Lock() + defer t.mu.Unlock() + return slices.ContainsFunc(t.pinged, func(n *enode.Node) bool { return n.ID() == id }) +} + +func (t *pingRecorder) waitPing(timeout time.Duration) *enode.Node { + t.mu.Lock() + defer t.mu.Unlock() + + // Wake up the loop on timeout. + var timedout atomic.Bool + timer := time.AfterFunc(timeout, func() { + timedout.Store(true) + t.cond.Broadcast() + }) + defer timer.Stop() + + // Wait for a ping. + for { + if timedout.Load() { + return nil + } + if len(t.pinged) > 0 { + n := t.pinged[0] + t.pinged = append(t.pinged[:0], t.pinged[1:]...) + return n + } + t.cond.Wait() + } +} + // ping simulates a ping request. 
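 // It records the pinged node and wakes any goroutine blocked in waitPing.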
func (t *pingRecorder) ping(n *enode.Node) (seq uint64, err error) { t.mu.Lock() defer t.mu.Unlock() - t.pinged[n.ID()] = true + t.pinged = append(t.pinged, n) + t.cond.Broadcast() + if t.dead[n.ID()] { return 0, errTimeout } diff --git a/p2p/discover/v4_udp.go b/p2p/discover/v4_udp.go index c2de12133ca2..a6121d1923e3 100644 --- a/p2p/discover/v4_udp.go +++ b/p2p/discover/v4_udp.go @@ -142,7 +142,7 @@ func ListenV4(c UDPConn, ln *enode.LocalNode, cfg Config) (*UDPv4, error) { log: cfg.Log, } - tab, err := newMeteredTable(t, ln.Database(), cfg) + tab, err := newTable(t, ln.Database(), cfg) if err != nil { return nil, err } diff --git a/p2p/discover/v5_udp.go b/p2p/discover/v5_udp.go index 20a8bccd058e..c6e65bfca27b 100644 --- a/p2p/discover/v5_udp.go +++ b/p2p/discover/v5_udp.go @@ -175,7 +175,7 @@ func newUDPv5(conn UDPConn, ln *enode.LocalNode, cfg Config) (*UDPv5, error) { cancelCloseCtx: cancelCloseCtx, } t.talk = newTalkSystem(t) - tab, err := newMeteredTable(t, t.db, cfg) + tab, err := newTable(t, t.db, cfg) if err != nil { return nil, err } From 70b8918ea04fe5de1898aaa56042233101a48b08 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 00:05:08 +0200 Subject: [PATCH 07/41] p2p/discover: fix build --- p2p/discover/table.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 6e2bf653411e..ed2356f7501c 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -165,7 +165,7 @@ func (tab *Table) Nodes() [][]BucketNode { nodes[i] = make([]BucketNode, len(b.entries)) for j, n := range b.entries { nodes[i][j] = BucketNode{ - Node: &n.Node, + Node: n.Node, Checks: int(n.livenessChecks), AddedAt: n.addedAt, } From 0a4b314244663f6366e89ef12b4f61d78715257c Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 09:58:59 +0200 Subject: [PATCH 08/41] p2p/discover: update --- p2p/discover/table.go | 2 +- p2p/discover/table_reval.go | 80 ++++++++++++++++----------------- p2p/discover/table_util_test.go | 8 +--- 3 files changed, 42 insertions(+), 48 deletions(-) diff --git a/p2p/discover/table.go b/p2p/discover/table.go index ed2356f7501c..225ca2211b55 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -435,7 +435,7 @@ func (tab *Table) loadSeedNodes() { age := time.Since(tab.db.LastPongReceived(seed.ID(), seed.IP())) tab.log.Trace("Found seed node in database", "id", seed.ID(), "addr", seed.addr(), "age", age) } - tab.addSeenNode(seed) + tab.handleAddNode(addNodeRequest{node: seed, isLive: true}) } } diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index d4bb92a844b9..bff2a43fc043 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -17,7 +17,6 @@ package discover import ( - "fmt" "slices" "time" @@ -28,8 +27,8 @@ import ( const never = ^mclock.AbsTime(0) type tableRevalidation struct { - newNodes revalidationQueue - nodes revalidationQueue + newNodes revalidationList + nodes revalidationList activeReq map[enode.ID]struct{} } @@ -43,14 +42,14 @@ type revalidationResponse struct { func (tr *tableRevalidation) init(cfg *Config) { tr.activeReq = make(map[enode.ID]struct{}) tr.newNodes.nextTime = never - tr.newNodes.interval = cfg.PingInterval + tr.newNodes.interval = cfg.PingInterval / 3 tr.nodes.nextTime = never tr.nodes.interval = cfg.PingInterval } // nodeAdded is called when the table receives a new node. 
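 // New nodes enter the newNodes list first and move to the main list once
 // their first revalidation succeeds.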
func (tr *tableRevalidation) nodeAdded(tab *Table, n *node) {
-	tr.newNodes.push(n, tab.rand)
+	tr.newNodes.push(n, tab.cfg.Clock.Now(), tab.rand)
 }
 
 // nodeRemoved is called when a node was removed from the table.
@@ -71,11 +70,11 @@ func (tr *tableRevalidation) nextTime() mclock.AbsTime {
 func (tr *tableRevalidation) run(tab *Table, now mclock.AbsTime) {
 	if n := tr.newNodes.get(now, tab.rand, tr.activeReq); n != nil {
 		tr.startRequest(tab, n, true)
-		tr.newNodes.schedule(tab.rand)
+		tr.newNodes.schedule(now, tab.rand)
 	}
 	if n := tr.nodes.get(now, tab.rand, tr.activeReq); n != nil {
 		tr.startRequest(tab, n, false)
-		tr.nodes.schedule(tab.rand)
+		tr.nodes.schedule(now, tab.rand)
 	}
 }
 
@@ -87,26 +86,33 @@ func (tr *tableRevalidation) startRequest(tab *Table, n *node, newNode bool) {
 	tr.activeReq[n.ID()] = struct{}{}
 	resp := revalidationResponse{n: n, isNewNode: newNode}
 
-	go func() {
-		// Ping the selected node and wait for a pong response.
-		remoteSeq, err := tab.net.ping(unwrapNode(n))
-		resp.didRespond = err == nil
-
-		// Also fetch record if the node replied and returned a higher sequence number.
-		if remoteSeq > n.Seq() {
-			newrec, err := tab.net.RequestENR(unwrapNode(n))
-			if err != nil {
-				tab.log.Debug("ENR request failed", "id", n.ID(), "addr", n.addr(), "err", err)
-			} else {
-				resp.newRecord = newrec
-			}
-		}
+	// Fetch the node while holding the lock.
+	tab.mutex.Lock()
+	node := n.Node
+	tab.mutex.Unlock()
 
-		select {
-		case tab.revalidateResp <- resp:
-		case <-tab.closed:
-		}
-	}()
+	go tab.doRevalidate(resp, node)
+}
+
+func (tab *Table) doRevalidate(resp revalidationResponse, node *enode.Node) {
+	// Ping the selected node and wait for a pong response.
+	remoteSeq, err := tab.net.ping(node)
+	resp.didRespond = err == nil
+
+	// Also fetch record if the node replied and returned a higher sequence number.
+	if remoteSeq > node.Seq() {
+		newrec, err := tab.net.RequestENR(node)
+		if err != nil {
+			tab.log.Debug("ENR request failed", "id", node.ID(), "err", err)
+		} else {
+			resp.newRecord = newrec
+		}
+	}
+
+	select {
+	case tab.revalidateResp <- resp:
+	case <-tab.closed:
+	}
 }
 
 // handleResponse processes the result of a revalidation request.
@@ -143,7 +149,7 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons
 	// Move node over to main queue after first validation.
 	if resp.isNewNode {
 		tr.newNodes.remove(n)
-		tr.nodes.push(n, tab.rand)
+		tr.nodes.push(n, tab.cfg.Clock.Now(), tab.rand)
 	}
 
 	// Store potential seeds in database.
@@ -152,15 +158,15 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons
 	}
 }
 
-// revalidationQueue holds a list of nodes and the next revalidation time.
-type revalidationQueue struct {
+// revalidationList holds a list of nodes and the next revalidation time.
+type revalidationList struct {
 	nodes    []*node
 	nextTime mclock.AbsTime
 	interval time.Duration
 }
 
 // get returns a random node from the queue. Nodes in the 'exclude' map are not returned.
-func (rq *revalidationQueue) get(now mclock.AbsTime, rand randomSource, exclude map[enode.ID]struct{}) *node { +func (rq *revalidationList) get(now mclock.AbsTime, rand randomSource, exclude map[enode.ID]struct{}) *node { if now < rq.nextTime || len(rq.nodes) == 0 { return nil } @@ -174,18 +180,18 @@ func (rq *revalidationQueue) get(now mclock.AbsTime, rand randomSource, exclude return nil } -func (rq *revalidationQueue) push(n *node, rand randomSource) { +func (rq *revalidationList) push(n *node, now mclock.AbsTime, rand randomSource) { rq.nodes = append(rq.nodes, n) if rq.nextTime == never { - rq.schedule(rand) + rq.schedule(now, rand) } } -func (rq *revalidationQueue) schedule(rand randomSource) { - rq.nextTime = mclock.AbsTime(rand.Int63n(int64(rq.interval))) +func (rq *revalidationList) schedule(now mclock.AbsTime, rand randomSource) { + rq.nextTime = now.Add(time.Duration(rand.Int63n(int64(rq.interval)))) } -func (rq *revalidationQueue) remove(n *node) bool { +func (rq *revalidationList) remove(n *node) bool { i := slices.Index(rq.nodes, n) if i == -1 { return false @@ -196,9 +202,3 @@ func (rq *revalidationQueue) remove(n *node) bool { } return true } - -func printIDs(list []*node) { - for i, n := range list { - fmt.Println(" - ", i, n.ID()) - } -} diff --git a/p2p/discover/table_util_test.go b/p2p/discover/table_util_test.go index 5eda1479498d..190076c571a8 100644 --- a/p2p/discover/table_util_test.go +++ b/p2p/discover/table_util_test.go @@ -120,7 +120,7 @@ func fillTable(tab *Table, nodes []*node, setLive bool) { tab.addSeenNode(n) } } - +7 type pingRecorder struct { mu sync.Mutex cond *sync.Cond @@ -157,12 +157,6 @@ func (t *pingRecorder) Self() *enode.Node { return nullNode } func (t *pingRecorder) lookupSelf() []*enode.Node { return nil } func (t *pingRecorder) lookupRandom() []*enode.Node { return nil } -func (t *pingRecorder) wasPinged(id enode.ID) bool { - t.mu.Lock() - defer t.mu.Unlock() - return slices.ContainsFunc(t.pinged, func(n *enode.Node) bool { return n.ID() == id }) -} - func (t *pingRecorder) waitPing(timeout time.Duration) *enode.Node { t.mu.Lock() defer t.mu.Unlock() From fad3fe78534887011860c35bb69971b9cfb8ed6f Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 10:21:57 +0200 Subject: [PATCH 09/41] p2p/discover: fix some tests --- p2p/discover/table_util_test.go | 3 ++- p2p/discover/v4_udp_test.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/p2p/discover/table_util_test.go b/p2p/discover/table_util_test.go index 190076c571a8..6ed4bcda9036 100644 --- a/p2p/discover/table_util_test.go +++ b/p2p/discover/table_util_test.go @@ -116,11 +116,12 @@ func fillTable(tab *Table, nodes []*node, setLive bool) { for _, n := range nodes { if setLive { n.livenessChecks = 1 + n.isValidatedLive = true } tab.addSeenNode(n) } } -7 + type pingRecorder struct { mu sync.Mutex cond *sync.Cond diff --git a/p2p/discover/v4_udp_test.go b/p2p/discover/v4_udp_test.go index 361e37962648..ada2786418d0 100644 --- a/p2p/discover/v4_udp_test.go +++ b/p2p/discover/v4_udp_test.go @@ -264,7 +264,7 @@ func TestUDPv4_findnode(t *testing.T) { n := wrapNode(enode.NewV4(&key.PublicKey, ip, 0, 2000)) // Ensure half of table content isn't verified live yet. 
if i > numCandidates/2 { - n.livenessChecks = 1 + n.isValidatedLive = true live[n.ID()] = true } nodes.push(n, numCandidates) From f2ac6927ab51b61762859e4ff4c0e945ef525a7a Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 10:22:14 +0200 Subject: [PATCH 10/41] p2p/discover: fix shutdown hang --- p2p/discover/table.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 225ca2211b55..5fe8fa0e5aa9 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -309,7 +309,7 @@ func (tab *Table) addSeenNode(n *node) { select { case tab.addNodeCh <- req: <-tab.addNodeHandled - case <-tab.closed: + case <-tab.closeReq: } } @@ -326,7 +326,7 @@ func (tab *Table) addVerifiedNode(n *node) { select { case tab.addNodeCh <- req: <-tab.addNodeHandled - case <-tab.closed: + case <-tab.closeReq: } } From 00e71f5d1b1a7a0013b170a8b776fdb5baa57e73 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 10:22:29 +0200 Subject: [PATCH 11/41] p2p/discover: fix test --- p2p/discover/table.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 5fe8fa0e5aa9..99dd0d85ccca 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -510,7 +510,7 @@ func (tab *Table) handleAddNode(req addNodeRequest) { } // Add to bucket. - b.entries, _ = pushNode(b.entries, req.node, bucketSize) + b.entries = append(b.entries, req.node) b.replacements = deleteNode(b.replacements, req.node) req.node.addedAt = time.Now() tab.nodeAdded(b, req.node) From 1d896e8165bb18b978480e6a1c54efa201e3472c Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 10:54:32 +0200 Subject: [PATCH 12/41] p2p/discover: fix spin condition --- p2p/discover/table_reval.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index bff2a43fc043..93471d1608ff 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -17,6 +17,7 @@ package discover import ( + "math" "slices" "time" @@ -24,7 +25,7 @@ import ( "github.com/ethereum/go-ethereum/p2p/enode" ) -const never = ^mclock.AbsTime(0) +const never = mclock.AbsTime(math.MaxInt64) type tableRevalidation struct { newNodes revalidationList From dc3f78bb8175b2fc21a192f4bf98ff7bc28e6439 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 18 Apr 2024 11:46:05 +0200 Subject: [PATCH 13/41] p2p/discover: add live flag in debug node --- p2p/discover/node.go | 1 + p2p/discover/table.go | 1 + 2 files changed, 2 insertions(+) diff --git a/p2p/discover/node.go b/p2p/discover/node.go index 17280a587824..99c9ad7abe9c 100644 --- a/p2p/discover/node.go +++ b/p2p/discover/node.go @@ -33,6 +33,7 @@ type BucketNode struct { Node *enode.Node `json:"node"` AddedAt time.Time `json:"added"` Checks int `json:"checks"` + Live bool `json:"live"` } // node represents a host on the network. 
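
For reference, the BucketNode structure above serializes to JSON along these lines. What follows is a minimal, self-contained sketch rather than output captured from the running API: the bucketNode mirror type and all of its values are invented for illustration, and a plain string stands in for the *enode.Node pointer so the example runs without a signed record.

    package main

    import (
    	"encoding/json"
    	"fmt"
    	"time"
    )

    // bucketNode mirrors discover.BucketNode's JSON shape at this point in
    // the series. The real Node field is an *enode.Node; a string stands in
    // for it here.
    type bucketNode struct {
    	Node    string    `json:"node"`
    	AddedAt time.Time `json:"added"`
    	Checks  int       `json:"checks"`
    	Live    bool      `json:"live"`
    }

    func main() {
    	n := bucketNode{
    		Node:    "enr:-example", // placeholder, not a valid record
    		AddedAt: time.Date(2024, 4, 18, 11, 46, 0, 0, time.UTC),
    		Checks:  3,
    		Live:    true,
    	}
    	out, err := json.MarshalIndent(n, "", "  ")
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println(string(out))
    }

The struct tags match the ones added in this patch, so consumers of the debug API see the keys node, added, checks and live.
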
diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 99dd0d85ccca..ecbf585d9275 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -167,6 +167,7 @@ func (tab *Table) Nodes() [][]BucketNode { nodes[i][j] = BucketNode{ Node: n.Node, Checks: int(n.livenessChecks), + Live: n.isValidatedLive, AddedAt: n.addedAt, } } From c1afb3c0cc9b1ffa0f1606460a344312a866c32d Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Sat, 20 Apr 2024 10:07:20 +0200 Subject: [PATCH 14/41] p2p/discover: return nextTime from run() --- p2p/discover/table.go | 52 ++++++++++++++++++------------------- p2p/discover/table_reval.go | 32 +++++++++++++---------- 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/p2p/discover/table.go b/p2p/discover/table.go index ecbf585d9275..fbcf02019b14 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -66,7 +66,7 @@ type Table struct { mutex sync.Mutex // protects buckets, bucket content, nursery, rand buckets [nBuckets]*bucket // index of known nodes by distance nursery []*node // bootstrap nodes - rand *reseedingRandom // source of randomness, periodically reseeded + rand reseedingRandom // source of randomness, periodically reseeded ips netutil.DistinctNetSet revalidation tableRevalidation @@ -76,14 +76,14 @@ type Table struct { log log.Logger // loop channels - refreshReq chan chan struct{} - revalidateResp chan revalidationResponse - addNodeCh chan addNodeRequest - addNodeHandled chan struct{} - findFailureCh chan *node - initDone chan struct{} - closeReq chan struct{} - closed chan struct{} + refreshReq chan chan struct{} + revalResponseCh chan revalidationResponse + addNodeCh chan addNodeRequest + addNodeHandled chan struct{} + findFailureCh chan *node + initDone chan struct{} + closeReq chan struct{} + closed chan struct{} nodeAddedHook func(*bucket, *node) nodeRemovedHook func(*bucket, *node) @@ -115,20 +115,19 @@ type addNodeRequest struct { func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { cfg = cfg.withDefaults() tab := &Table{ - net: t, - db: db, - cfg: cfg, - log: cfg.Log, - refreshReq: make(chan chan struct{}), - revalidateResp: make(chan revalidationResponse), - addNodeCh: make(chan addNodeRequest), - addNodeHandled: make(chan struct{}), - findFailureCh: make(chan *node), - initDone: make(chan struct{}), - closeReq: make(chan struct{}), - closed: make(chan struct{}), - rand: new(reseedingRandom), - ips: netutil.DistinctNetSet{Subnet: tableSubnet, Limit: tableIPLimit}, + net: t, + db: db, + cfg: cfg, + log: cfg.Log, + refreshReq: make(chan chan struct{}), + revalResponseCh: make(chan revalidationResponse), + addNodeCh: make(chan addNodeRequest), + addNodeHandled: make(chan struct{}), + findFailureCh: make(chan *node), + initDone: make(chan struct{}), + closeReq: make(chan struct{}), + closed: make(chan struct{}), + ips: netutil.DistinctNetSet{Subnet: tableSubnet, Limit: tableIPLimit}, } for i := range tab.buckets { tab.buckets[i] = &bucket{ @@ -349,17 +348,18 @@ func (tab *Table) loop() { loop: for { + nextTime := tab.revalidation.run(tab, tab.cfg.Clock.Now()) + revalTimer.Schedule(nextTime) + reseedRandTimer.Schedule(tab.rand.nextReseedTime()) - revalTimer.Schedule(tab.revalidation.nextTime()) select { case <-reseedRandTimer.C(): tab.rand.seed(tab.cfg.Clock.Now()) case <-revalTimer.C(): - tab.revalidation.run(tab, mclock.Now()) - case r := <-tab.revalidateResp: + case r := <-tab.revalResponseCh: tab.revalidation.handleResponse(tab, r) case addreq := <-tab.addNodeCh: diff --git 
a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index 93471d1608ff..7e5c96211ca1 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -50,7 +50,7 @@ func (tr *tableRevalidation) init(cfg *Config) { // nodeAdded is called when the table receives a new node. func (tr *tableRevalidation) nodeAdded(tab *Table, n *node) { - tr.newNodes.push(n, tab.cfg.Clock.Now(), tab.rand) + tr.newNodes.push(n, tab.cfg.Clock.Now(), &tab.rand) } // nodeRemoved is called when a node was removed from the table. @@ -61,22 +61,26 @@ func (tr *tableRevalidation) nodeRemoved(n *node) { } } -// nextTime returns the next time run() should be invoked. -// The Table main loop uses this to schedule a timer. -func (tr *tableRevalidation) nextTime() mclock.AbsTime { - return min(tr.newNodes.nextTime, tr.nodes.nextTime) -} - // run performs node revalidation. -func (tr *tableRevalidation) run(tab *Table, now mclock.AbsTime) { - if n := tr.newNodes.get(now, tab.rand, tr.activeReq); n != nil { +// It returns the next time it should be invoked, which is used in the Table main loop +// to schedule a timer. However, run can be called at any time. +func (tr *tableRevalidation) run(tab *Table, now mclock.AbsTime) (nextTime mclock.AbsTime) { + if n := tr.newNodes.get(now, &tab.rand, tr.activeReq); n != nil { tr.startRequest(tab, n, true) - tr.newNodes.schedule(now, tab.rand) + tr.newNodes.schedule(now, &tab.rand) } - if n := tr.nodes.get(now, tab.rand, tr.activeReq); n != nil { + if n := tr.nodes.get(now, &tab.rand, tr.activeReq); n != nil { tr.startRequest(tab, n, false) - tr.nodes.schedule(now, tab.rand) + tr.nodes.schedule(now, &tab.rand) } + + if tr.newNodes.nextTime == never { + return tr.nodes.nextTime + } + if tr.nodes.nextTime == never { + return tr.newNodes.nextTime + } + return min(tr.newNodes.nextTime, tr.nodes.nextTime) } // startRequest spawns a revalidation request for node n. @@ -111,7 +115,7 @@ func (tab *Table) doRevalidate(resp revalidationResponse, node *enode.Node) { } select { - case tab.revalidateResp <- resp: + case tab.revalResponseCh <- resp: case <-tab.closed: } } @@ -150,7 +154,7 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons // Move node over to main queue after first validation. if resp.isNewNode { tr.newNodes.remove(n) - tr.nodes.push(n, tab.cfg.Clock.Now(), tab.rand) + tr.nodes.push(n, tab.cfg.Clock.Now(), &tab.rand) } // Store potential seeds in database. From 7667ebfcebdd6941e49b9b169d675c48f7fdaa8d Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Sat, 20 Apr 2024 10:09:38 +0200 Subject: [PATCH 15/41] p2p/discover: simplify rand timer --- p2p/discover/common.go | 10 ++-------- p2p/discover/table.go | 10 ++++------ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/p2p/discover/common.go b/p2p/discover/common.go index 8b024231b71c..18eb94c71b45 100644 --- a/p2p/discover/common.go +++ b/p2p/discover/common.go @@ -104,20 +104,14 @@ type randomSource interface { // reseedingRandom is a random number generator that tracks when it was last re-seeded. 
type reseedingRandom struct { - cur atomic.Pointer[rand.Rand] - lastSeed mclock.AbsTime + cur atomic.Pointer[rand.Rand] } -func (r *reseedingRandom) nextReseedTime() mclock.AbsTime { - return r.lastSeed.Add(10 * time.Minute) -} - -func (r *reseedingRandom) seed(now mclock.AbsTime) { +func (r *reseedingRandom) seed() { var b [8]byte crand.Read(b[:]) seed := binary.BigEndian.Uint64(b[:]) r.cur.Store(rand.New(rand.NewSource(int64(seed)))) - r.lastSeed = now } func (r *reseedingRandom) Intn(n int) int { diff --git a/p2p/discover/table.go b/p2p/discover/table.go index fbcf02019b14..68ce499ebf15 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -135,7 +135,7 @@ func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { ips: netutil.DistinctNetSet{Subnet: bucketSubnet, Limit: bucketIPLimit}, } } - tab.rand.seed(cfg.Clock.Now()) + tab.rand.seed() tab.revalidation.init(&cfg) // initial table content @@ -337,7 +337,7 @@ func (tab *Table) loop() { refreshDone = make(chan struct{}) // where doRefresh reports completion waiting = []chan struct{}{tab.initDone} // holds waiting callers while doRefresh runs revalTimer = mclock.NewAlarm(tab.cfg.Clock) - reseedRandTimer = mclock.NewAlarm(tab.cfg.Clock) + reseedRandTimer = time.NewTicker(10 * time.Minute) ) defer refresh.Stop() defer revalTimer.Stop() @@ -351,11 +351,9 @@ loop: nextTime := tab.revalidation.run(tab, tab.cfg.Clock.Now()) revalTimer.Schedule(nextTime) - reseedRandTimer.Schedule(tab.rand.nextReseedTime()) - select { - case <-reseedRandTimer.C(): - tab.rand.seed(tab.cfg.Clock.Now()) + case <-reseedRandTimer.C: + tab.rand.seed() case <-revalTimer.C(): From b44673fa290665b4154f7e49be429ca79b827e07 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Sat, 20 Apr 2024 10:19:29 +0200 Subject: [PATCH 16/41] p2p/discover: move back to fast list when endpoint changed or dead --- p2p/discover/common.go | 2 +- p2p/discover/table_reval.go | 73 ++++++++++++++++++++++--------------- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/p2p/discover/common.go b/p2p/discover/common.go index 18eb94c71b45..28f7b0055a39 100644 --- a/p2p/discover/common.go +++ b/p2p/discover/common.go @@ -66,7 +66,7 @@ type Config struct { func (cfg Config) withDefaults() Config { // Node table configuration: if cfg.PingInterval == 0 { - cfg.PingInterval = 10 * time.Second + cfg.PingInterval = 5 * time.Second } if cfg.RefreshInterval == 0 { cfg.RefreshInterval = 30 * time.Minute diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index 7e5c96211ca1..6a9d96fda22d 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -28,8 +28,8 @@ import ( const never = mclock.AbsTime(math.MaxInt64) type tableRevalidation struct { - newNodes revalidationList - nodes revalidationList + fast revalidationList + slow revalidationList activeReq map[enode.ID]struct{} } @@ -42,22 +42,21 @@ type revalidationResponse struct { func (tr *tableRevalidation) init(cfg *Config) { tr.activeReq = make(map[enode.ID]struct{}) - tr.newNodes.nextTime = never - tr.newNodes.interval = cfg.PingInterval / 3 - tr.nodes.nextTime = never - tr.nodes.interval = cfg.PingInterval + tr.fast.nextTime = never + tr.fast.interval = cfg.PingInterval + tr.slow.nextTime = never + tr.slow.interval = cfg.PingInterval * 3 } // nodeAdded is called when the table receives a new node. 
func (tr *tableRevalidation) nodeAdded(tab *Table, n *node) { - tr.newNodes.push(n, tab.cfg.Clock.Now(), &tab.rand) + tr.fast.push(n, tab.cfg.Clock.Now(), &tab.rand) } // nodeRemoved is called when a node was removed from the table. func (tr *tableRevalidation) nodeRemoved(n *node) { - wasnew := tr.newNodes.remove(n) - if !wasnew { - tr.nodes.remove(n) + if !tr.fast.remove(n) { + tr.slow.remove(n) } } @@ -65,22 +64,22 @@ func (tr *tableRevalidation) nodeRemoved(n *node) { // It returns the next time it should be invoked, which is used in the Table main loop // to schedule a timer. However, run can be called at any time. func (tr *tableRevalidation) run(tab *Table, now mclock.AbsTime) (nextTime mclock.AbsTime) { - if n := tr.newNodes.get(now, &tab.rand, tr.activeReq); n != nil { + if n := tr.fast.get(now, &tab.rand, tr.activeReq); n != nil { tr.startRequest(tab, n, true) - tr.newNodes.schedule(now, &tab.rand) + tr.fast.schedule(now, &tab.rand) } - if n := tr.nodes.get(now, &tab.rand, tr.activeReq); n != nil { + if n := tr.slow.get(now, &tab.rand, tr.activeReq); n != nil { tr.startRequest(tab, n, false) - tr.nodes.schedule(now, &tab.rand) + tr.slow.schedule(now, &tab.rand) } - if tr.newNodes.nextTime == never { - return tr.nodes.nextTime + if tr.fast.nextTime == never { + return tr.slow.nextTime } - if tr.nodes.nextTime == never { - return tr.newNodes.nextTime + if tr.slow.nextTime == never { + return tr.fast.nextTime } - return min(tr.newNodes.nextTime, tr.nodes.nextTime) + return min(tr.fast.nextTime, tr.slow.nextTime) } // startRequest spawns a revalidation request for node n. @@ -122,6 +121,7 @@ func (tab *Table) doRevalidate(resp revalidationResponse, node *enode.Node) { // handleResponse processes the result of a revalidation request. func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationResponse) { + now := tab.cfg.Clock.Now() n := resp.n b := tab.bucket(n.ID()) delete(tr.activeReq, n.ID()) @@ -135,26 +135,32 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons if n.livenessChecks == 0 || resp.isNewNode { tab.deleteInBucket(b, n.ID()) } + // Move to fast queue. + if !resp.isNewNode { + tr.moveToList(&tr.fast, &tr.slow, n, now, &tab.rand) + } return } // The node responded. n.livenessChecks++ n.isValidatedLive = true - tab.log.Debug("Revalidated node", "b", b.index, "id", n.ID(), "checks", n.livenessChecks) + var endpointChanged bool if resp.newRecord != nil { - updated := tab.bumpInBucket(b, resp.newRecord) - if updated { + endpointChanged := tab.bumpInBucket(b, resp.newRecord) + if endpointChanged { // If the node changed its advertised endpoint, the updated ENR is not served // until it has been revalidated. n.isValidatedLive = false } } + tab.log.Debug("Revalidated node", "b", b.index, "id", n.ID(), "checks", n.livenessChecks, "changed", endpointChanged) - // Move node over to main queue after first validation. - if resp.isNewNode { - tr.newNodes.remove(n) - tr.nodes.push(n, tab.cfg.Clock.Now(), &tab.rand) + // Move node over to slow queue after first validation. + if resp.isNewNode && !endpointChanged { + tr.moveToList(&tr.slow, &tr.fast, n, now, &tab.rand) + } else if endpointChanged { + tr.moveToList(&tr.fast, &tr.slow, n, now, &tab.rand) } // Store potential seeds in database. 
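
The schedule calls above are what spread revalidation out in time: instead of running at a fixed period, each list picks a uniformly random point within its interval for its next run. A condensed, self-contained sketch of just that mechanism, with time.Time standing in for mclock.AbsTime and all table plumbing omitted:

    package main

    import (
    	"fmt"
    	"math/rand"
    	"time"
    )

    // list is a stripped-down revalidationList: all it keeps is the interval
    // and the randomly chosen time of its next run.
    type list struct {
    	interval time.Duration
    	nextTime time.Time
    }

    // schedule picks a uniformly random point within the interval, mirroring
    // the schedule method in the patch.
    func (l *list) schedule(now time.Time, r *rand.Rand) {
    	l.nextTime = now.Add(time.Duration(r.Int63n(int64(l.interval))))
    }

    func main() {
    	r := rand.New(rand.NewSource(1)) // fixed seed for a reproducible demo
    	fast := &list{interval: 5 * time.Second}
    	now := time.Now()
    	for i := 0; i < 3; i++ {
    		fast.schedule(now, r)
    		fmt.Println("next run in", fast.nextTime.Sub(now).Round(time.Millisecond))
    	}
    }

Randomizing the delay keeps the two lists from firing in lockstep and avoids an exactly periodic ping pattern.
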
@@ -163,6 +169,13 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons } } +func (tr *tableRevalidation) moveToList(dest, source *revalidationList, n *node, now mclock.AbsTime, rand randomSource) { + if !source.remove(n) { + panic("moveToList: node not in source list") + } + dest.push(n, now, rand) +} + // revalidationList holds a list nodes and the next revalidation time. type revalidationList struct { nodes []*node @@ -185,6 +198,10 @@ func (rq *revalidationList) get(now mclock.AbsTime, rand randomSource, exclude m return nil } +func (rq *revalidationList) schedule(now mclock.AbsTime, rand randomSource) { + rq.nextTime = now.Add(time.Duration(rand.Int63n(int64(rq.interval)))) +} + func (rq *revalidationList) push(n *node, now mclock.AbsTime, rand randomSource) { rq.nodes = append(rq.nodes, n) if rq.nextTime == never { @@ -192,10 +209,6 @@ func (rq *revalidationList) push(n *node, now mclock.AbsTime, rand randomSource) } } -func (rq *revalidationList) schedule(now mclock.AbsTime, rand randomSource) { - rq.nextTime = now.Add(time.Duration(rand.Int63n(int64(rq.interval)))) -} - func (rq *revalidationList) remove(n *node) bool { i := slices.Index(rq.nodes, n) if i == -1 { From 822b05926f0738d749298395c90d3dd9d8675710 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Sat, 20 Apr 2024 10:31:51 +0200 Subject: [PATCH 17/41] p2p/discover: fix crash in move --- p2p/discover/table_reval.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index 6a9d96fda22d..185f152bf7b3 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -132,11 +132,10 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons if !resp.didRespond { // Revalidation failed. n.livenessChecks /= 3 - if n.livenessChecks == 0 || resp.isNewNode { + if n.livenessChecks <= 0 { tab.deleteInBucket(b, n.ID()) - } - // Move to fast queue. - if !resp.isNewNode { + } else if !resp.isNewNode { + // Move to fast queue. tr.moveToList(&tr.fast, &tr.slow, n, now, &tab.rand) } return From c2ad0a783d732171e55d9bf476a3901073d43b4b Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Sat, 20 Apr 2024 10:41:04 +0200 Subject: [PATCH 18/41] p2p/discover: improve list moving --- p2p/discover/table_reval.go | 38 +++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index 185f152bf7b3..d1dc68c90e37 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -17,6 +17,7 @@ package discover import ( + "fmt" "math" "slices" "time" @@ -35,17 +36,19 @@ type tableRevalidation struct { type revalidationResponse struct { n *node - didRespond bool - isNewNode bool newRecord *enode.Node + list *revalidationList + didRespond bool } func (tr *tableRevalidation) init(cfg *Config) { tr.activeReq = make(map[enode.ID]struct{}) tr.fast.nextTime = never tr.fast.interval = cfg.PingInterval + tr.fast.name = "fast" tr.slow.nextTime = never tr.slow.interval = cfg.PingInterval * 3 + tr.slow.name = "slow" } // nodeAdded is called when the table receives a new node. @@ -65,11 +68,11 @@ func (tr *tableRevalidation) nodeRemoved(n *node) { // to schedule a timer. However, run can be called at any time. 
func (tr *tableRevalidation) run(tab *Table, now mclock.AbsTime) (nextTime mclock.AbsTime) { if n := tr.fast.get(now, &tab.rand, tr.activeReq); n != nil { - tr.startRequest(tab, n, true) + tr.startRequest(tab, &tr.fast, n) tr.fast.schedule(now, &tab.rand) } if n := tr.slow.get(now, &tab.rand, tr.activeReq); n != nil { - tr.startRequest(tab, n, false) + tr.startRequest(tab, &tr.slow, n) tr.slow.schedule(now, &tab.rand) } @@ -83,12 +86,12 @@ func (tr *tableRevalidation) run(tab *Table, now mclock.AbsTime) (nextTime mcloc } // startRequest spawns a revalidation request for node n. -func (tr *tableRevalidation) startRequest(tab *Table, n *node, newNode bool) { +func (tr *tableRevalidation) startRequest(tab *Table, list *revalidationList, n *node) { if _, ok := tr.activeReq[n.ID()]; ok { - panic("duplicate startRequest") + panic(fmt.Errorf("duplicate startRequest (list %q, node %v)", list.name, n.ID())) } tr.activeReq[n.ID()] = struct{}{} - resp := revalidationResponse{n: n, isNewNode: newNode} + resp := revalidationResponse{n: n, list: list} // Fetch the node while holding lock. tab.mutex.Lock() @@ -134,9 +137,8 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons n.livenessChecks /= 3 if n.livenessChecks <= 0 { tab.deleteInBucket(b, n.ID()) - } else if !resp.isNewNode { - // Move to fast queue. - tr.moveToList(&tr.fast, &tr.slow, n, now, &tab.rand) + } else { + tr.moveToList(&tr.fast, resp.list, n, now, &tab.rand) } return } @@ -153,13 +155,13 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons n.isValidatedLive = false } } - tab.log.Debug("Revalidated node", "b", b.index, "id", n.ID(), "checks", n.livenessChecks, "changed", endpointChanged) + tab.log.Debug("Revalidated node", "b", b.index, "id", n.ID(), "checks", n.livenessChecks, "q", resp.list.name) // Move node over to slow queue after first validation. - if resp.isNewNode && !endpointChanged { - tr.moveToList(&tr.slow, &tr.fast, n, now, &tab.rand) - } else if endpointChanged { - tr.moveToList(&tr.fast, &tr.slow, n, now, &tab.rand) + if !endpointChanged { + tr.moveToList(&tr.slow, resp.list, n, now, &tab.rand) + } else { + tr.moveToList(&tr.fast, resp.list, n, now, &tab.rand) } // Store potential seeds in database. @@ -169,8 +171,11 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons } func (tr *tableRevalidation) moveToList(dest, source *revalidationList, n *node, now mclock.AbsTime, rand randomSource) { + if source == dest { + return + } if !source.remove(n) { - panic("moveToList: node not in source list") + panic(fmt.Errorf("moveToList(%q -> %q): node %v not in source list", source.name, dest.name, n.ID())) } dest.push(n, now, rand) } @@ -180,6 +185,7 @@ type revalidationList struct { nodes []*node nextTime mclock.AbsTime interval time.Duration + name string } // get returns a random node from the queue. Nodes in the 'exclude' map are not returned. 
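
With patches 16 through 18 applied, the routing of a node after a revalidation ping follows a small state machine. The function below is a behavioral sketch of that decision, not code from the patch: the table, buckets and networking are stripped away, and the failure divisor shown is the value at this point in the series (a later patch tunes it).

    package main

    import "fmt"

    // next models the routing decision in handleResponse after patch 18.
    // checks is the node's livenessChecks counter going into the response.
    func next(checks uint, didRespond, endpointChanged bool) string {
    	if !didRespond {
    		// Failed ping: decay the counter; drop the node once it runs
    		// out, otherwise retry it soon via the fast list.
    		checks /= 3
    		if checks == 0 {
    			return "deleted from bucket"
    		}
    		return "fast list"
    	}
    	if endpointChanged {
    		// A new endpoint must be revalidated before the record is served.
    		return "fast list"
    	}
    	return "slow list"
    }

    func main() {
    	fmt.Println(next(5, true, false))  // slow list
    	fmt.Println(next(5, true, true))   // fast list
    	fmt.Println(next(5, false, false)) // fast list (5/3 leaves 1 check)
    	fmt.Println(next(1, false, false)) // deleted from bucket
    }
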
From 0b44fc4deaa93611c18ffc37c0d52e94036c788b Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Sat, 20 Apr 2024 10:44:10 +0200
Subject: [PATCH 19/41] p2p/discover: faster

---
 p2p/discover/common.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/p2p/discover/common.go b/p2p/discover/common.go
index 28f7b0055a39..b3e0387ca29a 100644
--- a/p2p/discover/common.go
+++ b/p2p/discover/common.go
@@ -66,7 +66,7 @@ type Config struct {
 func (cfg Config) withDefaults() Config {
 	// Node table configuration:
 	if cfg.PingInterval == 0 {
-		cfg.PingInterval = 5 * time.Second
+		cfg.PingInterval = 3 * time.Second
 	}
 	if cfg.RefreshInterval == 0 {
 		cfg.RefreshInterval = 30 * time.Minute

From 30eb2bf40624633feac240ff1df1cc6c004d0247 Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Sat, 20 Apr 2024 10:47:25 +0200
Subject: [PATCH 20/41] p2p/discover: drop the 'checks' field from dead-node
 logs (it's always zero)

---
 p2p/discover/table.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/p2p/discover/table.go b/p2p/discover/table.go
index 68ce499ebf15..41b39d1aa806 100644
--- a/p2p/discover/table.go
+++ b/p2p/discover/table.go
@@ -568,7 +568,7 @@ func (tab *Table) deleteInBucket(b *bucket, id enode.ID) *node {
 
 	// Add replacement.
 	if len(b.replacements) == 0 {
-		tab.log.Debug("Removed dead node", "b", b.index, "id", n.ID(), "ip", n.IP(), "checks", n.livenessChecks)
+		tab.log.Debug("Removed dead node", "b", b.index, "id", n.ID(), "ip", n.IP())
 		return nil
 	}
 	rindex := tab.rand.Intn(len(b.replacements))
@@ -577,7 +577,7 @@ func (tab *Table) deleteInBucket(b *bucket, id enode.ID) *node {
 	b.entries = append(b.entries, rep)
 	tab.nodeAdded(b, rep)
-	tab.log.Debug("Replaced dead node", "b", b.index, "id", n.ID(), "ip", n.IP(), "checks", n.livenessChecks, "r", rep.ID(), "rip", rep.IP())
+	tab.log.Debug("Replaced dead node", "b", b.index, "id", n.ID(), "ip", n.IP(), "r", rep.ID(), "rip", rep.IP())
 	return rep
 }

From 81e553f428f226092eda4917f8f6c5d8a70a5fe3 Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Sat, 20 Apr 2024 10:57:14 +0200
Subject: [PATCH 21/41] p2p/discover: faster decay

---
 p2p/discover/table_reval.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go
index d1dc68c90e37..3dcc5fba05e6 100644
--- a/p2p/discover/table_reval.go
+++ b/p2p/discover/table_reval.go
@@ -134,7 +134,7 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons
 
 	if !resp.didRespond {
 		// Revalidation failed.
-		n.livenessChecks /= 3
+		n.livenessChecks /= 5
 		if n.livenessChecks <= 0 {
 			tab.deleteInBucket(b, n.ID())
 		} else {

From 0048a0f54d27cca39b2f29d533951d4438a47648 Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Wed, 24 Apr 2024 16:20:40 +0200
Subject: [PATCH 22/41] p2p/discover: fix shadowed endpointChanged variable in
 table_reval.go

Co-authored-by: Martin HS
---
 p2p/discover/table_reval.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go
index 3dcc5fba05e6..5b6dd848a0d1 100644
--- a/p2p/discover/table_reval.go
+++ b/p2p/discover/table_reval.go
@@ -148,7 +148,7 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons
 	n.isValidatedLive = true
 	var endpointChanged bool
 	if resp.newRecord != nil {
-		endpointChanged := tab.bumpInBucket(b, resp.newRecord)
+		endpointChanged = tab.bumpInBucket(b, resp.newRecord)
 		if endpointChanged {
 			// If the node changed its advertised endpoint, the updated ENR is not served
 			// until it has been revalidated.
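
The one-character diff in the last patch fixes a classic Go pitfall: inside the if block, ':=' declared a fresh endpointChanged that shadowed the variable declared with var just above, so the outer one stayed false and the fast/slow routing below never saw an endpoint change. A standalone illustration of the same bug, with invented names:

    package main

    import "fmt"

    func bumped() bool { return true }

    func main() {
    	var endpointChanged bool
    	if true {
    		endpointChanged := bumped() // BUG: ':=' declares a new inner variable
    		_ = endpointChanged
    	}
    	fmt.Println(endpointChanged) // false: the outer variable was never written

    	if true {
    		endpointChanged = bumped() // fix: '=' assigns to the outer variable
    	}
    	fmt.Println(endpointChanged) // true
    }
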
From 041ce1b7b319b2445475de110b3e045811832abf Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Mon, 22 Apr 2024 16:40:59 +0200 Subject: [PATCH 23/41] p2p/discover: fix addedAt time --- p2p/discover/node.go | 14 ++++++++------ p2p/discover/table.go | 17 +++++++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/p2p/discover/node.go b/p2p/discover/node.go index 99c9ad7abe9c..fdb85b1c8d0a 100644 --- a/p2p/discover/node.go +++ b/p2p/discover/node.go @@ -30,19 +30,21 @@ import ( ) type BucketNode struct { - Node *enode.Node `json:"node"` - AddedAt time.Time `json:"added"` - Checks int `json:"checks"` - Live bool `json:"live"` + Node *enode.Node `json:"node"` + AddedTable time.Time `json:"addedToTable"` + AddedBucket time.Time `json:"addedToBucket"` + Checks int `json:"checks"` + Live bool `json:"live"` } // node represents a host on the network. // The fields of Node may not be modified. type node struct { *enode.Node - addedAt time.Time // time when the node was added to the table + addedToTable time.Time // first time node was added to bucket or replacement list + addedToBucket time.Time // time it was added in the actual bucket livenessChecks uint // how often liveness was checked - isValidatedLive bool + isValidatedLive bool // true if existence of node is considered validated right now } type encPubkey [64]byte diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 41b39d1aa806..24b3c7b68659 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -164,10 +164,11 @@ func (tab *Table) Nodes() [][]BucketNode { nodes[i] = make([]BucketNode, len(b.entries)) for j, n := range b.entries { nodes[i][j] = BucketNode{ - Node: n.Node, - Checks: int(n.livenessChecks), - Live: n.isValidatedLive, - AddedAt: n.addedAt, + Node: n.Node, + Checks: int(n.livenessChecks), + Live: n.isValidatedLive, + AddedTable: n.addedToTable, + AddedBucket: n.addedToBucket, } } } @@ -511,7 +512,6 @@ func (tab *Table) handleAddNode(req addNodeRequest) { // Add to bucket. b.entries = append(b.entries, req.node) b.replacements = deleteNode(b.replacements, req.node) - req.node.addedAt = time.Now() tab.nodeAdded(b, req.node) } @@ -524,6 +524,8 @@ func (tab *Table) addReplacement(b *bucket, n *node) { if !tab.addIP(b, n.IP()) { return } + + n.addedToTable = time.Now() var removed *node b.replacements, removed = pushNode(b.replacements, n, maxReplacements) if removed != nil { @@ -532,6 +534,10 @@ func (tab *Table) addReplacement(b *bucket, n *node) { } func (tab *Table) nodeAdded(b *bucket, n *node) { + if n.addedToTable == (time.Time{}) { + n.addedToTable = time.Now() + } + n.addedToBucket = time.Now() tab.revalidation.nodeAdded(tab, n) if tab.nodeAddedHook != nil { tab.nodeAddedHook(b, n) @@ -576,7 +582,6 @@ func (tab *Table) deleteInBucket(b *bucket, id enode.ID) *node { b.replacements = slices.Delete(b.replacements, rindex, rindex+1) b.entries = append(b.entries, rep) tab.nodeAdded(b, rep) - tab.log.Debug("Replaced dead node", "b", b.index, "id", n.ID(), "ip", n.IP(), "r", rep.ID(), "rip", rep.IP()) return rep } From 2c2c7f7a070a48d936935fbd24b2d879d586c3d8 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Mon, 22 Apr 2024 16:51:12 +0200 Subject: [PATCH 24/41] p2p/discover: rename add node methods This is to better reflect their purpose. The previous naming of 'seen' and 'verified' was kind of arbitrary, especially since 'verified' was the stricter one. 
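
As a rough sketch of the distinction the new names draw, consider the toy table below. The types and helper are invented and the real gating lives in handleAddNode; the point is only that nodes found through our own FINDNODE queries are accepted at any time, while inbound nodes, i.e. peers that contacted us first, are accepted only once the table has finished initializing, so a fresh table cannot be filled by ping floods alone.

    package main

    import "fmt"

    // table is a toy stand-in for discover.Table with just the
    // initialization gate that addInboundNode relies on.
    type table struct {
    	initDone bool
    	entries  []string
    }

    // addFoundNode accepts nodes learned from our own queries unconditionally.
    func (t *table) addFoundNode(id string) { t.entries = append(t.entries, id) }

    // addInboundNode accepts nodes that contacted us first, but only after
    // the initial refresh is done.
    func (t *table) addInboundNode(id string) {
    	if !t.initDone {
    		return
    	}
    	t.entries = append(t.entries, id)
    }

    func main() {
    	t := &table{}
    	t.addInboundNode("attacker") // dropped: table still initializing
    	t.addFoundNode("from-findnode")
    	t.initDone = true
    	t.addInboundNode("peer")
    	fmt.Println(t.entries) // [from-findnode peer]
    }
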
--- p2p/discover/lookup.go | 2 +- p2p/discover/table.go | 30 ++++++++++++++++-------------- p2p/discover/table_test.go | 20 ++++++++++---------- p2p/discover/table_util_test.go | 2 +- p2p/discover/v4_udp.go | 4 ++-- p2p/discover/v5_udp.go | 2 +- p2p/discover/v5_udp_test.go | 2 +- 7 files changed, 32 insertions(+), 30 deletions(-) diff --git a/p2p/discover/lookup.go b/p2p/discover/lookup.go index df6cddc8330e..95b19943d636 100644 --- a/p2p/discover/lookup.go +++ b/p2p/discover/lookup.go @@ -165,7 +165,7 @@ func (it *lookup) query(n *node, reply chan<- []*node) { // Grab as many nodes as possible. Some of them might not be alive anymore, but we'll // just remove those again during revalidation. for _, n := range r { - it.tab.addSeenNode(n) + it.tab.addFoundNode(n) } reply <- r } diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 24b3c7b68659..421bcae98404 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -108,8 +108,8 @@ type bucket struct { } type addNodeRequest struct { - node *node - isLive bool + node *node + isInbound bool } func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { @@ -300,13 +300,13 @@ func (tab *Table) len() (n int) { return n } -// addSeenNode adds a node which may not be live. If the bucket has space available, +// addFoundNode adds a node which may not be live. If the bucket has space available, // adding the node succeeds immediately. Otherwise, the node is added to the replacements // list. // // The caller must not hold tab.mutex. -func (tab *Table) addSeenNode(n *node) { - req := addNodeRequest{node: n, isLive: false} +func (tab *Table) addFoundNode(n *node) { + req := addNodeRequest{node: n, isInbound: false} select { case tab.addNodeCh <- req: <-tab.addNodeHandled @@ -314,16 +314,16 @@ func (tab *Table) addSeenNode(n *node) { } } -// addVerifiedNode adds a node whose existence has been verified recently. If the bucket -// has no space, the node is added to the replacements list. +// addInboundNode adds a node from an inbound contact. If the bucket has no space, the +// node is added to the replacements list. // -// There is an additional safety measure: if the table is still initializing the node -// is not added. This prevents an attack where the table could be filled by just sending -// ping repeatedly. +// There is an additional safety measure: if the table is still initializing the node is +// not added. This prevents an attack where the table could be filled by just sending ping +// repeatedly. // // The caller must not hold tab.mutex. -func (tab *Table) addVerifiedNode(n *node) { - req := addNodeRequest{node: n, isLive: true} +func (tab *Table) addInboundNode(n *node) { + req := addNodeRequest{node: n, isInbound: true} select { case tab.addNodeCh <- req: <-tab.addNodeHandled @@ -435,7 +435,7 @@ func (tab *Table) loadSeedNodes() { age := time.Since(tab.db.LastPongReceived(seed.ID(), seed.IP())) tab.log.Trace("Found seed node in database", "id", seed.ID(), "addr", seed.addr(), "age", age) } - tab.handleAddNode(addNodeRequest{node: seed, isLive: true}) + tab.handleAddNode(addNodeRequest{node: seed, isInbound: true}) } } @@ -488,7 +488,9 @@ func (tab *Table) handleAddNode(req addNodeRequest) { if req.node.ID() == tab.self().ID() { return } - if !req.isLive && !tab.isInitDone() { + // For nodes from inbound contact, there is an additional safety measure: if the table + // is still initializing the node is not added. 
+ if req.isInbound && !tab.isInitDone() { return } diff --git a/p2p/discover/table_test.go b/p2p/discover/table_test.go index 9796d576e586..ac159ebd5be3 100644 --- a/p2p/discover/table_test.go +++ b/p2p/discover/table_test.go @@ -71,7 +71,7 @@ func testPingReplace(t *testing.T, newNodeIsResponding, lastInBucketIsResponding // this node in the bucket if it is unresponsive. transport.dead[last.ID()] = !lastInBucketIsResponding transport.dead[replacementNode.ID()] = !newNodeIsResponding - tab.addSeenNode(replacementNode) + tab.addFoundNode(replacementNode) // Wait until the last node was pinged. waitForRevalidationPing(t, transport, tab, last.ID()) @@ -127,7 +127,7 @@ func TestTable_IPLimit(t *testing.T) { for i := 0; i < tableIPLimit+1; i++ { n := nodeAtDistance(tab.self().ID(), i, net.IP{172, 0, 1, byte(i)}) - tab.addSeenNode(n) + tab.addFoundNode(n) } if tab.len() > tableIPLimit { t.Errorf("too many nodes in table") @@ -145,7 +145,7 @@ func TestTable_BucketIPLimit(t *testing.T) { d := 3 for i := 0; i < bucketIPLimit+1; i++ { n := nodeAtDistance(tab.self().ID(), d, net.IP{172, 0, 1, byte(i)}) - tab.addSeenNode(n) + tab.addFoundNode(n) } if tab.len() > bucketIPLimit { t.Errorf("too many nodes in table") @@ -258,8 +258,8 @@ func TestTable_addVerifiedNode(t *testing.T) { // Insert two nodes. n1 := nodeAtDistance(tab.self().ID(), 256, net.IP{88, 77, 66, 1}) n2 := nodeAtDistance(tab.self().ID(), 256, net.IP{88, 77, 66, 2}) - tab.addSeenNode(n1) - tab.addSeenNode(n2) + tab.addFoundNode(n1) + tab.addFoundNode(n2) bucket := tab.bucket(n1.ID()) // Verify bucket content: @@ -272,7 +272,7 @@ func TestTable_addVerifiedNode(t *testing.T) { newrec := n2.Record() newrec.Set(enr.IP{99, 99, 99, 99}) newn2 := wrapNode(enode.SignNull(newrec, n2.ID())) - tab.addVerifiedNode(newn2) + tab.addInboundNode(newn2) // Check that bucket is updated correctly. newBcontent := []*node{n1, newn2} @@ -291,8 +291,8 @@ func TestTable_addSeenNode(t *testing.T) { // Insert two nodes. n1 := nodeAtDistance(tab.self().ID(), 256, net.IP{88, 77, 66, 1}) n2 := nodeAtDistance(tab.self().ID(), 256, net.IP{88, 77, 66, 2}) - tab.addSeenNode(n1) - tab.addSeenNode(n2) + tab.addFoundNode(n1) + tab.addFoundNode(n2) // Verify bucket content: bcontent := []*node{n1, n2} @@ -304,7 +304,7 @@ func TestTable_addSeenNode(t *testing.T) { newrec := n2.Record() newrec.Set(enr.IP{99, 99, 99, 99}) newn2 := wrapNode(enode.SignNull(newrec, n2.ID())) - tab.addSeenNode(newn2) + tab.addFoundNode(newn2) // Check that bucket content is unchanged. if !reflect.DeepEqual(tab.bucket(n1.ID()).entries, bcontent) { @@ -330,7 +330,7 @@ func TestTable_revalidateSyncRecord(t *testing.T) { r.Set(enr.IP(net.IP{127, 0, 0, 1})) id := enode.ID{1} n1 := wrapNode(enode.SignNull(&r, id)) - tab.addSeenNode(n1) + tab.addFoundNode(n1) // Update the node record. 
r.Set(enr.WithEntry("foo", "bar")) diff --git a/p2p/discover/table_util_test.go b/p2p/discover/table_util_test.go index 6ed4bcda9036..6d278f5c11e2 100644 --- a/p2p/discover/table_util_test.go +++ b/p2p/discover/table_util_test.go @@ -118,7 +118,7 @@ func fillTable(tab *Table, nodes []*node, setLive bool) { n.livenessChecks = 1 n.isValidatedLive = true } - tab.addSeenNode(n) + tab.addFoundNode(n) } } diff --git a/p2p/discover/v4_udp.go b/p2p/discover/v4_udp.go index a6121d1923e3..d4e06416744c 100644 --- a/p2p/discover/v4_udp.go +++ b/p2p/discover/v4_udp.go @@ -673,10 +673,10 @@ func (t *UDPv4) handlePing(h *packetHandlerV4, from *net.UDPAddr, fromID enode.I n := wrapNode(enode.NewV4(h.senderKey, from.IP, int(req.From.TCP), from.Port)) if time.Since(t.db.LastPongReceived(n.ID(), from.IP)) > bondExpiration { t.sendPing(fromID, from, func() { - t.tab.addVerifiedNode(n) + t.tab.addInboundNode(n) }) } else { - t.tab.addVerifiedNode(n) + t.tab.addInboundNode(n) } // Update node database and endpoint predictor. diff --git a/p2p/discover/v5_udp.go b/p2p/discover/v5_udp.go index c6e65bfca27b..8cdc9dfbce3d 100644 --- a/p2p/discover/v5_udp.go +++ b/p2p/discover/v5_udp.go @@ -699,7 +699,7 @@ func (t *UDPv5) handlePacket(rawpacket []byte, fromAddr *net.UDPAddr) error { } if fromNode != nil { // Handshake succeeded, add to table. - t.tab.addSeenNode(wrapNode(fromNode)) + t.tab.addInboundNode(wrapNode(fromNode)) } if packet.Kind() != v5wire.WhoareyouPacket { // WHOAREYOU logged separately to report errors. diff --git a/p2p/discover/v5_udp_test.go b/p2p/discover/v5_udp_test.go index 4373ea81847f..0015f7cc7062 100644 --- a/p2p/discover/v5_udp_test.go +++ b/p2p/discover/v5_udp_test.go @@ -141,7 +141,7 @@ func TestUDPv5_unknownPacket(t *testing.T) { // Make node known. n := test.getNode(test.remotekey, test.remoteaddr).Node() - test.table.addSeenNode(wrapNode(n)) + test.table.addFoundNode(wrapNode(n)) test.packetIn(&v5wire.Unknown{Nonce: nonce}) test.waitPacketOut(func(p *v5wire.Whoareyou, addr *net.UDPAddr, _ v5wire.Nonce) { From ea9f0a4b033d5a0dce5ac8ead91f815fef4d4c59 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 25 Apr 2024 17:37:53 +0200 Subject: [PATCH 25/41] p2p/discover: put back findnode request tracking --- p2p/discover/lookup.go | 28 +++------------ p2p/discover/table.go | 77 ++++++++++++++++++++++++++++++------------ 2 files changed, 60 insertions(+), 45 deletions(-) diff --git a/p2p/discover/lookup.go b/p2p/discover/lookup.go index 95b19943d636..aff3aa82580e 100644 --- a/p2p/discover/lookup.go +++ b/p2p/discover/lookup.go @@ -140,32 +140,12 @@ func (it *lookup) slowdown() { } func (it *lookup) query(n *node, reply chan<- []*node) { - fails := it.tab.db.FindFails(n.ID(), n.IP()) r, err := it.queryfunc(n) - if errors.Is(err, errClosed) { + if !errors.Is(err, errClosed) { // Avoid recording failures on shutdown. - reply <- nil - return - } else if len(r) == 0 { - fails++ - it.tab.db.UpdateFindFails(n.ID(), n.IP(), fails) - // Remove the node from the local table if it fails to return anything useful too - // many times, but only if there are enough other nodes in the bucket. - dropped := false - if fails >= maxFindnodeFailures { - dropped = true - it.tab.trackFindFailure(n) - } - it.tab.log.Trace("FINDNODE failed", "id", n.ID(), "failcount", fails, "dropped", dropped, "err", err) - } else if fails > 0 { - // Reset failure counter because it counts _consecutive_ failures. - it.tab.db.UpdateFindFails(n.ID(), n.IP(), 0) - } - - // Grab as many nodes as possible. 
Some of them might not be alive anymore, but we'll - // just remove those again during revalidation. - for _, n := range r { - it.tab.addFoundNode(n) + success := len(r) > 0 + it.tab.trackRequest(n, success, r) + it.tab.log.Trace("FINDNODE failed", "id", n.ID(), "err", err) } reply <- r } diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 421bcae98404..4fa73c8240f3 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -78,9 +78,9 @@ type Table struct { // loop channels refreshReq chan chan struct{} revalResponseCh chan revalidationResponse - addNodeCh chan addNodeRequest + addNodeCh chan addNodeOp addNodeHandled chan struct{} - findFailureCh chan *node + trackRequestCh chan trackRequestOp initDone chan struct{} closeReq chan struct{} closed chan struct{} @@ -107,11 +107,17 @@ type bucket struct { index int } -type addNodeRequest struct { +type addNodeOp struct { node *node isInbound bool } +type trackRequestOp struct { + node *node + foundNodes []*node + success bool +} + func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { cfg = cfg.withDefaults() tab := &Table{ @@ -121,9 +127,9 @@ func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { log: cfg.Log, refreshReq: make(chan chan struct{}), revalResponseCh: make(chan revalidationResponse), - addNodeCh: make(chan addNodeRequest), + addNodeCh: make(chan addNodeOp), addNodeHandled: make(chan struct{}), - findFailureCh: make(chan *node), + trackRequestCh: make(chan trackRequestOp), initDone: make(chan struct{}), closeReq: make(chan struct{}), closed: make(chan struct{}), @@ -147,13 +153,6 @@ func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { return tab, nil } -func (tab *Table) trackFindFailure(n *node) { - select { - case tab.findFailureCh <- n: - case <-tab.closed: - } -} - // Nodes returns all nodes contained in the table. func (tab *Table) Nodes() [][]BucketNode { tab.mutex.Lock() @@ -306,9 +305,9 @@ func (tab *Table) len() (n int) { // // The caller must not hold tab.mutex. func (tab *Table) addFoundNode(n *node) { - req := addNodeRequest{node: n, isInbound: false} + op := addNodeOp{node: n, isInbound: false} select { - case tab.addNodeCh <- req: + case tab.addNodeCh <- op: <-tab.addNodeHandled case <-tab.closeReq: } @@ -323,14 +322,22 @@ func (tab *Table) addFoundNode(n *node) { // // The caller must not hold tab.mutex. func (tab *Table) addInboundNode(n *node) { - req := addNodeRequest{node: n, isInbound: true} + op := addNodeOp{node: n, isInbound: true} select { - case tab.addNodeCh <- req: + case tab.addNodeCh <- op: <-tab.addNodeHandled case <-tab.closeReq: } } +func (tab *Table) trackRequest(n *node, success bool, foundNodes []*node) { + op := trackRequestOp{n, foundNodes, success} + select { + case tab.trackRequestCh <- op: + case <-tab.closeReq: + } +} + // loop is the main loop of Table. 
func (tab *Table) loop() { var ( @@ -361,11 +368,12 @@ loop: case r := <-tab.revalResponseCh: tab.revalidation.handleResponse(tab, r) - case addreq := <-tab.addNodeCh: - tab.handleAddNode(addreq) + case op := <-tab.addNodeCh: + tab.handleAddNode(op) tab.addNodeHandled <- struct{}{} - case <-tab.findFailureCh: + case op := <-tab.trackRequestCh: + tab.handleTrackRequest(op) // TODO: handle failure by potentially dropping node case <-refresh.C: @@ -435,7 +443,7 @@ func (tab *Table) loadSeedNodes() { age := time.Since(tab.db.LastPongReceived(seed.ID(), seed.IP())) tab.log.Trace("Found seed node in database", "id", seed.ID(), "addr", seed.addr(), "age", age) } - tab.handleAddNode(addNodeRequest{node: seed, isInbound: true}) + tab.handleAddNode(addNodeOp{node: seed, isInbound: true}) } } @@ -484,7 +492,7 @@ func (tab *Table) removeIP(b *bucket, ip net.IP) { b.ips.Remove(ip) } -func (tab *Table) handleAddNode(req addNodeRequest) { +func (tab *Table) handleAddNode(req addNodeOp) { if req.node.ID() == tab.self().ID() { return } @@ -610,6 +618,33 @@ func (tab *Table) bumpInBucket(b *bucket, newRecord *enode.Node) bool { return true } +func (tab *Table) handleTrackRequest(op trackRequestOp) { + var fails int + if op.success { + // Reset failure counter because it counts _consecutive_ failures. + tab.db.UpdateFindFails(op.node.ID(), op.node.IP(), 0) + } else { + fails = tab.db.FindFails(op.node.ID(), op.node.IP()) + fails++ + tab.db.UpdateFindFails(op.node.ID(), op.node.IP(), fails) + } + + tab.mutex.Lock() + defer tab.mutex.Unlock() + + b := tab.bucket(op.node.ID()) + // Remove the node from the local table if it fails to return anything useful too + // many times, but only if there are enough other nodes in the bucket. + if fails >= maxFindnodeFailures && len(b.entries) >= bucketSize/2 { + tab.deleteInBucket(b, op.node.ID()) + } + + // Add found nodes. + for _, n := range op.foundNodes { + tab.handleAddNode(addNodeOp{n, false}) + } +} + func contains(ns []*node, id enode.ID) bool { for _, n := range ns { if n.ID() == id { From edb3a12a66d308015a2fe4da026d0e5556a530e5 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Fri, 26 Apr 2024 14:04:04 +0200 Subject: [PATCH 26/41] p2p/discover: fix lookup failure log --- p2p/discover/lookup.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/p2p/discover/lookup.go b/p2p/discover/lookup.go index aff3aa82580e..5c3d90d6c90f 100644 --- a/p2p/discover/lookup.go +++ b/p2p/discover/lookup.go @@ -141,11 +141,12 @@ func (it *lookup) slowdown() { func (it *lookup) query(n *node, reply chan<- []*node) { r, err := it.queryfunc(n) - if !errors.Is(err, errClosed) { - // Avoid recording failures on shutdown. + if !errors.Is(err, errClosed) { // avoid recording failures on shutdown. 
success := len(r) > 0 it.tab.trackRequest(n, success, r) - it.tab.log.Trace("FINDNODE failed", "id", n.ID(), "err", err) + if err != nil { + it.tab.log.Trace("FINDNODE failed", "id", n.ID(), "err", err) + } } reply <- r } From 1765712a83a15a5869e8413db24256a37881a944 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Fri, 26 Apr 2024 14:04:19 +0200 Subject: [PATCH 27/41] p2p/discover: fix double lock --- p2p/discover/table.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 4fa73c8240f3..5f9de4096a70 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -369,12 +369,13 @@ loop: tab.revalidation.handleResponse(tab, r) case op := <-tab.addNodeCh: + tab.mutex.Lock() tab.handleAddNode(op) + tab.mutex.Unlock() tab.addNodeHandled <- struct{}{} case op := <-tab.trackRequestCh: tab.handleTrackRequest(op) - // TODO: handle failure by potentially dropping node case <-refresh.C: if refreshDone == nil { @@ -492,6 +493,8 @@ func (tab *Table) removeIP(b *bucket, ip net.IP) { b.ips.Remove(ip) } +// handleAddNode adds the node in the request to the table, if there is space. +// The caller must hold tab.mutex. func (tab *Table) handleAddNode(req addNodeOp) { if req.node.ID() == tab.self().ID() { return @@ -502,8 +505,6 @@ func (tab *Table) handleAddNode(req addNodeOp) { return } - tab.mutex.Lock() - defer tab.mutex.Unlock() b := tab.bucket(req.node.ID()) if tab.bumpInBucket(b, req.node.Node) { // Already in bucket, update record. From 009658c4fa790b58f4cabbf141e12a8971cabb9b Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Fri, 26 Apr 2024 15:40:38 +0200 Subject: [PATCH 28/41] p2p/discover: seed nodes are not inbound --- p2p/discover/table.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 5f9de4096a70..b9c5202f2489 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -444,7 +444,7 @@ func (tab *Table) loadSeedNodes() { age := time.Since(tab.db.LastPongReceived(seed.ID(), seed.IP())) tab.log.Trace("Found seed node in database", "id", seed.ID(), "addr", seed.addr(), "age", age) } - tab.handleAddNode(addNodeOp{node: seed, isInbound: true}) + tab.handleAddNode(addNodeOp{node: seed, isInbound: false}) } } From 051ca137fb67588823d63e98f30f37d886c49611 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Fri, 3 May 2024 14:09:48 +0200 Subject: [PATCH 29/41] p2p/discover: use lock in reseedingRandom --- p2p/discover/common.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/p2p/discover/common.go b/p2p/discover/common.go index b3e0387ca29a..d4972778dd52 100644 --- a/p2p/discover/common.go +++ b/p2p/discover/common.go @@ -22,7 +22,7 @@ import ( "encoding/binary" "math/rand" "net" - "sync/atomic" + "sync" "time" "github.com/ethereum/go-ethereum/common/mclock" @@ -104,20 +104,29 @@ type randomSource interface { // reseedingRandom is a random number generator that tracks when it was last re-seeded. 
type reseedingRandom struct { - cur atomic.Pointer[rand.Rand] + mu sync.Mutex + cur *rand.Rand } func (r *reseedingRandom) seed() { var b [8]byte crand.Read(b[:]) seed := binary.BigEndian.Uint64(b[:]) - r.cur.Store(rand.New(rand.NewSource(int64(seed)))) + new := rand.New(rand.NewSource(int64(seed))) + + r.mu.Lock() + r.cur = new + r.mu.Unlock() } func (r *reseedingRandom) Intn(n int) int { - return r.cur.Load().Intn(n) + r.mu.Lock() + defer r.mu.Unlock() + return r.cur.Intn(n) } func (r *reseedingRandom) Int63n(n int64) int64 { - return r.cur.Load().Int63n(n) + r.mu.Lock() + defer r.mu.Unlock() + return r.cur.Int63n(n) } From 9faae76660b68a4a1468a2172d01a0ce07993814 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Fri, 3 May 2024 14:11:20 +0200 Subject: [PATCH 30/41] p2p/discover: improve test reliability --- p2p/discover/table_test.go | 44 +++++++++++++++++++----- p2p/discover/table_util_test.go | 60 +++++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 12 deletions(-) diff --git a/p2p/discover/table_test.go b/p2p/discover/table_test.go index ac159ebd5be3..93075d687056 100644 --- a/p2p/discover/table_test.go +++ b/p2p/discover/table_test.go @@ -63,24 +63,48 @@ func testPingReplace(t *testing.T, newNodeIsResponding, lastInBucketIsResponding <-tab.initDone // Fill up the sender's bucket. + tab.mutex.Lock() replacementNodeKey, _ := crypto.HexToECDSA("45a915e4d060149eb4365960e6a7a45f334393093061116b197e3240065ff2d8") replacementNode := wrapNode(enode.NewV4(&replacementNodeKey.PublicKey, net.IP{127, 0, 0, 1}, 99, 99)) - last := fillBucket(tab, replacementNode) + last := fillBucket(tab, replacementNode.ID()) + nodeEvents := newNodeEventRecorder(128) + tab.nodeAddedHook = nodeEvents.nodeAdded + tab.nodeRemovedHook = nodeEvents.nodeRemoved + tab.mutex.Unlock() - // Add the sender as if it just pinged us. The revalidation process should replace + // The revalidation process should replace // this node in the bucket if it is unresponsive. transport.dead[last.ID()] = !lastInBucketIsResponding transport.dead[replacementNode.ID()] = !newNodeIsResponding + + // Add replacement node to table. tab.addFoundNode(replacementNode) + t.Log("last:", last.ID()) + t.Log("replacement:", replacementNode.ID()) + // Wait until the last node was pinged. waitForRevalidationPing(t, transport, tab, last.ID()) - // If a replacement is expected, we also need to wait until the replacement node was pinged. if !lastInBucketIsResponding { + if !nodeEvents.waitNodeAbsent(last.ID(), 2*time.Second) { + t.Error("last node was not removed") + } + if !nodeEvents.waitNodePresent(replacementNode.ID(), 2*time.Second) { + t.Error("replacement node was not added") + } + + // If a replacement is expected, we also need to wait until the replacement node + // was pinged and added/removed. waitForRevalidationPing(t, transport, tab, replacementNode.ID()) + if !newNodeIsResponding { + if !nodeEvents.waitNodeAbsent(replacementNode.ID(), 2*time.Second) { + t.Error("replacement node was not removed") + } + } } + // Check bucket content. 
tab.mutex.Lock() defer tab.mutex.Unlock() wantSize := bucketSize @@ -89,26 +113,28 @@ func testPingReplace(t *testing.T, newNodeIsResponding, lastInBucketIsResponding } bucket := tab.bucket(replacementNode.ID()) if l := len(bucket.entries); l != wantSize { - t.Errorf("wrong bucket size after bond: got %d, want %d", l, wantSize) + t.Errorf("wrong bucket size after revalidation: got %d, want %d", l, wantSize) } if ok := contains(bucket.entries, last.ID()); ok != lastInBucketIsResponding { - t.Errorf("last entry found: %t, want: %t", ok, lastInBucketIsResponding) + t.Errorf("revalidated node found: %t, want: %t", ok, lastInBucketIsResponding) } wantNewEntry := newNodeIsResponding && !lastInBucketIsResponding if ok := contains(bucket.entries, replacementNode.ID()); ok != wantNewEntry { - t.Errorf("new entry found: %t, want: %t", ok, wantNewEntry) + t.Errorf("replacement node found: %t, want: %t", ok, wantNewEntry) } } // waitForRevalidationPing waits until a PING message is sent to a node with the given id. func waitForRevalidationPing(t *testing.T, transport *pingRecorder, tab *Table, id enode.ID) *enode.Node { + t.Helper() + simclock := tab.cfg.Clock.(*mclock.Simulated) - maxAttempts := tab.len() * 5 + maxAttempts := tab.len() * 8 for i := 0; i < maxAttempts; i++ { simclock.Run(tab.cfg.PingInterval) - p := transport.waitPing(500 * time.Millisecond) + p := transport.waitPing(2 * time.Second) if p == nil { - t.Fatal("Table did not send any revalidation ping") + t.Fatal("Table did not send revalidation ping") } if id == (enode.ID{}) || p.ID() == id { return p diff --git a/p2p/discover/table_util_test.go b/p2p/discover/table_util_test.go index 6d278f5c11e2..7d15a3d66519 100644 --- a/p2p/discover/table_util_test.go +++ b/p2p/discover/table_util_test.go @@ -99,9 +99,9 @@ func intIP(i int) net.IP { } // fillBucket inserts nodes into the given bucket until it is full. 
-func fillBucket(tab *Table, n *node) (last *node) { - ld := enode.LogDist(tab.self().ID(), n.ID()) - b := tab.bucket(n.ID()) +func fillBucket(tab *Table, id enode.ID) (last *node) { + ld := enode.LogDist(tab.self().ID(), id) + b := tab.bucket(id) for len(b.entries) < bucketSize { node := nodeAtDistance(tab.self().ID(), ld, intIP(ld)) b.entries = append(b.entries, node) @@ -291,3 +291,57 @@ func hexEncPubkey(h string) (ret encPubkey) { copy(ret[:], b) return ret } + +type nodeEventRecorder struct { + evc chan recordedNodeEvent +} + +type recordedNodeEvent struct { + node *node + added bool +} + +func newNodeEventRecorder(buffer int) *nodeEventRecorder { + return &nodeEventRecorder{ + evc: make(chan recordedNodeEvent, buffer), + } +} + +func (set *nodeEventRecorder) nodeAdded(b *bucket, n *node) { + select { + case set.evc <- recordedNodeEvent{n, true}: + default: + panic("no space in event buffer") + } +} + +func (set *nodeEventRecorder) nodeRemoved(b *bucket, n *node) { + select { + case set.evc <- recordedNodeEvent{n, false}: + default: + panic("no space in event buffer") + } +} + +func (set *nodeEventRecorder) waitNodePresent(id enode.ID, timeout time.Duration) bool { + return set.waitNodeEvent(id, timeout, true) +} + +func (set *nodeEventRecorder) waitNodeAbsent(id enode.ID, timeout time.Duration) bool { + return set.waitNodeEvent(id, timeout, false) +} + +func (set *nodeEventRecorder) waitNodeEvent(id enode.ID, timeout time.Duration, added bool) bool { + timer := time.NewTimer(timeout) + defer timer.Stop() + for { + select { + case ev := <-set.evc: + if ev.node.ID() == id && ev.added == added { + return true + } + case <-timer.C: + return false + } + } +} From 008ff21cdfb3c8961f4a336b9d99f561b6159e13 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 14 May 2024 13:18:47 +0200 Subject: [PATCH 31/41] p2p/discover: fix race in test --- p2p/discover/table.go | 33 ++++++++++++++++++--------------- p2p/discover/table_test.go | 2 +- p2p/discover/table_util_test.go | 5 +++-- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/p2p/discover/table.go b/p2p/discover/table.go index b9c5202f2489..b920bde7cc0b 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -79,7 +79,7 @@ type Table struct { refreshReq chan chan struct{} revalResponseCh chan revalidationResponse addNodeCh chan addNodeOp - addNodeHandled chan struct{} + addNodeHandled chan bool trackRequestCh chan trackRequestOp initDone chan struct{} closeReq chan struct{} @@ -128,7 +128,7 @@ func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { refreshReq: make(chan chan struct{}), revalResponseCh: make(chan revalidationResponse), addNodeCh: make(chan addNodeOp), - addNodeHandled: make(chan struct{}), + addNodeHandled: make(chan bool), trackRequestCh: make(chan trackRequestOp), initDone: make(chan struct{}), closeReq: make(chan struct{}), @@ -304,12 +304,13 @@ func (tab *Table) len() (n int) { // list. // // The caller must not hold tab.mutex. -func (tab *Table) addFoundNode(n *node) { +func (tab *Table) addFoundNode(n *node) bool { op := addNodeOp{node: n, isInbound: false} select { case tab.addNodeCh <- op: - <-tab.addNodeHandled + return <-tab.addNodeHandled case <-tab.closeReq: + return false } } @@ -321,12 +322,13 @@ func (tab *Table) addFoundNode(n *node) { // repeatedly. // // The caller must not hold tab.mutex. 
-func (tab *Table) addInboundNode(n *node) { +func (tab *Table) addInboundNode(n *node) bool { op := addNodeOp{node: n, isInbound: true} select { case tab.addNodeCh <- op: - <-tab.addNodeHandled + return <-tab.addNodeHandled case <-tab.closeReq: + return false } } @@ -370,9 +372,9 @@ loop: case op := <-tab.addNodeCh: tab.mutex.Lock() - tab.handleAddNode(op) + ok := tab.handleAddNode(op) tab.mutex.Unlock() - tab.addNodeHandled <- struct{}{} + tab.addNodeHandled <- ok case op := <-tab.trackRequestCh: tab.handleTrackRequest(op) @@ -495,35 +497,36 @@ func (tab *Table) removeIP(b *bucket, ip net.IP) { // handleAddNode adds the node in the request to the table, if there is space. // The caller must hold tab.mutex. -func (tab *Table) handleAddNode(req addNodeOp) { +func (tab *Table) handleAddNode(req addNodeOp) bool { if req.node.ID() == tab.self().ID() { - return + return false } // For nodes from inbound contact, there is an additional safety measure: if the table // is still initializing the node is not added. if req.isInbound && !tab.isInitDone() { - return + return false } b := tab.bucket(req.node.ID()) if tab.bumpInBucket(b, req.node.Node) { // Already in bucket, update record. - return + return false } if len(b.entries) >= bucketSize { // Bucket full, maybe add as replacement. tab.addReplacement(b, req.node) - return + return false } if !tab.addIP(b, req.node.IP()) { // Can't add: IP limit reached. - return + return false } // Add to bucket. b.entries = append(b.entries, req.node) b.replacements = deleteNode(b.replacements, req.node) tab.nodeAdded(b, req.node) + return true } // addReplacement adds n to the replacement cache of bucket b. @@ -573,7 +576,7 @@ func (tab *Table) nodeRemoved(b *bucket, n *node) { func (tab *Table) deleteInBucket(b *bucket, id enode.ID) *node { index := slices.IndexFunc(b.entries, func(e *node) bool { return e.ID() == id }) if index == -1 { - // Entry has been removed already, don't replace it. + // Entry has been removed already. return nil } diff --git a/p2p/discover/table_test.go b/p2p/discover/table_test.go index 93075d687056..8b11a32d5c12 100644 --- a/p2p/discover/table_test.go +++ b/p2p/discover/table_test.go @@ -63,10 +63,10 @@ func testPingReplace(t *testing.T, newNodeIsResponding, lastInBucketIsResponding <-tab.initDone // Fill up the sender's bucket. 
- tab.mutex.Lock() replacementNodeKey, _ := crypto.HexToECDSA("45a915e4d060149eb4365960e6a7a45f334393093061116b197e3240065ff2d8") replacementNode := wrapNode(enode.NewV4(&replacementNodeKey.PublicKey, net.IP{127, 0, 0, 1}, 99, 99)) last := fillBucket(tab, replacementNode.ID()) + tab.mutex.Lock() nodeEvents := newNodeEventRecorder(128) tab.nodeAddedHook = nodeEvents.nodeAdded tab.nodeRemovedHook = nodeEvents.nodeRemoved diff --git a/p2p/discover/table_util_test.go b/p2p/discover/table_util_test.go index 7d15a3d66519..86c60480948a 100644 --- a/p2p/discover/table_util_test.go +++ b/p2p/discover/table_util_test.go @@ -104,8 +104,9 @@ func fillBucket(tab *Table, id enode.ID) (last *node) { b := tab.bucket(id) for len(b.entries) < bucketSize { node := nodeAtDistance(tab.self().ID(), ld, intIP(ld)) - b.entries = append(b.entries, node) - tab.nodeAdded(b, node) + if !tab.addFoundNode(node) { + panic("node not added") + } } return b.entries[bucketSize-1] } From f96f5965e619176278254a02dd6ccc7a2423d31a Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 14 May 2024 22:26:49 +0200 Subject: [PATCH 32/41] p2p/discover: rename --- p2p/discover/table_reval.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index 5b6dd848a0d1..43a7a5ef22be 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -189,12 +189,12 @@ type revalidationList struct { } // get returns a random node from the queue. Nodes in the 'exclude' map are not returned. -func (rq *revalidationList) get(now mclock.AbsTime, rand randomSource, exclude map[enode.ID]struct{}) *node { - if now < rq.nextTime || len(rq.nodes) == 0 { +func (list *revalidationList) get(now mclock.AbsTime, rand randomSource, exclude map[enode.ID]struct{}) *node { + if now < list.nextTime || len(list.nodes) == 0 { return nil } - for i := 0; i < len(rq.nodes)*3; i++ { - n := rq.nodes[rand.Intn(len(rq.nodes))] + for i := 0; i < len(list.nodes)*3; i++ { + n := list.nodes[rand.Intn(len(list.nodes))] _, excluded := exclude[n.ID()] if !excluded { return n @@ -203,25 +203,25 @@ func (rq *revalidationList) get(now mclock.AbsTime, rand randomSource, exclude m return nil } -func (rq *revalidationList) schedule(now mclock.AbsTime, rand randomSource) { - rq.nextTime = now.Add(time.Duration(rand.Int63n(int64(rq.interval)))) +func (list *revalidationList) schedule(now mclock.AbsTime, rand randomSource) { + list.nextTime = now.Add(time.Duration(rand.Int63n(int64(list.interval)))) } -func (rq *revalidationList) push(n *node, now mclock.AbsTime, rand randomSource) { - rq.nodes = append(rq.nodes, n) - if rq.nextTime == never { - rq.schedule(now, rand) +func (list *revalidationList) push(n *node, now mclock.AbsTime, rand randomSource) { + list.nodes = append(list.nodes, n) + if list.nextTime == never { + list.schedule(now, rand) } } -func (rq *revalidationList) remove(n *node) bool { - i := slices.Index(rq.nodes, n) +func (list *revalidationList) remove(n *node) bool { + i := slices.Index(list.nodes, n) if i == -1 { return false } - rq.nodes = slices.Delete(rq.nodes, i, i+1) - if len(rq.nodes) == 0 { - rq.nextTime = never + list.nodes = slices.Delete(list.nodes, i, i+1) + if len(list.nodes) == 0 { + list.nextTime = never } return true } From f0504f699b47a920e1991c5c8c943b1232578dcc Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 14 May 2024 22:27:07 +0200 Subject: [PATCH 33/41] p2p/discover: simplify nextTime --- p2p/discover/table_reval.go | 6 ------ 
1 file changed, 6 deletions(-) diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index 43a7a5ef22be..5213097b81a2 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -76,12 +76,6 @@ func (tr *tableRevalidation) run(tab *Table, now mclock.AbsTime) (nextTime mcloc tr.slow.schedule(now, &tab.rand) } - if tr.fast.nextTime == never { - return tr.slow.nextTime - } - if tr.slow.nextTime == never { - return tr.fast.nextTime - } return min(tr.fast.nextTime, tr.slow.nextTime) } From cda6f56ee94f4bd18c1583cbed8bb049c85af887 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 21 May 2024 14:16:59 +0200 Subject: [PATCH 34/41] p2p/discover: rename fields --- p2p/discover/node.go | 10 +++++----- p2p/discover/table.go | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/p2p/discover/node.go b/p2p/discover/node.go index fdb85b1c8d0a..47df09e883da 100644 --- a/p2p/discover/node.go +++ b/p2p/discover/node.go @@ -30,11 +30,11 @@ import ( ) type BucketNode struct { - Node *enode.Node `json:"node"` - AddedTable time.Time `json:"addedToTable"` - AddedBucket time.Time `json:"addedToBucket"` - Checks int `json:"checks"` - Live bool `json:"live"` + Node *enode.Node `json:"node"` + AddedToTable time.Time `json:"addedToTable"` + AddedToBucket time.Time `json:"addedToBucket"` + Checks int `json:"checks"` + Live bool `json:"live"` } // node represents a host on the network. diff --git a/p2p/discover/table.go b/p2p/discover/table.go index b920bde7cc0b..54920446d0a6 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -163,11 +163,11 @@ func (tab *Table) Nodes() [][]BucketNode { nodes[i] = make([]BucketNode, len(b.entries)) for j, n := range b.entries { nodes[i][j] = BucketNode{ - Node: n.Node, - Checks: int(n.livenessChecks), - Live: n.isValidatedLive, - AddedTable: n.addedToTable, - AddedBucket: n.addedToBucket, + Node: n.Node, + Checks: int(n.livenessChecks), + Live: n.isValidatedLive, + AddedToTable: n.addedToTable, + AddedToBucket: n.addedToBucket, } } } From 6e37d520e9b0ccb97b19450201da1cd2011ce299 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 21 May 2024 14:18:27 +0200 Subject: [PATCH 35/41] p2p/discover: change to / 3 --- p2p/discover/table_reval.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go index 5213097b81a2..333de4a10a09 100644 --- a/p2p/discover/table_reval.go +++ b/p2p/discover/table_reval.go @@ -128,7 +128,7 @@ func (tr *tableRevalidation) handleResponse(tab *Table, resp revalidationRespons if !resp.didRespond { // Revalidation failed. 
-		n.livenessChecks /= 5
+		n.livenessChecks /= 3
 		if n.livenessChecks <= 0 {
 			tab.deleteInBucket(b, n.ID())
 		} else {

From 9ec2553844ce21f89782220293f58f1ebaead429 Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Wed, 22 May 2024 12:30:59 +0200
Subject: [PATCH 36/41] p2p/discover: explain

---
 p2p/discover/table.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/p2p/discover/table.go b/p2p/discover/table.go
index 54920446d0a6..33b5bd1b635d 100644
--- a/p2p/discover/table.go
+++ b/p2p/discover/table.go
@@ -638,8 +638,10 @@ func (tab *Table) handleTrackRequest(op trackRequestOp) {
 	b := tab.bucket(op.node.ID())
 
 	// Remove the node from the local table if it fails to return anything useful too
-	// many times, but only if there are enough other nodes in the bucket.
-	if fails >= maxFindnodeFailures && len(b.entries) >= bucketSize/2 {
+	// many times, but only if there are enough other nodes in the bucket. This latter
+	// condition specifically exists to make bootstrapping in smaller test networks more
+	// reliable.
+	if fails >= maxFindnodeFailures && len(b.entries) >= bucketSize/4 {
 		tab.deleteInBucket(b, op.node.ID())
 	}
 

From de7e1c7d317fe830c3e47db5b0eb12b66df198e1 Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Wed, 22 May 2024 13:26:12 +0200
Subject: [PATCH 37/41] p2p/discover: shuffle in appendLiveNodes

---
 p2p/discover/common.go | 7 +++++++
 p2p/discover/table.go  | 7 ++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/p2p/discover/common.go b/p2p/discover/common.go
index d4972778dd52..bebea8cc3846 100644
--- a/p2p/discover/common.go
+++ b/p2p/discover/common.go
@@ -100,6 +100,7 @@ type ReadPacket struct {
 type randomSource interface {
 	Intn(int) int
 	Int63n(int64) int64
+	Shuffle(int, func(int, int))
 }
 
 // reseedingRandom is a random number generator that tracks when it was last re-seeded.
@@ -130,3 +131,9 @@ func (r *reseedingRandom) Int63n(n int64) int64 {
 	defer r.mu.Unlock()
 	return r.cur.Int63n(n)
 }
+
+func (r *reseedingRandom) Shuffle(n int, swap func(i, j int)) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.cur.Shuffle(n, swap)
+}
diff --git a/p2p/discover/table.go b/p2p/discover/table.go
index 33b5bd1b635d..74c0e930e4af 100644
--- a/p2p/discover/table.go
+++ b/p2p/discover/table.go
@@ -279,12 +279,17 @@ func (tab *Table) appendLiveNodes(dist uint, result []*enode.Node) []*enode.Node
 	}
 
 	tab.mutex.Lock()
-	defer tab.mutex.Unlock()
 	for _, n := range tab.bucketAtDistance(int(dist)).entries {
 		if n.isValidatedLive {
 			result = append(result, n.Node)
 		}
 	}
+	tab.mutex.Unlock()
+
+	// Shuffle result to avoid always returning same nodes in FINDNODE/v5.
+	tab.rand.Shuffle(len(result), func(i, j int) {
+		result[i], result[j] = result[j], result[i]
+	})
 	return result
 }

From 4ace55420900d484af40e886bb4f09927e209d41 Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Thu, 23 May 2024 11:36:50 +0200
Subject: [PATCH 38/41] p2p/discover: fix flaky test

---
 p2p/discover/table_test.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/p2p/discover/table_test.go b/p2p/discover/table_test.go
index 8b11a32d5c12..f72ecd94c989 100644
--- a/p2p/discover/table_test.go
+++ b/p2p/discover/table_test.go
@@ -363,6 +363,9 @@ func TestTable_revalidateSyncRecord(t *testing.T) {
 	n2 := enode.SignNull(&r, id)
 	transport.updateRecord(n2)
 
+	// Wait for revalidation. We wait for the node to be revalidated two times
+	// in order to synchronize with the update in the table.
+	waitForRevalidationPing(t, transport, tab, n2.ID())
 	waitForRevalidationPing(t, transport, tab, n2.ID())
 
 	intable := tab.getNode(id)

From 16b4386f1ee167efe15d7af7265d976cff8471b4 Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Thu, 23 May 2024 11:55:10 +0200
Subject: [PATCH 39/41] p2p/discover: add documentation

---
 p2p/discover/table_reval.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/p2p/discover/table_reval.go b/p2p/discover/table_reval.go
index 333de4a10a09..9a13900ebca4 100644
--- a/p2p/discover/table_reval.go
+++ b/p2p/discover/table_reval.go
@@ -28,6 +28,8 @@ import (
 
 const never = mclock.AbsTime(math.MaxInt64)
 
+// tableRevalidation implements the node revalidation process.
+// It tracks all nodes contained in Table, and schedules sending PING to them.
 type tableRevalidation struct {
 	fast revalidationList
 	slow revalidationList

From 4386e8a662d0cdc2bbf36ce1c7d9c8edd021322c Mon Sep 17 00:00:00 2001
From: Felix Lange
Date: Thu, 23 May 2024 12:07:16 +0200
Subject: [PATCH 40/41] p2p: add accessors for discovery instances

---
 p2p/server.go | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/p2p/server.go b/p2p/server.go
index 5b9a4aa71fdc..79c5fc76b69f 100644
--- a/p2p/server.go
+++ b/p2p/server.go
@@ -190,8 +190,8 @@ type Server struct {
 	nodedb    *enode.DB
 	localnode *enode.LocalNode
-	ntab      *discover.UDPv4
-	DiscV5    *discover.UDPv5
+	discv4    *discover.UDPv4
+	discv5    *discover.UDPv5
 	discmix   *enode.FairMix
 
 	dialsched *dialScheduler
@@ -400,6 +400,16 @@ func (srv *Server) Self() *enode.Node {
 	return ln.Node()
 }
 
+// DiscoveryV4 returns the discovery v4 instance, if configured.
+func (srv *Server) DiscoveryV4() *discover.UDPv4 {
+	return srv.discv4
+}
+
+// DiscoveryV5 returns the discovery v5 instance, if configured.
+func (srv *Server) DiscoveryV5() *discover.UDPv5 {
+	return srv.discv5
+}
+
 // Stop terminates the server and all active peer connections.
 // It blocks until all active connections have been closed.
 func (srv *Server) Stop() {
@@ -547,13 +557,13 @@ func (srv *Server) setupDiscovery() error {
 	)
 	// If both versions of discovery are running, setup a shared
 	// connection, so v5 can read unhandled messages from v4.
-	if srv.DiscoveryV4 && srv.DiscoveryV5 {
+	if srv.Config.DiscoveryV4 && srv.Config.DiscoveryV5 {
 		unhandled = make(chan discover.ReadPacket, 100)
 		sconn = &sharedUDPConn{conn, unhandled}
 	}
 
 	// Start discovery services.
- if srv.DiscoveryV4 { + if srv.Config.DiscoveryV4 { cfg := discover.Config{ PrivateKey: srv.PrivateKey, NetRestrict: srv.NetRestrict, @@ -565,17 +575,17 @@ func (srv *Server) setupDiscovery() error { if err != nil { return err } - srv.ntab = ntab + srv.discv4 = ntab srv.discmix.AddSource(ntab.RandomNodes()) } - if srv.DiscoveryV5 { + if srv.Config.DiscoveryV5 { cfg := discover.Config{ PrivateKey: srv.PrivateKey, NetRestrict: srv.NetRestrict, Bootnodes: srv.BootstrapNodesV5, Log: srv.log, } - srv.DiscV5, err = discover.ListenV5(sconn, srv.localnode, cfg) + srv.discv5, err = discover.ListenV5(sconn, srv.localnode, cfg) if err != nil { return err } @@ -602,8 +612,8 @@ func (srv *Server) setupDialScheduler() { dialer: srv.Dialer, clock: srv.clock, } - if srv.ntab != nil { - config.resolver = srv.ntab + if srv.discv4 != nil { + config.resolver = srv.discv4 } if config.dialer == nil { config.dialer = tcpDialer{&net.Dialer{Timeout: defaultDialTimeout}} @@ -799,11 +809,11 @@ running: srv.log.Trace("P2P networking is spinning down") // Terminate discovery. If there is a running lookup it will terminate soon. - if srv.ntab != nil { - srv.ntab.Close() + if srv.discv4 != nil { + srv.discv4.Close() } - if srv.DiscV5 != nil { - srv.DiscV5.Close() + if srv.discv5 != nil { + srv.discv5.Close() } // Disconnect all peers. for _, p := range peers { From 6101ec318aeb45f0198a0e9a051270c6e311313f Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Thu, 23 May 2024 12:09:56 +0200 Subject: [PATCH 41/41] node: add p2p debug API --- node/api.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/node/api.go b/node/api.go index a71ae6aa2954..33dfb3a1cc4d 100644 --- a/node/api.go +++ b/node/api.go @@ -26,6 +26,7 @@ import ( "github.com/ethereum/go-ethereum/internal/debug" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/p2p" + "github.com/ethereum/go-ethereum/p2p/discover" "github.com/ethereum/go-ethereum/p2p/enode" "github.com/ethereum/go-ethereum/rpc" ) @@ -39,6 +40,9 @@ func (n *Node) apis() []rpc.API { }, { Namespace: "debug", Service: debug.Handler, + }, { + Namespace: "debug", + Service: &p2pDebugAPI{n}, }, { Namespace: "web3", Service: &web3API{n}, @@ -333,3 +337,16 @@ func (s *web3API) ClientVersion() string { func (s *web3API) Sha3(input hexutil.Bytes) hexutil.Bytes { return crypto.Keccak256(input) } + +// p2pDebugAPI provides access to p2p internals for debugging. +type p2pDebugAPI struct { + stack *Node +} + +func (s *p2pDebugAPI) DiscoveryV4Table() [][]discover.BucketNode { + disc := s.stack.server.DiscoveryV4() + if disc != nil { + return disc.TableBuckets() + } + return nil +}
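
Note on using the API added in the final patch: p2pDebugAPI is registered under
the "debug" namespace, so geth's usual RPC name derivation should expose
DiscoveryV4Table as "debug_discoveryV4Table". Below is a minimal client-side
sketch of querying it over HTTP-RPC. The endpoint URL, the --http.api flags,
and the derived method name are assumptions based on go-ethereum's RPC
conventions, not something spelled out in the patches themselves.

package main

import (
	"context"
	"fmt"

	"github.com/ethereum/go-ethereum/p2p/discover"
	"github.com/ethereum/go-ethereum/rpc"
)

func main() {
	// Assumption: a local node serving HTTP-RPC with the debug namespace
	// enabled, e.g. started with --http --http.api debug.
	client, err := rpc.Dial("http://127.0.0.1:8545")
	if err != nil {
		panic(err)
	}
	defer client.Close()

	// One inner slice per k-bucket, mirroring Table.Nodes above. Entries
	// decode through the BucketNode JSON tags introduced in the series
	// (node, addedToTable, addedToBucket, checks, live).
	var buckets [][]discover.BucketNode
	if err := client.CallContext(context.Background(), &buckets, "debug_discoveryV4Table"); err != nil {
		panic(err)
	}
	for i, bucket := range buckets {
		for _, n := range bucket {
			fmt.Printf("bucket %d: %s live=%v checks=%d\n",
				i, n.Node.ID().TerminalString(), n.Live, n.Checks)
		}
	}
}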
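
A second note, on the nextTime simplification in patch 33: it works because
never is defined as mclock.AbsTime(math.MaxInt64), so a plain min() already
ignores an idle revalidation list. A self-contained sketch of that invariant
follows; absTime stands in for mclock.AbsTime, and the built-in min requires
Go 1.21 or later.

package main

import (
	"fmt"
	"math"
)

// absTime stands in for mclock.AbsTime, an int64 nanosecond timestamp.
type absTime int64

const never = absTime(math.MaxInt64)

func main() {
	// Only the slow list has a real deadline: min picks it, because
	// never compares greater than any reachable timestamp.
	fast, slow := never, absTime(1500)
	fmt.Println(min(fast, slow) == slow) // true

	// Neither list is scheduled: the result stays never, matching the
	// special cases removed from tableRevalidation.run.
	fmt.Println(min(never, never) == never) // true
}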