-
Notifications
You must be signed in to change notification settings - Fork 234
Commit
* added check to avoid adding unresponsive dht peers to the dht routing table * removed lock in adding peers to the rt * made variable names more meaningful * fixed network loop and corrected tests * added UsefulPeer() references from current PR * go mod tidy * added delay in TestRefreshBelowMinRTThreshold * addressed review * go mod tidy * addressed Jorropo review * added comments * removed state of peers probed recently * fix conflicts merge * updated deps * added optimizations documentation * Update dht.go * updated md files --------- Co-authored-by: Jorropo <jorropo.pgm@gmail.com> (cherry picked from commit 8c9fdff)
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,11 +71,6 @@ const ( | |
protectedBuckets = 2 | ||
) | ||
|
||
type addPeerRTReq struct { | ||
p peer.ID | ||
queryPeer bool | ||
} | ||
|
||
// IpfsDHT is an implementation of Kademlia with S/Kademlia modifications. | ||
// It is used to implement the base Routing module. | ||
type IpfsDHT struct { | ||
|
@@ -128,6 +123,9 @@ type IpfsDHT struct { | |
|
||
autoRefresh bool | ||
|
||
// timeout for the lookupCheck operation | ||
lookupCheckTimeout time.Duration | ||
|
||
// A function returning a set of bootstrap peers to fallback on if all other attempts to fix | ||
// the routing table fail (or, e.g., this is the first time this node is | ||
// connecting to the network). | ||
|
@@ -143,7 +141,7 @@ type IpfsDHT struct { | |
disableFixLowPeers bool | ||
fixLowPeersChan chan struct{} | ||
|
||
addPeerToRTChan chan addPeerRTReq | ||
addPeerToRTChan chan peer.ID | ||
refreshFinishedCh chan struct{} | ||
|
||
rtFreezeTimeout time.Duration | ||
|
@@ -243,7 +241,7 @@ func New(ctx context.Context, h host.Host, options ...Option) (*IpfsDHT, error) | |
// Fill routing table with currently connected peers that are DHT servers | ||
dht.plk.Lock() | ||
for _, p := range dht.host.Network().Peers() { | ||
dht.peerFound(p, false) | ||
dht.peerFound(dht.ctx, p) | ||
} | ||
dht.plk.Unlock() | ||
|
||
|
@@ -304,7 +302,7 @@ func makeDHT(ctx context.Context, h host.Host, cfg dhtcfg.Config) (*IpfsDHT, err | |
|
||
fixLowPeersChan: make(chan struct{}, 1), | ||
|
||
addPeerToRTChan: make(chan addPeerRTReq), | ||
addPeerToRTChan: make(chan peer.ID), | ||
refreshFinishedCh: make(chan struct{}), | ||
|
||
enableOptProv: cfg.EnableOptimisticProvide, | ||
|
@@ -334,6 +332,8 @@ func makeDHT(ctx context.Context, h host.Host, cfg dhtcfg.Config) (*IpfsDHT, err | |
dht.routingTable = rt | ||
dht.bootstrapPeers = cfg.BootstrapPeers | ||
|
||
dht.lookupCheckTimeout = cfg.RoutingTable.RefreshQueryTimeout | ||
|
||
// init network size estimator | ||
dht.nsEstimator = netsize.NewEstimator(h.ID(), rt, cfg.BucketSize) | ||
|
||
|
@@ -372,6 +372,18 @@ func makeDHT(ctx context.Context, h host.Host, cfg dhtcfg.Config) (*IpfsDHT, err | |
return dht, nil | ||
} | ||
|
||
// lookupCheck performs a lookup request to a remote peer.ID, verifying that it is able to | ||
// answer it correctly | ||
func (dht *IpfsDHT) lookupCheck(ctx context.Context, p peer.ID) error { | ||
// lookup request to p requesting for its own peer.ID | ||
peerids, err := dht.protoMessenger.GetClosestPeers(ctx, p, p) | ||
// p should return at least its own peerid | ||
if err == nil && len(peerids) == 0 { | ||
return fmt.Errorf("peer %s failed to return its closest peers, got %d", p, len(peerids)) | ||
} | ||
return err | ||
} | ||
|
||
func makeRtRefreshManager(dht *IpfsDHT, cfg dhtcfg.Config, maxLastSuccessfulOutboundThreshold time.Duration) (*rtrefresh.RtRefreshManager, error) { | ||
keyGenFnc := func(cpl uint) (string, error) { | ||
p, err := dht.routingTable.GenRandPeerID(cpl) | ||
|
@@ -383,16 +395,11 @@ func makeRtRefreshManager(dht *IpfsDHT, cfg dhtcfg.Config, maxLastSuccessfulOutb | |
return err | ||
} | ||
|
||
pingFnc := func(ctx context.Context, p peer.ID) error { | ||
_, err := dht.protoMessenger.GetClosestPeers(ctx, p, p) // don't use the PING message type as it's deprecated | ||
return err | ||
} | ||
|
||
r, err := rtrefresh.NewRtRefreshManager( | ||
dht.host, dht.routingTable, cfg.RoutingTable.AutoRefresh, | ||
keyGenFnc, | ||
queryFnc, | ||
pingFnc, | ||
dht.lookupCheck, | ||
cfg.RoutingTable.RefreshQueryTimeout, | ||
cfg.RoutingTable.RefreshInterval, | ||
maxLastSuccessfulOutboundThreshold, | ||
|
@@ -500,7 +507,7 @@ func (dht *IpfsDHT) fixLowPeers(ctx context.Context) { | |
// we try to add all peers we are connected to to the Routing Table | ||
// in case they aren't already there. | ||
for _, p := range dht.host.Network().Peers() { | ||
dht.peerFound(p, false) | ||
dht.peerFound(ctx, p) | ||
} | ||
|
||
// TODO Active Bootstrapping | ||
|
@@ -611,22 +618,22 @@ func (dht *IpfsDHT) rtPeerLoop(proc goprocess.Process) { | |
select { | ||
case <-timerCh: | ||
dht.routingTable.MarkAllPeersIrreplaceable() | ||
case addReq := <-dht.addPeerToRTChan: | ||
prevSize := dht.routingTable.Size() | ||
if prevSize == 0 { | ||
case p := <-dht.addPeerToRTChan: | ||
if dht.routingTable.Size() == 0 { | ||
isBootsrapping = true | ||
bootstrapCount = 0 | ||
timerCh = nil | ||
} | ||
newlyAdded, err := dht.routingTable.TryAddPeer(addReq.p, addReq.queryPeer, isBootsrapping) | ||
// queryPeer set to true as we only try to add queried peers to the RT | ||
newlyAdded, err := dht.routingTable.TryAddPeer(p, true, isBootsrapping) | ||
if err != nil { | ||
// peer not added. | ||
continue | ||
} | ||
if !newlyAdded && addReq.queryPeer { | ||
if !newlyAdded { | ||
// the peer is already in our RT, but we just successfully queried it and so let's give it a | ||
// bump on the query time so we don't ping it too soon for a liveliness check. | ||
dht.routingTable.UpdateLastSuccessfulOutboundQueryAt(addReq.p, time.Now()) | ||
dht.routingTable.UpdateLastSuccessfulOutboundQueryAt(p, time.Now()) | ||
} | ||
case <-dht.refreshFinishedCh: | ||
bootstrapCount = bootstrapCount + 1 | ||
|
@@ -646,40 +653,47 @@ func (dht *IpfsDHT) rtPeerLoop(proc goprocess.Process) { | |
} | ||
} | ||
|
||
// peerFound signals the routingTable that we've found a peer that | ||
// might support the DHT protocol. | ||
// If we have a connection a peer but no exchange of a query RPC -> | ||
// | ||
// LastQueriedAt=time.Now (so we don't ping it for some time for a liveliness check) | ||
// LastUsefulAt=0 | ||
// | ||
// If we connect to a peer and then exchange a query RPC -> | ||
// | ||
// LastQueriedAt=time.Now (same reason as above) | ||
// LastUsefulAt=time.Now (so we give it some life in the RT without immediately evicting it) | ||
// | ||
// If we query a peer we already have in our Routing Table -> | ||
// | ||
// LastQueriedAt=time.Now() | ||
// LastUsefulAt remains unchanged | ||
// | ||
// If we connect to a peer we already have in the RT but do not exchange a query (rare) | ||
// | ||
// Do Nothing. | ||
func (dht *IpfsDHT) peerFound(p peer.ID, queryPeer bool) { | ||
|
||
if c := baseLogger.Check(zap.DebugLevel, "peer found"); c != nil { | ||
c.Write(zap.String("peer", p.String())) | ||
// peerFound verifies whether the found peer advertises DHT protocols | ||
// and probe it to make sure it answers DHT queries as expected. If | ||
// it fails to answer, it isn't added to the routingTable. | ||
func (dht *IpfsDHT) peerFound(ctx context.Context, p peer.ID) { | ||
// if the peer is already in the routing table or the appropriate bucket is | ||
// already full, don't try to add the new peer.ID | ||
if dht.routingTable.Find(p) != "" || !dht.routingTable.UsefulPeer(p) { | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong. |
||
return | ||
} | ||
|
||
// verify whether the remote peer advertises the right dht protocol | ||
b, err := dht.validRTPeer(p) | ||
if err != nil { | ||
logger.Errorw("failed to validate if peer is a DHT peer", "peer", p, "error", err) | ||
} else if b { | ||
select { | ||
case dht.addPeerToRTChan <- addPeerRTReq{p, queryPeer}: | ||
case <-dht.ctx.Done(): | ||
|
||
livelinessCtx, cancel := context.WithTimeout(ctx, dht.lookupCheckTimeout) | ||
defer cancel() | ||
|
||
// performing a FIND_NODE query | ||
if err := dht.lookupCheck(livelinessCtx, p); err != nil { | ||
This comment has been minimized.
Sorry, something went wrong.
Jorropo
Contributor
|
||
logger.Debugw("connected peer not answering DHT request as expected", "peer", p, "error", err) | ||
return | ||
} | ||
|
||
// if the FIND_NODE succeeded, the peer is considered as valid | ||
dht.validPeerFound(ctx, p) | ||
} | ||
} | ||
|
||
// validPeerFound signals the routingTable that we've found a peer that | ||
// supports the DHT protocol, and just answered correctly to a DHT FindPeers | ||
func (dht *IpfsDHT) validPeerFound(ctx context.Context, p peer.ID) { | ||
if c := baseLogger.Check(zap.DebugLevel, "peer found"); c != nil { | ||
c.Write(zap.String("peer", p.String())) | ||
} | ||
|
||
select { | ||
case dht.addPeerToRTChan <- p: | ||
case <-dht.ctx.Done(): | ||
return | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -110,8 +110,10 @@ func (dht *IpfsDHT) handleNewMessage(s network.Stream) bool { | |
return false | ||
} | ||
|
||
// a peer has queried us, let's add it to RT | ||
dht.peerFound(mPeer, true) | ||
// a peer has queried us, let's add it to RT. A new go routine is required | ||
// because we can't block the stream handler until the remote peer answers | ||
// our query. | ||
go dht.peerFound(dht.ctx, mPeer) | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
Jorropo
Contributor
|
||
|
||
if c := baseLogger.Check(zap.DebugLevel, "handling message"); c != nil { | ||
c.Write(zap.String("from", mPeer.String()), | ||
|
This aquire the same mutex twice back to back for no reason, leading to a potential logical race,
dht.routingTable.Find(p) != ""
needs to be moved inside.UsefulPeer
(I would consider renaming it.UsefullNewPeer
then).