Skip to content
This repository has been archived by the owner on Aug 2, 2021. It is now read-only.

api, metrics, network: check caps when deciding on next peer for a chunk #1749

Merged
merged 6 commits into from
Sep 12, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/inspector.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (i *Inspector) DeliveriesPerPeer() map[string]int64 {
// iterate connection in kademlia
i.hive.Kademlia.EachConn(nil, 255, func(p *network.Peer, po int) bool {
// get how many chunks we receive for retrieve requests per peer
peermetric := fmt.Sprintf("chunk.delivery.%x", p.Over()[:16])
peermetric := fmt.Sprintf("network.retrieve.chunk.delivery.%x", p.Over()[:16])
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent of the findPeer - RequestFromPeers bug, but nonetheless a nice-to-have fix, so that the DeliveriesPerPeer debug API works.


res[fmt.Sprintf("%x", p.Over()[:16])] = metrics.GetOrRegisterCounter(peermetric, nil).Count()

Expand Down
2 changes: 1 addition & 1 deletion metrics/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func datadirDiskUsage(path string, d time.Duration) {
for range time.Tick(d) {
bytes, err := dirSize(path)
if err != nil {
log.Warn("cannot get disk space", "err", err)
log.Trace("cannot get disk space", "err", err)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This warning is annoying when we hit LevelDB during compaction, and it is mostly useless in our monitoring system, therefore I am lowering its log level.

}

metrics.GetOrRegisterGauge("datadir.usage", nil).Update(bytes)
Expand Down
7 changes: 3 additions & 4 deletions network/retrieval/retrieve.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,7 @@ func (r *Retrieval) findPeer(ctx context.Context, req *storage.Request) (retPeer
r.kad.EachConn(req.Addr[:], 255, func(p *network.Peer, po int) bool {
id := p.ID()

// skip light nodes
if p.LightNode {
if !p.HasCap("bzz-retrieve") {
return true
}

Expand Down Expand Up @@ -410,11 +409,11 @@ FINDPEER:

protoPeer := r.getPeer(sp.ID())
if protoPeer == nil {
r.logger.Warn("findPeer returned a peer to skip", "peer", sp.String(), "retry", retries)
r.logger.Warn("findPeer returned a peer to skip", "peer", sp.String(), "retry", retries, "ref", req.Addr)
req.PeersToSkip.Store(sp.ID().String(), time.Now())
retries++
if retries == maxFindPeerRetries {
r.logger.Error("max find peer retries reached", "max retries", maxFindPeerRetries)
r.logger.Error("max find peer retries reached", "max retries", maxFindPeerRetries, "ref", req.Addr)
return nil, ErrNoPeerFound
}

Expand Down
2 changes: 1 addition & 1 deletion network/retrieval/retrieve_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ func TestRequestFromPeers(t *testing.T) {

addr := network.RandomAddr()
to := network.NewKademlia(addr.OAddr, network.NewKadParams())
protocolsPeer := protocols.NewPeer(p2p.NewPeer(dummyPeerID, "dummy", nil), nil, nil)
protocolsPeer := protocols.NewPeer(p2p.NewPeer(dummyPeerID, "dummy", []p2p.Cap{p2p.Cap{Name: "bzz-retrieve", Version: 1}}), nil, nil)
peer := network.NewPeer(&network.BzzPeer{
BzzAddr: network.RandomAddr(),
LightNode: false,
Expand Down
2 changes: 1 addition & 1 deletion storage/netstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ func (n *NetStore) RemoteFetch(ctx context.Context, req *Request, fi *Fetcher) e
osp.Finish()
break
case <-ctx.Done(): // global fetcher timeout
n.logger.Trace("remote.fetch, fail", "ref", ref)
n.logger.Warn("remote.fetch, global timeout fail", "ref", ref)
metrics.GetOrRegisterCounter("remote.fetch.timeout.global", nil).Inc(1)

osp.LogFields(olog.Bool("fail", true))
Expand Down