Skip to content
This repository has been archived by the owner on Nov 25, 2024. It is now read-only.

Commit

Permalink
log less for failed key queries, add counters for incoming pdus/edus (#…
Browse files Browse the repository at this point in the history
…1801)

* log less for failed key queries, add counters for incoming pdus/edus

* use labels

* Blacklist flakey test

* Fix metrics
  • Loading branch information
kegsay authored Mar 23, 2021
1 parent 01267a3 commit a1b7e4e
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 9 deletions.
28 changes: 28 additions & 0 deletions federationapi/routing/send.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,34 @@ import (
"github.com/matrix-org/dendrite/setup/config"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/util"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)

// Prometheus counters for traffic received while handling federation
// transactions in this file.
var (
	// pduCountTotal counts incoming PDUs, partitioned by the "status" label.
	// This file increments it with status "total" once for every PDU found
	// in a transaction, and with status "success" once for every PDU whose
	// result is recorded as an empty PDUResult. The "success" series is
	// therefore a subset of the "total" series; do not sum across labels.
	pduCountTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "dendrite",
			Subsystem: "federationapi",
			Name:      "recv_pdus",
			Help:      "Number of incoming PDUs from remote servers with labels for success",
		},
		[]string{"status"}, // "total" or "success"
	)
	// eduCountTotal counts incoming EDUs. It is incremented once per EDU,
	// before the EDU type is inspected.
	eduCountTotal = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: "dendrite",
			Subsystem: "federationapi",
			Name:      "recv_edus",
			Help:      "Number of incoming EDUs from remote servers",
		},
	)
)

// init registers this file's federation receive counters with the default
// Prometheus registry. MustRegister panics if a collector cannot be
// registered, so a registration failure surfaces at program start.
func init() {
	prometheus.MustRegister(pduCountTotal)
	prometheus.MustRegister(eduCountTotal)
}

// Send implements /_matrix/federation/v1/send/{txnID}
func Send(
httpReq *http.Request,
Expand Down Expand Up @@ -133,6 +158,7 @@ func (t *txnReq) processTransaction(ctx context.Context) (*gomatrixserverlib.Res

pdus := []*gomatrixserverlib.HeaderedEvent{}
for _, pdu := range t.PDUs {
pduCountTotal.WithLabelValues("total").Inc()
var header struct {
RoomID string `json:"room_id"`
}
Expand Down Expand Up @@ -224,6 +250,7 @@ func (t *txnReq) processTransaction(ctx context.Context) (*gomatrixserverlib.Res
}
} else {
results[e.EventID()] = gomatrixserverlib.PDUResult{}
pduCountTotal.WithLabelValues("success").Inc()
}
}

Expand Down Expand Up @@ -281,6 +308,7 @@ func (t *txnReq) haveEventIDs() map[string]bool {

func (t *txnReq) processEDUs(ctx context.Context) {
for _, e := range t.EDUs {
eduCountTotal.Inc()
switch e.Type {
case gomatrixserverlib.MTyping:
// https://matrix.org/docs/spec/server_server/latest#typing-notifications
Expand Down
3 changes: 1 addition & 2 deletions federationsender/consumers/eduserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,7 @@ func (t *OutputEDUConsumer) onReceiptEvent(msg *sarama.ConsumerMessage) error {
return nil
}
if receiptServerName != t.ServerName {
log.WithField("other_server", receiptServerName).Info("Suppressing receipt notif: originated elsewhere")
return nil
return nil // don't log, very spammy as it logs for each remote receipt
}

joined, err := t.db.GetJoinedHosts(context.TODO(), receipt.RoomID)
Expand Down
14 changes: 8 additions & 6 deletions keyserver/internal/device_list_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,16 +330,16 @@ func (u *DeviceListUpdater) processServer(serverName gomatrixserverlib.ServerNam
logger.WithError(err).Error("failed to load stale device lists")
return waitTime, true
}
hasFailures := false
failCount := 0
for _, userID := range userIDs {
if ctx.Err() != nil {
// we've timed out, give up and go to the back of the queue to let another server be processed.
hasFailures = true
failCount += 1
break
}
res, err := u.fedClient.GetUserDevices(ctx, serverName, userID)
if err != nil {
logger.WithError(err).WithField("user_id", userID).Error("failed to query device keys for user")
failCount += 1
fcerr, ok := err.(*fedsenderapi.FederationClientError)
if ok {
if fcerr.RetryAfter > 0 {
Expand All @@ -351,20 +351,22 @@ func (u *DeviceListUpdater) processServer(serverName gomatrixserverlib.ServerNam
waitTime = time.Hour
logger.WithError(err).Warn("GetUserDevices returned unknown error type")
}
hasFailures = true
continue
}
err = u.updateDeviceList(&res)
if err != nil {
logger.WithError(err).WithField("user_id", userID).Error("fetched device list but failed to store/emit it")
hasFailures = true
failCount += 1
}
}
if failCount > 0 {
logger.WithField("total", len(userIDs)).WithField("failed", failCount).Error("failed to query device keys for some users")
}
for _, userID := range userIDs {
// always clear the channel to unblock Update calls regardless of success/failure
u.clearChannel(userID)
}
return waitTime, hasFailures
return waitTime, failCount > 0
}

func (u *DeviceListUpdater) updateDeviceList(res *gomatrixserverlib.RespUserDevices) error {
Expand Down
5 changes: 4 additions & 1 deletion sytest-blacklist
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,7 @@ Forgotten room messages cannot be paginated
Can re-join room if re-invited

# Blacklisted due to flakiness after #1774
Local device key changes get to remote servers with correct prev_id
Local device key changes get to remote servers with correct prev_id

# Flakey
Local device key changes appear in /keys/changes

0 comments on commit a1b7e4e

Please sign in to comment.