From 42646c1b607c78a907b8801fa6a0b2e2c790ca4a Mon Sep 17 00:00:00 2001 From: JmPotato Date: Tue, 31 Aug 2021 13:26:12 +0800 Subject: [PATCH] tso, election: remove some unnecessary logs from the TSO hot path (#4044) * Remove some unnecessary logs from the TSO hot path Signed-off-by: JmPotato * Remove more unnecessary logs Signed-off-by: JmPotato * Fix the test Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- errors.toml | 5 ----- metrics/grafana/pd.json | 2 +- pkg/errs/errno.go | 1 - pkg/errs/errs_test.go | 8 ++++---- server/election/leadership.go | 14 +++++++++++--- server/grpc_service.go | 6 ------ server/tso/global_allocator.go | 4 ---- server/tso/tso.go | 7 +------ 8 files changed, 17 insertions(+), 30 deletions(-) diff --git a/errors.toml b/errors.toml index 740f0eadc125..c3f412a6dc5a 100644 --- a/errors.toml +++ b/errors.toml @@ -611,11 +611,6 @@ error = ''' get local allocator failed, %s ''' -["PD:tso:ErrInvalidTimestamp"] -error = ''' -invalid timestamp -''' - ["PD:tso:ErrLogicOverflow"] error = ''' logic part overflow diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 9ece3ea936f0..a40158ab03a7 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -9124,7 +9124,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD server TSO handle time + Client revc time", + "title": "PD server TSO handle time", "tooltip": { "msResolution": false, "shared": true, diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index cdf33435fffc..6f58b4a5b963 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -37,7 +37,6 @@ var ( ErrSyncMaxTS = errors.Normalize("sync max ts failed, %s", errors.RFCCodeText("PD:tso:ErrSyncMaxTS")) ErrResetUserTimestamp = errors.Normalize("reset user timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrResetUserTimestamp")) ErrGenerateTimestamp = errors.Normalize("generate timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrGenerateTimestamp")) - ErrInvalidTimestamp = errors.Normalize("invalid timestamp", errors.RFCCodeText("PD:tso:ErrInvalidTimestamp")) ErrLogicOverflow = errors.Normalize("logic part overflow", errors.RFCCodeText("PD:tso:ErrLogicOverflow")) ) diff --git a/pkg/errs/errs_test.go b/pkg/errs/errs_test.go index 796b2d1019b7..195dab7f9298 100644 --- a/pkg/errs/errs_test.go +++ b/pkg/errs/errs_test.go @@ -84,12 +84,12 @@ func (s *testErrorSuite) TestError(c *C) { lg := newZapTestLogger(conf) log.ReplaceGlobals(lg.Logger, nil) - rfc := `[error="[PD:tso:ErrInvalidTimestamp]invalid timestamp"]` - log.Error("test", zap.Error(ErrInvalidTimestamp.FastGenByArgs())) + rfc := `[error="[PD:member:ErrEtcdLeaderNotFound]etcd leader not found` + log.Error("test", zap.Error(ErrEtcdLeaderNotFound.FastGenByArgs())) c.Assert(strings.Contains(lg.Message(), rfc), IsTrue) err := errors.New("test error") - log.Error("test", ZapError(ErrInvalidTimestamp, err)) - rfc = `[error="[PD:tso:ErrInvalidTimestamp]test error"]` + log.Error("test", ZapError(ErrEtcdLeaderNotFound, err)) + rfc = `[error="[PD:member:ErrEtcdLeaderNotFound]test error"]` c.Assert(strings.Contains(lg.Message(), rfc), IsTrue) } diff --git a/server/election/leadership.go b/server/election/leadership.go index 942a8b93474e..39817a55d472 100644 --- a/server/election/leadership.go +++ b/server/election/leadership.go @@ -53,6 +53,9 @@ type Leadership struct { // leaderKey and leaderValue are key-value pair in etcd leaderKey string leaderValue string + + keepAliveCtx context.Context + keepAliceCancelFunc context.CancelFunc } // NewLeadership creates a new Leadership. @@ -124,10 +127,11 @@ func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...cl // Keep will keep the leadership available by update the lease's expired time continuously func (ls *Leadership) Keep(ctx context.Context) { - ls.getLease().KeepAlive(ctx) + ls.keepAliveCtx, ls.keepAliceCancelFunc = context.WithCancel(ctx) + ls.getLease().KeepAlive(ls.keepAliveCtx) } -// Check returns whether the leadership is still available +// Check returns whether the leadership is still available. func (ls *Leadership) Check() bool { return ls != nil && ls.getLease() != nil && !ls.getLease().IsExpired() } @@ -208,10 +212,14 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { } } -// Reset does some defer job such as closing lease, resetting lease etc. +// Reset does some defer jobs such as closing lease, resetting lease etc. func (ls *Leadership) Reset() { if ls == nil || ls.getLease() == nil { return } + if ls.keepAliceCancelFunc != nil { + ls.keepAliceCancelFunc() + } ls.getLease().Close() + ls.setLease(nil) } diff --git a/server/grpc_service.go b/server/grpc_service.go index f630093995c1..7ea0949ddfda 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -43,8 +43,6 @@ import ( "google.golang.org/grpc/status" ) -const slowThreshold = 5 * time.Millisecond - // gRPC errors var ( // ErrNotLeader is returned when current server is not the leader and not possible to process request. @@ -166,10 +164,6 @@ func (s *Server) Tso(stream pdpb.PD_TsoServer) error { return status.Errorf(codes.Unknown, err.Error()) } - elapsed := time.Since(start) - if elapsed > slowThreshold { - log.Warn("get timestamp too slow", zap.Duration("cost", elapsed)) - } tsoHandleDuration.Observe(time.Since(start).Seconds()) response := &pdpb.TsoResponse{ Header: s.header(), diff --git a/server/tso/global_allocator.go b/server/tso/global_allocator.go index 484951c819d7..4788c4e32315 100644 --- a/server/tso/global_allocator.go +++ b/server/tso/global_allocator.go @@ -104,10 +104,6 @@ func (gta *GlobalTSOAllocator) getSyncRTT() int64 { func (gta *GlobalTSOAllocator) estimateMaxTS(count uint32, suffixBits int) (*pdpb.Timestamp, bool, error) { physical, logical, lastUpdateTime := gta.timestampOracle.generateTSO(int64(count), 0) if physical == 0 { - log.Error("invalid global tso in memory, unable to estimate maxTSO", - zap.Any("timestamp-physical", physical), - zap.Any("timestamp-logical", logical), - errs.ZapError(errs.ErrInvalidTimestamp)) return &pdpb.Timestamp{}, false, errs.ErrGenerateTimestamp.FastGenByArgs("timestamp in memory isn't initialized") } estimatedMaxTSO := &pdpb.Timestamp{ diff --git a/server/tso/tso.go b/server/tso/tso.go index 5169fa7ce5dd..eb13f09ef28a 100644 --- a/server/tso/tso.go +++ b/server/tso/tso.go @@ -358,19 +358,14 @@ func (t *timestampOracle) getTS(leadership *election.Leadership, count uint32, s return resp, errs.ErrGenerateTimestamp.FastGenByArgs("tso count should be positive") } for i := 0; i < maxRetryCount; i++ { - currentPhysical, currentLogical := t.getTSO() + currentPhysical, _ := t.getTSO() if currentPhysical == typeutil.ZeroTime { // If it's leader, maybe SyncTimestamp hasn't completed yet if leadership.Check() { - log.Info("this PD is a new leader, but sync hasn't been completed yet, wait for a while") time.Sleep(200 * time.Millisecond) continue } tsoCounter.WithLabelValues("not_leader_anymore", t.dcLocation).Inc() - log.Error("invalid timestamp", - zap.Any("timestamp-physical", currentPhysical), - zap.Any("timestamp-logical", currentLogical), - errs.ZapError(errs.ErrInvalidTimestamp)) return pdpb.Timestamp{}, errs.ErrGenerateTimestamp.FastGenByArgs("timestamp in memory isn't initialized") } // Get a new TSO result with the given count