From 641394ff624c5bb66c76108398703db45354754c Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 27 Aug 2021 18:05:22 +0800 Subject: [PATCH 1/3] Remove some unnecessary logs from the TSO hot path Signed-off-by: JmPotato --- server/election/leadership.go | 14 +++++++++++--- server/grpc_service.go | 6 ------ server/tso/tso.go | 1 - 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/server/election/leadership.go b/server/election/leadership.go index 942a8b93474..39817a55d47 100644 --- a/server/election/leadership.go +++ b/server/election/leadership.go @@ -53,6 +53,9 @@ type Leadership struct { // leaderKey and leaderValue are key-value pair in etcd leaderKey string leaderValue string + + keepAliveCtx context.Context + keepAliceCancelFunc context.CancelFunc } // NewLeadership creates a new Leadership. @@ -124,10 +127,11 @@ func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...cl // Keep will keep the leadership available by update the lease's expired time continuously func (ls *Leadership) Keep(ctx context.Context) { - ls.getLease().KeepAlive(ctx) + ls.keepAliveCtx, ls.keepAliceCancelFunc = context.WithCancel(ctx) + ls.getLease().KeepAlive(ls.keepAliveCtx) } -// Check returns whether the leadership is still available +// Check returns whether the leadership is still available. func (ls *Leadership) Check() bool { return ls != nil && ls.getLease() != nil && !ls.getLease().IsExpired() } @@ -208,10 +212,14 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { } } -// Reset does some defer job such as closing lease, resetting lease etc. +// Reset does some defer jobs such as closing lease, resetting lease etc. func (ls *Leadership) Reset() { if ls == nil || ls.getLease() == nil { return } + if ls.keepAliceCancelFunc != nil { + ls.keepAliceCancelFunc() + } ls.getLease().Close() + ls.setLease(nil) } diff --git a/server/grpc_service.go b/server/grpc_service.go index 5ca2bb77a47..0f8357cf7c8 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -43,8 +43,6 @@ import ( "google.golang.org/grpc/status" ) -const slowThreshold = 5 * time.Millisecond - // gRPC errors var ( // ErrNotLeader is returned when current server is not the leader and not possible to process request. @@ -166,10 +164,6 @@ func (s *Server) Tso(stream pdpb.PD_TsoServer) error { return status.Errorf(codes.Unknown, err.Error()) } - elapsed := time.Since(start) - if elapsed > slowThreshold { - log.Warn("get timestamp too slow", zap.Duration("cost", elapsed)) - } tsoHandleDuration.Observe(time.Since(start).Seconds()) response := &pdpb.TsoResponse{ Header: s.header(), diff --git a/server/tso/tso.go b/server/tso/tso.go index 5169fa7ce5d..b851d02ea2a 100644 --- a/server/tso/tso.go +++ b/server/tso/tso.go @@ -362,7 +362,6 @@ func (t *timestampOracle) getTS(leadership *election.Leadership, count uint32, s if currentPhysical == typeutil.ZeroTime { // If it's leader, maybe SyncTimestamp hasn't completed yet if leadership.Check() { - log.Info("this PD is a new leader, but sync hasn't been completed yet, wait for a while") time.Sleep(200 * time.Millisecond) continue } From a443374463433d6a27b1e752d0ebaec97ff1da74 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 27 Aug 2021 18:28:52 +0800 Subject: [PATCH 2/3] Remove more unnecessary logs Signed-off-by: JmPotato --- errors.toml | 5 ----- metrics/grafana/pd.json | 2 +- pkg/errs/errno.go | 1 - pkg/errs/errs_test.go | 4 ++-- server/tso/global_allocator.go | 4 ---- server/tso/tso.go | 6 +----- 6 files changed, 4 insertions(+), 18 deletions(-) diff --git a/errors.toml b/errors.toml index 740f0eadc12..c3f412a6dc5 100644 --- a/errors.toml +++ b/errors.toml @@ -611,11 +611,6 @@ error = ''' get local allocator failed, %s ''' -["PD:tso:ErrInvalidTimestamp"] -error = ''' -invalid timestamp -''' - ["PD:tso:ErrLogicOverflow"] error = ''' logic part overflow diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 9ece3ea936f..a40158ab03a 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -9124,7 +9124,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD server TSO handle time + Client revc time", + "title": "PD server TSO handle time", "tooltip": { "msResolution": false, "shared": true, diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index cdf33435fff..6f58b4a5b96 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -37,7 +37,6 @@ var ( ErrSyncMaxTS = errors.Normalize("sync max ts failed, %s", errors.RFCCodeText("PD:tso:ErrSyncMaxTS")) ErrResetUserTimestamp = errors.Normalize("reset user timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrResetUserTimestamp")) ErrGenerateTimestamp = errors.Normalize("generate timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrGenerateTimestamp")) - ErrInvalidTimestamp = errors.Normalize("invalid timestamp", errors.RFCCodeText("PD:tso:ErrInvalidTimestamp")) ErrLogicOverflow = errors.Normalize("logic part overflow", errors.RFCCodeText("PD:tso:ErrLogicOverflow")) ) diff --git a/pkg/errs/errs_test.go b/pkg/errs/errs_test.go index 796b2d1019b..cf3a563b788 100644 --- a/pkg/errs/errs_test.go +++ b/pkg/errs/errs_test.go @@ -85,10 +85,10 @@ func (s *testErrorSuite) TestError(c *C) { log.ReplaceGlobals(lg.Logger, nil) rfc := `[error="[PD:tso:ErrInvalidTimestamp]invalid timestamp"]` - log.Error("test", zap.Error(ErrInvalidTimestamp.FastGenByArgs())) + log.Error("test", zap.Error(ErrEtcdLeaderNotFound.FastGenByArgs())) c.Assert(strings.Contains(lg.Message(), rfc), IsTrue) err := errors.New("test error") - log.Error("test", ZapError(ErrInvalidTimestamp, err)) + log.Error("test", ZapError(ErrEtcdLeaderNotFound, err)) rfc = `[error="[PD:tso:ErrInvalidTimestamp]test error"]` c.Assert(strings.Contains(lg.Message(), rfc), IsTrue) } diff --git a/server/tso/global_allocator.go b/server/tso/global_allocator.go index 484951c819d..4788c4e3231 100644 --- a/server/tso/global_allocator.go +++ b/server/tso/global_allocator.go @@ -104,10 +104,6 @@ func (gta *GlobalTSOAllocator) getSyncRTT() int64 { func (gta *GlobalTSOAllocator) estimateMaxTS(count uint32, suffixBits int) (*pdpb.Timestamp, bool, error) { physical, logical, lastUpdateTime := gta.timestampOracle.generateTSO(int64(count), 0) if physical == 0 { - log.Error("invalid global tso in memory, unable to estimate maxTSO", - zap.Any("timestamp-physical", physical), - zap.Any("timestamp-logical", logical), - errs.ZapError(errs.ErrInvalidTimestamp)) return &pdpb.Timestamp{}, false, errs.ErrGenerateTimestamp.FastGenByArgs("timestamp in memory isn't initialized") } estimatedMaxTSO := &pdpb.Timestamp{ diff --git a/server/tso/tso.go b/server/tso/tso.go index b851d02ea2a..eb13f09ef28 100644 --- a/server/tso/tso.go +++ b/server/tso/tso.go @@ -358,7 +358,7 @@ func (t *timestampOracle) getTS(leadership *election.Leadership, count uint32, s return resp, errs.ErrGenerateTimestamp.FastGenByArgs("tso count should be positive") } for i := 0; i < maxRetryCount; i++ { - currentPhysical, currentLogical := t.getTSO() + currentPhysical, _ := t.getTSO() if currentPhysical == typeutil.ZeroTime { // If it's leader, maybe SyncTimestamp hasn't completed yet if leadership.Check() { @@ -366,10 +366,6 @@ func (t *timestampOracle) getTS(leadership *election.Leadership, count uint32, s continue } tsoCounter.WithLabelValues("not_leader_anymore", t.dcLocation).Inc() - log.Error("invalid timestamp", - zap.Any("timestamp-physical", currentPhysical), - zap.Any("timestamp-logical", currentLogical), - errs.ZapError(errs.ErrInvalidTimestamp)) return pdpb.Timestamp{}, errs.ErrGenerateTimestamp.FastGenByArgs("timestamp in memory isn't initialized") } // Get a new TSO result with the given count From 337f78d66e34f53848050445e62a954a9b411a80 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 27 Aug 2021 18:30:51 +0800 Subject: [PATCH 3/3] Fix the test Signed-off-by: JmPotato --- pkg/errs/errs_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/errs/errs_test.go b/pkg/errs/errs_test.go index cf3a563b788..195dab7f929 100644 --- a/pkg/errs/errs_test.go +++ b/pkg/errs/errs_test.go @@ -84,12 +84,12 @@ func (s *testErrorSuite) TestError(c *C) { lg := newZapTestLogger(conf) log.ReplaceGlobals(lg.Logger, nil) - rfc := `[error="[PD:tso:ErrInvalidTimestamp]invalid timestamp"]` + rfc := `[error="[PD:member:ErrEtcdLeaderNotFound]etcd leader not found` log.Error("test", zap.Error(ErrEtcdLeaderNotFound.FastGenByArgs())) c.Assert(strings.Contains(lg.Message(), rfc), IsTrue) err := errors.New("test error") log.Error("test", ZapError(ErrEtcdLeaderNotFound, err)) - rfc = `[error="[PD:tso:ErrInvalidTimestamp]test error"]` + rfc = `[error="[PD:member:ErrEtcdLeaderNotFound]test error"]` c.Assert(strings.Contains(lg.Message(), rfc), IsTrue) }