Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tso, election: remove some unnecessary logs from the TSO hot path #4044

Merged
merged 4 commits into from
Aug 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions errors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -611,11 +611,6 @@ error = '''
get local allocator failed, %s
'''

["PD:tso:ErrInvalidTimestamp"]
error = '''
invalid timestamp
'''

["PD:tso:ErrLogicOverflow"]
error = '''
logic part overflow
Expand Down
2 changes: 1 addition & 1 deletion metrics/grafana/pd.json
Original file line number Diff line number Diff line change
Expand Up @@ -9124,7 +9124,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "PD server TSO handle time + Client revc time",
"title": "PD server TSO handle time",
"tooltip": {
"msResolution": false,
"shared": true,
Expand Down
1 change: 0 additions & 1 deletion pkg/errs/errno.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ var (
ErrSyncMaxTS = errors.Normalize("sync max ts failed, %s", errors.RFCCodeText("PD:tso:ErrSyncMaxTS"))
ErrResetUserTimestamp = errors.Normalize("reset user timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrResetUserTimestamp"))
ErrGenerateTimestamp = errors.Normalize("generate timestamp failed, %s", errors.RFCCodeText("PD:tso:ErrGenerateTimestamp"))
ErrInvalidTimestamp = errors.Normalize("invalid timestamp", errors.RFCCodeText("PD:tso:ErrInvalidTimestamp"))
ErrLogicOverflow = errors.Normalize("logic part overflow", errors.RFCCodeText("PD:tso:ErrLogicOverflow"))
)

Expand Down
8 changes: 4 additions & 4 deletions pkg/errs/errs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ func (s *testErrorSuite) TestError(c *C) {
lg := newZapTestLogger(conf)
log.ReplaceGlobals(lg.Logger, nil)

rfc := `[error="[PD:tso:ErrInvalidTimestamp]invalid timestamp"]`
log.Error("test", zap.Error(ErrInvalidTimestamp.FastGenByArgs()))
rfc := `[error="[PD:member:ErrEtcdLeaderNotFound]etcd leader not found`
log.Error("test", zap.Error(ErrEtcdLeaderNotFound.FastGenByArgs()))
c.Assert(strings.Contains(lg.Message(), rfc), IsTrue)
err := errors.New("test error")
log.Error("test", ZapError(ErrInvalidTimestamp, err))
rfc = `[error="[PD:tso:ErrInvalidTimestamp]test error"]`
log.Error("test", ZapError(ErrEtcdLeaderNotFound, err))
rfc = `[error="[PD:member:ErrEtcdLeaderNotFound]test error"]`
c.Assert(strings.Contains(lg.Message(), rfc), IsTrue)
}

Expand Down
14 changes: 11 additions & 3 deletions server/election/leadership.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ type Leadership struct {
// leaderKey and leaderValue are key-value pair in etcd
leaderKey string
leaderValue string

keepAliveCtx context.Context
keepAliceCancelFunc context.CancelFunc
}

// NewLeadership creates a new Leadership.
Expand Down Expand Up @@ -124,10 +127,11 @@ func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...cl

// Keep will keep the leadership available by update the lease's expired time continuously
func (ls *Leadership) Keep(ctx context.Context) {
ls.getLease().KeepAlive(ctx)
ls.keepAliveCtx, ls.keepAliceCancelFunc = context.WithCancel(ctx)
ls.getLease().KeepAlive(ls.keepAliveCtx)
}

// Check returns whether the leadership is still available
// Check returns whether the leadership is still available.
func (ls *Leadership) Check() bool {
return ls != nil && ls.getLease() != nil && !ls.getLease().IsExpired()
}
Expand Down Expand Up @@ -208,10 +212,14 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) {
}
}

// Reset does some defer job such as closing lease, resetting lease etc.
// Reset does some defer jobs such as closing lease, resetting lease etc.
func (ls *Leadership) Reset() {
if ls == nil || ls.getLease() == nil {
return
}
if ls.keepAliceCancelFunc != nil {
ls.keepAliceCancelFunc()
}
ls.getLease().Close()
ls.setLease(nil)
}
6 changes: 0 additions & 6 deletions server/grpc_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ import (
"google.golang.org/grpc/status"
)

const slowThreshold = 5 * time.Millisecond

// gRPC errors
var (
// ErrNotLeader is returned when current server is not the leader and not possible to process request.
Expand Down Expand Up @@ -166,10 +164,6 @@ func (s *Server) Tso(stream pdpb.PD_TsoServer) error {
return status.Errorf(codes.Unknown, err.Error())
}

elapsed := time.Since(start)
if elapsed > slowThreshold {
log.Warn("get timestamp too slow", zap.Duration("cost", elapsed))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it useless?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can get it from the metrics also.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't find any metrics to record it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, you mean tsoHandleDuration.Observe(time.Since(start).Seconds())

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

}
tsoHandleDuration.Observe(time.Since(start).Seconds())
response := &pdpb.TsoResponse{
Header: s.header(),
Expand Down
4 changes: 0 additions & 4 deletions server/tso/global_allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,6 @@ func (gta *GlobalTSOAllocator) getSyncRTT() int64 {
func (gta *GlobalTSOAllocator) estimateMaxTS(count uint32, suffixBits int) (*pdpb.Timestamp, bool, error) {
physical, logical, lastUpdateTime := gta.timestampOracle.generateTSO(int64(count), 0)
if physical == 0 {
log.Error("invalid global tso in memory, unable to estimate maxTSO",
zap.Any("timestamp-physical", physical),
zap.Any("timestamp-logical", logical),
errs.ZapError(errs.ErrInvalidTimestamp))
return &pdpb.Timestamp{}, false, errs.ErrGenerateTimestamp.FastGenByArgs("timestamp in memory isn't initialized")
}
estimatedMaxTSO := &pdpb.Timestamp{
Expand Down
7 changes: 1 addition & 6 deletions server/tso/tso.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,19 +358,14 @@ func (t *timestampOracle) getTS(leadership *election.Leadership, count uint32, s
return resp, errs.ErrGenerateTimestamp.FastGenByArgs("tso count should be positive")
}
for i := 0; i < maxRetryCount; i++ {
currentPhysical, currentLogical := t.getTSO()
currentPhysical, _ := t.getTSO()
if currentPhysical == typeutil.ZeroTime {
// If it's leader, maybe SyncTimestamp hasn't completed yet
if leadership.Check() {
log.Info("this PD is a new leader, but sync hasn't been completed yet, wait for a while")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also this log will be noise:

[2021/06/18 16:15:19.627 +08:00] [ERROR] [tso.go:302] ["invalid timestamp"] [timestamp={}]
[2021/06/18 16:15:19.628 +08:00] [ERROR] [tso.go:302] ["invalid timestamp"] [timestamp={}]
[2021/06/18 16:15:19.628 +08:00] [ERROR] [tso.go:302] ["invalid timestamp"] [timestamp={}]
[2021/06/18 16:15:19.685 +08:00] [ERROR] [tso.go:302] ["invalid timestamp"] [timestamp={}]
[2021/06/18 16:15:19.686 +08:00] [ERROR] [tso.go:302] ["invalid timestamp"] [timestamp={}]
[2021/06/18 16:15:19.718 +08:00] [ERROR] [tso.go:302] ["invalid timestamp"] [timestamp={}]
[2021/06/18 16:15:19.718 +08:00] [ERROR] [tso.go:302] ["invalid timestamp"] [timestamp={}]

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed. PTAL.

time.Sleep(200 * time.Millisecond)
continue
}
tsoCounter.WithLabelValues("not_leader_anymore", t.dcLocation).Inc()
log.Error("invalid timestamp",
zap.Any("timestamp-physical", currentPhysical),
zap.Any("timestamp-logical", currentLogical),
errs.ZapError(errs.ErrInvalidTimestamp))
return pdpb.Timestamp{}, errs.ErrGenerateTimestamp.FastGenByArgs("timestamp in memory isn't initialized")
}
// Get a new TSO result with the given count
Expand Down