Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tso: make follower fail fast for tso requests (#2614) #2622

Merged
merged 4 commits into from
Jul 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions server/tso/tso.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ type TimestampOracle struct {
// For tso, set after pd becomes leader.
ts unsafe.Pointer
lastSavedTime atomic.Value
lease *member.LeaderLease

lease *member.LeaderLease

rootPath string
member string
Expand Down Expand Up @@ -255,7 +256,7 @@ func (t *TimestampOracle) ResetTimestamp() {
atomic.StorePointer(&t.ts, unsafe.Pointer(zero))
}

var maxRetryCount = 100
var maxRetryCount = 10

// GetRespTS is used to get a timestamp.
func (t *TimestampOracle) GetRespTS(count uint32) (pdpb.Timestamp, error) {
Expand All @@ -272,9 +273,7 @@ func (t *TimestampOracle) GetRespTS(count uint32) (pdpb.Timestamp, error) {
for i := 0; i < maxRetryCount; i++ {
current := (*atomicObject)(atomic.LoadPointer(&t.ts))
if current == nil || current.physical == typeutil.ZeroTime {
log.Error("we haven't synced timestamp ok, wait and retry", zap.Int("retry-count", i))
time.Sleep(200 * time.Millisecond)
continue
return pdpb.Timestamp{}, errors.New("can not get timestamp, may be not leader")
}

resp.Physical = current.physical.UnixNano() / int64(time.Millisecond)
Expand All @@ -287,6 +286,7 @@ func (t *TimestampOracle) GetRespTS(count uint32) (pdpb.Timestamp, error) {
time.Sleep(UpdateTimestampStep)
continue
}
// In case lease expired after the first check.
if t.lease == nil || t.lease.IsExpired() {
return pdpb.Timestamp{}, errors.New("alloc timestamp failed, lease expired")
}
Expand Down
40 changes: 40 additions & 0 deletions tests/server/tso/tso_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,46 @@ func (s *testTsoSuite) TestTsoCount0(c *C) {
c.Assert(err, NotNil)
}

// TestRequestFollower verifies that a TSO request sent to a follower is
// rejected quickly instead of blocking the client while the follower
// retries against a timestamp it will never have.
func (s *testTsoSuite) TestRequestFollower(c *C) {
	testCluster, err := tests.NewTestCluster(s.ctx, 2)
	c.Assert(err, IsNil)
	defer testCluster.Destroy()

	c.Assert(testCluster.RunInitialServers(), IsNil)
	testCluster.WaitLeader()

	// Locate a server that is not the current leader; with two servers
	// exactly one follower exists.
	var follower *tests.TestServer
	for _, srv := range testCluster.GetServers() {
		if srv.GetConfig().Name != testCluster.GetLeader() {
			follower = srv
		}
	}
	c.Assert(follower, NotNil)

	pdClient := testutil.MustNewGrpcClient(c, follower.GetAddr())
	request := &pdpb.TsoRequest{
		Header: testutil.NewRequestHeader(follower.GetClusterID()),
		Count:  1,
	}
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	stream, err := pdClient.Tso(ctx)
	c.Assert(err, IsNil)
	defer stream.CloseSend()

	begin := time.Now()
	c.Assert(stream.Send(request), IsNil)
	// The follower must answer with an error rather than a timestamp.
	_, err = stream.Recv()
	c.Assert(err, NotNil)

	// Requesting follower should fail fast, or the unavailable time will be
	// too long.
	c.Assert(time.Since(begin), Less, time.Second)
}

var _ = Suite(&testTimeFallBackSuite{})

type testTimeFallBackSuite struct {
Expand Down