Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fit witness #633

Merged
merged 8 commits into from
Jan 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions error/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ var (
ErrRegionFlashbackInProgress = errors.New("region is in the flashback progress")
// ErrRegionFlashbackNotPrepared is the error when a region is not prepared for the flashback first.
ErrRegionFlashbackNotPrepared = errors.New("region is not prepared for the flashback")
// ErrIsWitness is the error when a request is send to a witness.
ErrIsWitness = errors.New("peer is witness")
// ErrUnknown is the unknow error.
ErrUnknown = errors.New("unknow")
// ErrResultUndetermined is the error when execution result is unknown.
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ require (
github.com/opentracing/opentracing-go v1.2.0
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989
github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.11.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E=
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a h1:4TiR0nGKmLmu2kTC22jOhUIplmv4GGUrUD9D2DTMms0=
github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12 h1:ZgPcRwsrzcuYcgrsGB+2I9w0n6JIT+GEptLUZ1kWKZA=
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
Expand Down
6 changes: 6 additions & 0 deletions integration_tests/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -344,13 +344,19 @@ github.com/pingcap/fn v0.0.0-20200306044125-d5540d389059 h1:Pe2LbxRmbTfAoKJ65bZL
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E=
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12 h1:ZgPcRwsrzcuYcgrsGB+2I9w0n6JIT+GEptLUZ1kWKZA=
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20230105060948-64890fa4f6c1 h1:jw4NjEiCleRJPPpHM7K6l8OKzOjnZAj62eKteCAY6ro=
github.com/pingcap/kvproto v0.0.0-20230105060948-64890fa4f6c1/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8=
github.com/pingcap/log v1.1.0/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pingcap/log v1.1.1-0.20221116035753-734d527bc87c h1:crhkw6DD+07Bg1wYhW5Piw+kYNKZqFQqfC2puUf6gMI=
github.com/pingcap/log v1.1.1-0.20221116035753-734d527bc87c/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pingcap/sysutil v0.0.0-20220114020952-ea68d2dbf5b4 h1:HYbcxtnkN3s5tqrZ/z3eJS4j3Db8wMphEm1q10lY/TM=
github.com/pingcap/tidb v1.1.0-beta.0.20221101102559-97add26c8f84 h1:qAougMzGGYDQ00UfIVs+M0LBaQQrL8yXdmLQEqqR8HY=
github.com/pingcap/tidb v1.1.0-beta.0.20221101102559-97add26c8f84/go.mod h1:lhtdZBXqJGbk2/Fr8JnT9KhXxEKy8h0vrDwdna/ou3g=
github.com/pingcap/tidb v1.1.0-beta.0.20230109054422-b477b1c94620 h1:153psGlYCgPaDYc3AtlDG5ePNPVE4T1c8iJGHFZInZw=
github.com/pingcap/tidb v1.1.0-beta.0.20230109054422-b477b1c94620/go.mod h1:Rounl3rQhnhOBh8e7pxAr6BhnNJh0nGvomRCRE6W98U=
github.com/pingcap/tidb/parser v0.0.0-20230109054422-b477b1c94620 h1:cEP+A7ylw8sGGOCHe8F8gto+vlkGQaYt09DHgHd2xmg=
Expand Down
8 changes: 8 additions & 0 deletions internal/locate/region_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,14 @@ func newRegion(bo *retry.Backoffer, c *RegionCache, pdRegion *pd.Region) (*Regio
if addr == "" {
continue
}

// Since the witness is read-write prohibited, it does not make sense to send requests to it
// unless it is the leader. When it is the leader and transfer leader encounters a problem,
// the backoff timeout will be triggered, and the client can give a more accurate error message.
if p.IsWitness && !isSamePeer(p, leader) {
continue
}

if isSamePeer(p, leader) {
leaderAccessIdx = AccessIndex(len(rs.accessIndex[tiKVOnly]))
}
Expand Down
36 changes: 36 additions & 0 deletions internal/locate/region_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1260,6 +1260,42 @@ func (s *testRegionCacheSuite) TestPeersLenChange() {
s.cache.OnSendFail(retry.NewNoopBackoff(context.Background()), ctx, false, errors.New("send fail"))
}

func (s *testRegionCacheSuite) TestPeersLenChangedByWitness() {
// 2 peers [peer1, peer2] and let peer2 become leader
loc, err := s.cache.LocateKey(s.bo, []byte("a"))
s.Nil(err)
s.cache.UpdateLeader(loc.Region, &metapb.Peer{Id: s.peer2, StoreId: s.store2}, 0)

// current leader is peer2 in [peer1, peer2]
loc, err = s.cache.LocateKey(s.bo, []byte("a"))
s.Nil(err)
ctx, err := s.cache.GetTiKVRPCContext(s.bo, loc.Region, kv.ReplicaReadLeader, 0)
s.Nil(err)
s.Equal(ctx.Peer.StoreId, s.store2)

// simulate peer1 become witness in kv heartbeat and loaded before response back.
cpMeta := &metapb.Region{
Id: ctx.Meta.Id,
StartKey: ctx.Meta.StartKey,
EndKey: ctx.Meta.EndKey,
RegionEpoch: ctx.Meta.RegionEpoch,
Peers: make([]*metapb.Peer, len(ctx.Meta.Peers)),
}
copy(cpMeta.Peers, ctx.Meta.Peers)
for _, peer := range cpMeta.Peers {
if peer.Id == s.peer1 {
peer.IsWitness = true
}
}
cpRegion := &pd.Region{Meta: cpMeta}
region, err := newRegion(s.bo, s.cache, cpRegion)
s.Nil(err)
s.cache.insertRegionToCache(region)

// OnSendFail should not panic
s.cache.OnSendFail(retry.NewNoopBackoff(context.Background()), ctx, false, errors.New("send fail"))
}

func createSampleRegion(startKey, endKey []byte) *Region {
return &Region{
meta: &metapb.Region{
Expand Down
12 changes: 12 additions & 0 deletions internal/locate/region_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -1403,6 +1403,8 @@ func regionErrorToLabel(e *errorpb.Error) string {
return "flashback_in_progress"
} else if e.GetFlashbackNotPrepared() != nil {
return "flashback_not_prepared"
} else if e.GetIsWitness() != nil {
return "peer_is_witness"
}
return "unknown"
}
Expand Down Expand Up @@ -1460,6 +1462,16 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
return false, nil
}

if regionErr.GetIsWitness() != nil {
s.regionCache.InvalidateCachedRegion(ctx.Region)
logutil.BgLogger().Debug("tikv reports `IsWitness`", zap.Stringer("ctx", ctx))
err = bo.Backoff(retry.BoIsWitness, errors.Errorf("is witness, ctx: %v", ctx))
if err != nil {
return false, err
}
return false, nil
}

// Since we expect that the workload should be stopped during the flashback progress,
// if a request meets the FlashbackInProgress error, it should stop retrying immediately
// to avoid unnecessary backoff and potential unexpected data status to the user.
Expand Down
7 changes: 5 additions & 2 deletions internal/retry/backoff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ func TestBackoffWithMax(t *testing.T) {
}

func TestBackoffErrorType(t *testing.T) {
// the actual maxSleep is multiplied by weight, which is 480ms
b := NewBackofferWithVars(context.TODO(), 210, nil)
// the actual maxSleep is multiplied by weight, which is 1600ms
b := NewBackofferWithVars(context.TODO(), 800, nil)
err := b.Backoff(BoRegionMiss, errors.New("region miss")) // 2ms sleep
assert.Nil(t, err)
// 6ms sleep at most in total
Expand All @@ -67,6 +67,9 @@ func TestBackoffErrorType(t *testing.T) {
// sleep from ServerIsBusy is not counted
err = b.Backoff(BoTiKVServerBusy, errors.New("server is busy"))
assert.Nil(t, err)
// 1000ms sleep at most in total
err = b.Backoff(BoIsWitness, errors.New("peer is witness"))
assert.Nil(t, err)
// wait it exceed max sleep
for i := 0; i < 10; i++ {
err = b.Backoff(BoTxnNotFound, errors.New("txn not found"))
Expand Down
1 change: 1 addition & 0 deletions internal/retry/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ var (
BoMaxTsNotSynced = NewConfig("maxTsNotSynced", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrTiKVMaxTimestampNotSynced)
BoMaxDataNotReady = NewConfig("dataNotReady", &metrics.BackoffHistogramDataNotReady, NewBackoffFnCfg(2, 2000, NoJitter), tikverr.ErrRegionDataNotReady)
BoMaxRegionNotInitialized = NewConfig("regionNotInitialized", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrRegionNotInitialized)
BoIsWitness = NewConfig("isWitness", &metrics.BackoffHistogramIsWitness, NewBackoffFnCfg(1000, 10000, EqualJitter), tikverr.ErrIsWitness)
// TxnLockFast's `base` load from vars.BackoffLockFast when create BackoffFn.
BoTxnLockFast = NewConfig(txnLockFastName, &metrics.BackoffHistogramLockFast, NewBackoffFnCfg(2, 3000, EqualJitter), tikverr.ErrResolveLockTimeout)
)
Expand Down
2 changes: 2 additions & 0 deletions metrics/shortcuts.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ var (
BackoffHistogramRegionRecoveryInProgress prometheus.Observer
BackoffHistogramStaleCmd prometheus.Observer
BackoffHistogramDataNotReady prometheus.Observer
BackoffHistogramIsWitness prometheus.Observer
BackoffHistogramEmpty prometheus.Observer

TxnRegionsNumHistogramWithSnapshot prometheus.Observer
Expand Down Expand Up @@ -166,6 +167,7 @@ func initShortcuts() {
BackoffHistogramRegionRecoveryInProgress = TiKVBackoffHistogram.WithLabelValues("regionRecoveryInProgress")
BackoffHistogramStaleCmd = TiKVBackoffHistogram.WithLabelValues("staleCommand")
BackoffHistogramDataNotReady = TiKVBackoffHistogram.WithLabelValues("dataNotReady")
BackoffHistogramIsWitness = TiKVBackoffHistogram.WithLabelValues("isWitness")
BackoffHistogramEmpty = TiKVBackoffHistogram.WithLabelValues("")

TxnRegionsNumHistogramWithSnapshot = TiKVTxnRegionsNumHistogram.WithLabelValues("snapshot")
Expand Down