Skip to content

Commit

Permalink
define a new backoff error for witness
Browse files Browse the repository at this point in the history
ref tikv/tikv#12876

Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
  • Loading branch information
ethercflow committed Dec 31, 2022
1 parent 4bd44a8 commit 0ba8b34
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 6 deletions.
2 changes: 2 additions & 0 deletions error/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ var (
ErrRegionFlashbackInProgress = errors.New("region is in the flashback progress")
// ErrRegionFlashbackNotPrepared is the error when a region is not prepared for the flashback first.
ErrRegionFlashbackNotPrepared = errors.New("region is not prepared for the flashback")
// ErrIsWitness is the error when a request is sent to a witness.
ErrIsWitness = errors.New("peer is witness")
// ErrUnknown is the unknown error.
ErrUnknown = errors.New("unknow")
// ErrResultUndetermined is the error when execution result is unknown.
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,5 @@ require (
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
stathat.com/c/consistent v1.0.0 // indirect
)

replace github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a => github.com/ethercflow/kvproto v0.0.0-20221231014813-181758d64db6
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.m
github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/ethercflow/kvproto v0.0.0-20221231014813-181758d64db6 h1:KTC2tgCNSjNfLx36uQELMe96jVduYfFF9EnYYqHI10I=
github.com/ethercflow/kvproto v0.0.0-20221231014813-181758d64db6/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
Expand Down Expand Up @@ -155,8 +157,6 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E=
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a h1:LzIZsQpXQlj8yF7+yvyOg680OaPq7bmPuDuszgXfHsw=
github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
Expand Down
17 changes: 17 additions & 0 deletions internal/locate/region_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,23 @@ func (c *RegionCache) SetRegionCacheStore(id uint64, storeType tikvrpc.EndpointT
}
}

// ChangeFollowersToWitness flags the follower peers of a cached region as
// witnesses. It is for testing only.
//
// regionID identifies the cached region to modify and leaderStoreIdx is the
// store index that must be left alone. Only indexes listed in the region
// store's tiKVOnly access index are visited, and peers whose role is Learner
// are never flagged. The call does nothing when the region is not cached or
// is no longer valid.
func (c *RegionCache) ChangeFollowersToWitness(regionID RegionVerID, leaderStoreIdx int) {
	region := c.GetCachedRegionWithRLock(regionID)
	if region == nil || !region.isValid() {
		return
	}
	for _, idx := range region.getStore().accessIndex[tiKVOnly] {
		// Skip the leader: only followers are turned into witnesses.
		if idx == leaderStoreIdx {
			continue
		}
		// Peers are looked up in the region meta by the same index.
		if p := region.meta.Peers[idx]; p.GetRole() != metapb.PeerRole_Learner {
			p.IsWitness = true
		}
	}
}

// SetPDClient replaces pd client, for testing only
func (c *RegionCache) SetPDClient(client pd.Client) {
c.pdClient = client
Expand Down
14 changes: 13 additions & 1 deletion internal/locate/region_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -1407,6 +1407,8 @@ func regionErrorToLabel(e *errorpb.Error) string {
return "flashback_in_progress"
} else if e.GetFlashbackNotPrepared() != nil {
return "flashback_not_prepared"
} else if e.GetIsWitness() != nil {
return "peer_is_witness"
}
return "unknown"
}
Expand Down Expand Up @@ -1461,7 +1463,7 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
if err != nil {
return false, err
}
return true, nil
return false, nil
}

// Since we expect that the workload should be stopped during the flashback progress,
Expand Down Expand Up @@ -1615,6 +1617,16 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
return true, nil
}

// TiKV reported that the request reached a witness peer. Drop the cached
// region, then back off with the witness-specific config so the sleep is
// accounted under BoIsWitness (and its ErrIsWitness) rather than under the
// unrelated region-recovery backoff. Returning retry=false hands the decision
// back to the caller once the backoff has completed.
if regionErr.GetIsWitness() != nil {
	s.regionCache.InvalidateCachedRegion(ctx.Region)
	logutil.BgLogger().Debug("tikv reports `IsWitness`", zap.Stringer("ctx", ctx))
	err = bo.Backoff(retry.BoIsWitness, errors.Errorf("is witness, ctx: %v", ctx))
	if err != nil {
		return false, err
	}
	return false, nil
}

logutil.BgLogger().Debug("tikv reports region failed",
zap.Stringer("regionErr", regionErr),
zap.Stringer("ctx", ctx))
Expand Down
18 changes: 17 additions & 1 deletion internal/locate/region_request3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ func (s *testRegionRequestToThreeStoresSuite) TestReplicaSelector() {
regionStore.stores[sidx].epoch++
regionStore.storeEpochs[sidx]++
// Add a TiFlash peer to the region.
peer := &metapb.Peer{Id: s.cluster.AllocID(), StoreId: s.cluster.AllocID()}
peer := &metapb.Peer{Id: s.cluster.AllocID(), StoreId: s.cluster.AllocID(), Role: metapb.PeerRole_Learner}
regionStore.accessIndex[tiFlashOnly] = append(regionStore.accessIndex[tiFlashOnly], len(regionStore.stores))
regionStore.stores = append(regionStore.stores, &Store{storeID: peer.StoreId, storeType: tikvrpc.TiFlash})
regionStore.storeEpochs = append(regionStore.storeEpochs, 0)
Expand Down Expand Up @@ -598,6 +598,22 @@ func (s *testRegionRequestToThreeStoresSuite) TestReplicaSelector() {
s.NotNil(replicaSelector)
s.Nil(err)

// Test accessFollower state filtering witness peer.
region.lastAccess = time.Now().Unix()
// Change all followers to witnesses.
cache.ChangeFollowersToWitness(regionLoc.Region, sidx)
refreshEpochs(regionStore)
replicaSelector, err = newReplicaSelector(cache, regionLoc.Region, req)
s.NotNil(replicaSelector)
s.Nil(err)
state3 = replicaSelector.state.(*accessFollower)
// Should fall back to the leader immediately.
rpcCtx, err = replicaSelector.next(s.bo)
s.Nil(err)
s.Equal(regionStore.workTiKVIdx, state3.lastIdx)
s.Equal(replicaSelector.targetIdx, state3.lastIdx)
assertRPCCtxEqual(rpcCtx, replicaSelector.replicas[regionStore.workTiKVIdx], nil)

// Invalidate the region if the leader is not in the region.
region.lastAccess = time.Now().Unix()
replicaSelector.updateLeader(&metapb.Peer{Id: s.cluster.AllocID(), StoreId: s.cluster.AllocID()})
Expand Down
7 changes: 5 additions & 2 deletions internal/retry/backoff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ func TestBackoffWithMax(t *testing.T) {
}

func TestBackoffErrorType(t *testing.T) {
// the actual maxSleep is multiplied by weight, which is 480ms
b := NewBackofferWithVars(context.TODO(), 210, nil)
// the actual maxSleep is multiplied by weight, which is 1600ms
b := NewBackofferWithVars(context.TODO(), 800, nil)
err := b.Backoff(BoRegionMiss, errors.New("region miss")) // 2ms sleep
assert.Nil(t, err)
// 6ms sleep at most in total
Expand All @@ -67,6 +67,9 @@ func TestBackoffErrorType(t *testing.T) {
// sleep from ServerIsBusy is not counted
err = b.Backoff(BoTiKVServerBusy, errors.New("server is busy"))
assert.Nil(t, err)
// 1000ms sleep at most in total
err = b.Backoff(BoIsWitness, errors.New("peer is witness"))
assert.Nil(t, err)
// wait it exceed max sleep
for i := 0; i < 10; i++ {
err = b.Backoff(BoTxnNotFound, errors.New("txn not found"))
Expand Down
1 change: 1 addition & 0 deletions internal/retry/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ var (
BoMaxTsNotSynced = NewConfig("maxTsNotSynced", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrTiKVMaxTimestampNotSynced)
BoMaxDataNotReady = NewConfig("dataNotReady", &metrics.BackoffHistogramDataNotReady, NewBackoffFnCfg(2, 2000, NoJitter), tikverr.ErrRegionDataNotReady)
BoMaxRegionNotInitialized = NewConfig("regionNotInitialized", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrRegionNotInitialized)
BoIsWitness = NewConfig("isWitness", &metrics.BackoffHistogramIsWitness, NewBackoffFnCfg(1000, 10000, EqualJitter), tikverr.ErrIsWitness)
// TxnLockFast's `base` load from vars.BackoffLockFast when create BackoffFn.
BoTxnLockFast = NewConfig(txnLockFastName, &metrics.BackoffHistogramLockFast, NewBackoffFnCfg(2, 3000, EqualJitter), tikverr.ErrResolveLockTimeout)
)
Expand Down
2 changes: 2 additions & 0 deletions metrics/shortcuts.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ var (
BackoffHistogramRegionRecoveryInProgress prometheus.Observer
BackoffHistogramStaleCmd prometheus.Observer
BackoffHistogramDataNotReady prometheus.Observer
BackoffHistogramIsWitness prometheus.Observer
BackoffHistogramEmpty prometheus.Observer

TxnRegionsNumHistogramWithSnapshot prometheus.Observer
Expand Down Expand Up @@ -166,6 +167,7 @@ func initShortcuts() {
BackoffHistogramRegionRecoveryInProgress = TiKVBackoffHistogram.WithLabelValues("regionRecoveryInProgress")
BackoffHistogramStaleCmd = TiKVBackoffHistogram.WithLabelValues("staleCommand")
BackoffHistogramDataNotReady = TiKVBackoffHistogram.WithLabelValues("dataNotReady")
BackoffHistogramIsWitness = TiKVBackoffHistogram.WithLabelValues("isWitness")
BackoffHistogramEmpty = TiKVBackoffHistogram.WithLabelValues("")

TxnRegionsNumHistogramWithSnapshot = TiKVTxnRegionsNumHistogram.WithLabelValues("snapshot")
Expand Down

0 comments on commit 0ba8b34

Please sign in to comment.