Skip to content

Commit

Permalink
tikv: fix infinite retry when kv region continuing to return StaleCom…
Browse files Browse the repository at this point in the history
…mand error (pingcap#16481) (pingcap#16527)
  • Loading branch information
sre-bot authored Apr 22, 2020
1 parent 71dc882 commit b310048
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 5 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ require (
github.com/pingcap/goleveldb v0.0.0-20171020084629-8d44bfdf1030
github.com/pingcap/kvproto v0.0.0-20190826051950-fc8799546726
github.com/pingcap/log v0.0.0-20190715063458-479153f07ebd
github.com/pingcap/parser v0.0.0-20191220111854-63cc130be9fa
github.com/pingcap/parser v2.1.20-0.20200421093405-8b0e305f0f94+incompatible
github.com/pingcap/pd v2.1.12+incompatible
github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible
github.com/pingcap/tipb v0.0.0-20200401051341-79a721ff4a15
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ github.com/pingcap/kvproto v0.0.0-20190826051950-fc8799546726 h1:AzGIEmaYVYMtmki
github.com/pingcap/kvproto v0.0.0-20190826051950-fc8799546726/go.mod h1:0gwbe1F2iBIjuQ9AH0DbQhL+Dpr5GofU8fgYyXk+ykk=
github.com/pingcap/log v0.0.0-20190715063458-479153f07ebd h1:hWDol43WY5PGhsh3+8794bFHY1bPrmu6bTalpssCrGg=
github.com/pingcap/log v0.0.0-20190715063458-479153f07ebd/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw=
github.com/pingcap/parser v0.0.0-20191220111854-63cc130be9fa h1:cDcoEb9rNlWifDVHGcBTFecK3MU4/Q2+LdJa6X6iK8Q=
github.com/pingcap/parser v0.0.0-20191220111854-63cc130be9fa/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA=
github.com/pingcap/parser v2.1.20-0.20200421093405-8b0e305f0f94+incompatible h1:Vnf0vLAUN5sLFlCEM9bjPZ9oLrerDpPX49m6tzcs1EQ=
github.com/pingcap/parser v2.1.20-0.20200421093405-8b0e305f0f94+incompatible/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA=
github.com/pingcap/pd v2.1.12+incompatible h1:6N3LBxx2aSZqT+IWEG730EDNDttP7dXO8J6yvBh+HXw=
github.com/pingcap/pd v2.1.12+incompatible/go.mod h1:nD3+EoYes4+aNNODO99ES59V83MZSI+dFbhyr667a0E=
github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible h1:e9Gi/LP9181HT3gBfSOeSBA+5JfemuE4aEAhqNgoE4k=
Expand Down
3 changes: 2 additions & 1 deletion mysql/errcode.go
Original file line number Diff line number Diff line change
Expand Up @@ -906,5 +906,6 @@ const (
ErrRegionUnavailable = 9005
ErrGCTooEarly = 9006

ErrTxnTooLarge = 9500
ErrTiKVStaleCommand = 9010
ErrTxnTooLarge = 9500
)
3 changes: 2 additions & 1 deletion mysql/errname.go
Original file line number Diff line number Diff line change
Expand Up @@ -903,5 +903,6 @@ var MySQLErrName = map[uint16]string{
ErrRegionUnavailable: "Region is unavailable",
ErrGCTooEarly: "GC life time is shorter than transaction duration, transaction starts at %v, GC safe point is %v",

ErrTxnTooLarge: "Transaction is too large",
ErrTiKVStaleCommand: "TiKV server reports stale command",
ErrTxnTooLarge: "Transaction is too large",
}
11 changes: 11 additions & 0 deletions store/tikv/backoff.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ var (
tikvBackoffCounterRegionMiss = metrics.TiKVBackoffCounter.WithLabelValues("regionMiss")
tikvBackoffCounterUpdateLeader = metrics.TiKVBackoffCounter.WithLabelValues("updateLeader")
tikvBackoffCounterServerBusy = metrics.TiKVBackoffCounter.WithLabelValues("serverBusy")
tikvBackoffCounterStaleCmd = metrics.TiKVBackoffCounter.WithLabelValues("staleCommand")
tikvBackoffCounterEmpty = metrics.TiKVBackoffCounter.WithLabelValues("")
tikvBackoffHistogramRPC = metrics.TiKVBackoffHistogram.WithLabelValues("tikvRPC")
tikvBackoffHistogramLock = metrics.TiKVBackoffHistogram.WithLabelValues("txnLock")
Expand All @@ -60,6 +61,7 @@ var (
tikvBackoffHistogramRegionMiss = metrics.TiKVBackoffHistogram.WithLabelValues("regionMiss")
tikvBackoffHistogramUpdateLeader = metrics.TiKVBackoffHistogram.WithLabelValues("updateLeader")
tikvBackoffHistogramServerBusy = metrics.TiKVBackoffHistogram.WithLabelValues("serverBusy")
tikvBackoffHistogramStaleCmd = metrics.TiKVBackoffHistogram.WithLabelValues("staleCommand")
tikvBackoffHistogramEmpty = metrics.TiKVBackoffHistogram.WithLabelValues("")
)

Expand All @@ -79,6 +81,8 @@ func (t backoffType) metric() (prometheus.Counter, prometheus.Histogram) {
return tikvBackoffCounterUpdateLeader, tikvBackoffHistogramUpdateLeader
case boServerBusy:
return tikvBackoffCounterServerBusy, tikvBackoffHistogramServerBusy
case boStaleCmd:
return tikvBackoffCounterStaleCmd, tikvBackoffHistogramStaleCmd
}
return tikvBackoffCounterEmpty, tikvBackoffHistogramEmpty
}
Expand Down Expand Up @@ -136,6 +140,7 @@ const (
BoRegionMiss
BoUpdateLeader
boServerBusy
boStaleCmd
)

func (t backoffType) createFn(vars *kv.Variables) func(context.Context) int {
Expand All @@ -157,6 +162,8 @@ func (t backoffType) createFn(vars *kv.Variables) func(context.Context) int {
return NewBackoffFn(1, 10, NoJitter)
case boServerBusy:
return NewBackoffFn(2000, 10000, EqualJitter)
case boStaleCmd:
return NewBackoffFn(2, 1000, NoJitter)
}
return nil
}
Expand All @@ -177,6 +184,8 @@ func (t backoffType) String() string {
return "updateLeader"
case boServerBusy:
return "serverBusy"
case boStaleCmd:
return "staleCommand"
}
return ""
}
Expand All @@ -193,6 +202,8 @@ func (t backoffType) TError() error {
return ErrRegionUnavailable
case boServerBusy:
return ErrTiKVServerBusy
case boStaleCmd:
return ErrTiKVStaleCommand
}
return terror.ClassTiKV.New(mysql.ErrUnknown, mysql.MySQLErrName[mysql.ErrUnknown])
}
Expand Down
1 change: 1 addition & 0 deletions store/tikv/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ var (
ErrRegionUnavailable = terror.ClassTiKV.New(mysql.ErrRegionUnavailable, mysql.MySQLErrName[mysql.ErrRegionUnavailable]+kv.TxnRetryableMark)
ErrTiKVServerBusy = terror.ClassTiKV.New(mysql.ErrTiKVServerBusy, mysql.MySQLErrName[mysql.ErrTiKVServerBusy]+kv.TxnRetryableMark)
ErrGCTooEarly = terror.ClassTiKV.New(mysql.ErrGCTooEarly, mysql.MySQLErrName[mysql.ErrGCTooEarly])
ErrTiKVStaleCommand = terror.ClassTiKV.New(mysql.ErrTiKVStaleCommand, mysql.MySQLErrName[mysql.ErrTiKVStaleCommand])
)

func init() {
Expand Down
4 changes: 4 additions & 0 deletions store/tikv/region_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,10 @@ func (s *RegionRequestSender) onRegionError(bo *Backoffer, ctx *RPCContext, regi
}
if regionErr.GetStaleCommand() != nil {
logutil.Logger(context.Background()).Debug("tikv reports `StaleCommand`", zap.Stringer("ctx", ctx))
err = bo.Backoff(boStaleCmd, errors.Errorf("stale command, ctx: %v", ctx))
if err != nil {
return false, errors.Trace(err)
}
return true, nil
}
if regionErr.GetRaftEntryTooLarge() != nil {
Expand Down
45 changes: 45 additions & 0 deletions store/tikv/region_request_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
. "github.com/pingcap/check"
"github.com/pingcap/errors"
"github.com/pingcap/kvproto/pkg/coprocessor"
"github.com/pingcap/kvproto/pkg/errorpb"
"github.com/pingcap/kvproto/pkg/kvrpcpb"
"github.com/pingcap/kvproto/pkg/tikvpb"
"github.com/pingcap/tidb/config"
Expand Down Expand Up @@ -55,6 +56,50 @@ func (s *testRegionRequestSuite) SetUpTest(c *C) {
s.regionRequestSender = NewRegionRequestSender(s.cache, client)
}

// fnClient is a test stub whose SendRequest behaviour is supplied by the fn
// field, so each test can script the response it needs. It is installed as
// RegionRequestSender.client in TestOnRegionError.
type fnClient struct {
// fn is invoked by SendRequest with the same arguments it receives.
fn func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (*tikvrpc.Response, error)
}

// Close is a no-op for the stub: fnClient only wraps a function value, so
// there is nothing to release and nil is always returned.
func (f *fnClient) Close() error {
return nil
}

// SendRequest hands the call, with its arguments unchanged, to the wrapped fn
// and returns exactly what fn returns.
func (f *fnClient) SendRequest(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (*tikvrpc.Response, error) {
	resp, err := f.fn(ctx, addr, req, timeout)
	return resp, err
}

// TestOnRegionError covers the StaleCommand path of region-error handling:
// when the store answers every request with a StaleCommand region error,
// SendReq must give up and return an error with a nil response rather than
// retrying forever.
func (s *testRegionRequestSuite) TestOnRegionError(c *C) {
	putReq := &tikvrpc.Request{
		Type:   tikvrpc.CmdRawPut,
		RawPut: &kvrpcpb.RawPutRequest{Key: []byte("key"), Value: []byte("value")},
	}
	loc, err := s.cache.LocateRegionByID(s.bo, s.region)
	c.Assert(err, IsNil)
	c.Assert(loc, NotNil)

	// Stub transport: regardless of the request, reply with a response that
	// carries a StaleCommand region error.
	alwaysStale := func(context.Context, string, *tikvrpc.Request, time.Duration) (*tikvrpc.Response, error) {
		regionErr := &errorpb.Error{StaleCommand: &errorpb.StaleCommand{}}
		return &tikvrpc.Response{
			Type: tikvrpc.CmdGet,
			Get:  &kvrpcpb.GetResponse{RegionError: regionErr},
		}, nil
	}

	// Install the stub and put the original client back when the test ends.
	original := s.regionRequestSender.client
	defer func() { s.regionRequestSender.client = original }()
	s.regionRequestSender.client = &fnClient{fn: alwaysStale}

	// NOTE(review): the second argument (5) is presumably a small backoff
	// budget that the stale-command retries exhaust quickly — confirm against
	// NewBackoffer.
	shortBo := NewBackoffer(context.Background(), 5)
	resp, sendErr := s.regionRequestSender.SendReq(shortBo, putReq, loc.Region, time.Second)
	c.Assert(sendErr, NotNil)
	c.Assert(resp, IsNil)
}

func (s *testRegionRequestSuite) TestOnSendFailedWithStoreRestart(c *C) {
req := &tikvrpc.Request{
Type: tikvrpc.CmdRawPut,
Expand Down

0 comments on commit b310048

Please sign in to comment.