diff --git a/br/pkg/lightning/backend/local/local.go b/br/pkg/lightning/backend/local/local.go index fd91144734022..22ec265802dd4 100644 --- a/br/pkg/lightning/backend/local/local.go +++ b/br/pkg/lightning/backend/local/local.go @@ -943,6 +943,11 @@ func (local *local) WriteToTiKV( } } } + leaderID := region.Leader.GetId() + if leaderID == 0 { + return nil, Range{}, rangeStats{}, common.ErrNoLeader.GenWithStackByArgs(region.Region.Id, leaderID) + } + begin := time.Now() regionRange := intersectRange(region.Region, Range{start: start, end: end}) opt := &pebble.IterOptions{LowerBound: regionRange.start, UpperBound: regionRange.end} @@ -984,7 +989,6 @@ func (local *local) WriteToTiKV( // annotate the error with peer/store/region info to help debug. return errors.Annotatef(in, "peer %d, store %d, region %d, epoch %s", peer.Id, peer.StoreId, region.Region.Id, region.Region.RegionEpoch.String()) } - leaderID := region.Leader.GetId() clients := make([]sst.ImportSST_WriteClient, 0, len(region.Region.GetPeers())) allPeers := make([]*metapb.Peer, 0, len(region.Region.GetPeers())) requests := make([]*sst.WriteRequest, 0, len(region.Region.GetPeers())) @@ -1106,13 +1110,17 @@ func (local *local) WriteToTiKV( } } + failpoint.Inject("NoLeader", func() { + log.FromContext(ctx).Warn("enter failpoint NoLeader") + leaderPeerMetas = nil + }) + // if there is not leader currently, we should directly return an error if len(leaderPeerMetas) == 0 { log.FromContext(ctx).Warn("write to tikv no leader", logutil.Region(region.Region), logutil.Leader(region.Leader), zap.Uint64("leader_id", leaderID), logutil.SSTMeta(meta), zap.Int64("kv_pairs", totalCount), zap.Int64("total_bytes", size)) - return nil, Range{}, stats, errors.Errorf("write to tikv with no leader returned, region '%d', leader: %d", - region.Region.Id, leaderID) + return nil, Range{}, stats, common.ErrNoLeader.GenWithStackByArgs(region.Region.Id, leaderID) } log.FromContext(ctx).Debug("write to kv", zap.Reflect("region", region), zap.Uint64("leader", leaderID), diff --git a/br/pkg/lightning/common/errors.go b/br/pkg/lightning/common/errors.go index 14645217636a4..a5cf01e43b184 100644 --- a/br/pkg/lightning/common/errors.go +++ b/br/pkg/lightning/common/errors.go @@ -82,6 +82,7 @@ var ( ErrKVReadIndexNotReady = errors.Normalize("read index not ready", errors.RFCCodeText("Lightning:KV:ReadIndexNotReady")) ErrKVIngestFailed = errors.Normalize("ingest tikv failed", errors.RFCCodeText("Lightning:KV:ErrKVIngestFailed")) ErrKVRaftProposalDropped = errors.Normalize("raft proposal dropped", errors.RFCCodeText("Lightning:KV:ErrKVRaftProposalDropped")) + ErrNoLeader = errors.Normalize("write to tikv with no leader returned, region '%d', leader: %d", errors.RFCCodeText("Lightning:KV:ErrNoLeader")) ErrUnknownBackend = errors.Normalize("unknown backend %s", errors.RFCCodeText("Lightning:Restore:ErrUnknownBackend")) ErrCheckLocalFile = errors.Normalize("cannot find local file for table: %s engineDir: %s", errors.RFCCodeText("Lightning:Restore:ErrCheckLocalFile")) diff --git a/br/pkg/lightning/common/retry.go b/br/pkg/lightning/common/retry.go index f6db6cda86407..c3bb979a9bd32 100644 --- a/br/pkg/lightning/common/retry.go +++ b/br/pkg/lightning/common/retry.go @@ -70,6 +70,7 @@ func IsRetryableError(err error) bool { var retryableErrorIDs = map[errors.ErrorID]struct{}{ ErrKVEpochNotMatch.ID(): {}, ErrKVNotLeader.ID(): {}, + ErrNoLeader.ID(): {}, ErrKVRegionNotFound.ID(): {}, // common.ErrKVServerIsBusy is a little duplication with tmysql.ErrTiKVServerBusy // it's because the response of sst.ingest gives us a sst.IngestResponse which doesn't contain error code, diff --git a/br/tests/lightning_local_backend/config.toml b/br/tests/lightning_local_backend/config.toml index 46ca06e09b4ab..73c54882430c7 100644 --- a/br/tests/lightning_local_backend/config.toml +++ b/br/tests/lightning_local_backend/config.toml @@ -1,5 +1,6 @@ [lightning] table-concurrency = 1 +index-concurrency = 1 [checkpoint] enable = true diff --git a/br/tests/lightning_local_backend/run.sh b/br/tests/lightning_local_backend/run.sh index 60fca277d8cf6..b4eebef19c639 100755 --- a/br/tests/lightning_local_backend/run.sh +++ b/br/tests/lightning_local_backend/run.sh @@ -55,7 +55,7 @@ check_contains 'sum(c): 46' run_sql 'DROP DATABASE cpeng;' rm -f "/tmp/tidb_lightning_checkpoint_local_backend_test.pb" -export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=2*return("epochnotmatch")' +export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=2*return("epochnotmatch");github.com/pingcap/tidb/br/pkg/lightning/backend/local/NoLeader=1*return()' run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "tests/$TEST_NAME/config.toml" diff --git a/errors.toml b/errors.toml index b793e284e314a..c3d927027bfd0 100644 --- a/errors.toml +++ b/errors.toml @@ -406,6 +406,11 @@ error = ''' raft proposal dropped ''' +["Lightning:KV:ErrNoLeader"] +error = ''' +write to tikv with no leader returned, region '%d', leader: %d +''' + ["Lightning:KV:NotLeader"] error = ''' not leader