diff --git a/store/tikv/gcworker/gc_worker.go b/store/tikv/gcworker/gc_worker.go
index 1231e96c65892..71254177df858 100644
--- a/store/tikv/gcworker/gc_worker.go
+++ b/store/tikv/gcworker/gc_worker.go
@@ -410,7 +410,7 @@ func (w *GCWorker) getGCConcurrency(ctx context.Context) (int, error) {
 		return w.loadGCConcurrencyWithDefault()
 	}
 
-	stores, err := w.getUpStoresForGC(ctx)
+	stores, err := w.getStoresForGC(ctx)
 	concurrency := len(stores)
 	if err != nil {
 		logutil.Logger(ctx).Error("[gc worker] failed to get up stores to calculate concurrency. use config.",
@@ -457,8 +457,8 @@ func (w *GCWorker) checkGCInterval(now time.Time) (bool, error) {
 	return true, nil
 }
 
-// validateGCLiftTime checks whether life time is small than min gc life time.
-func (w *GCWorker) validateGCLiftTime(lifeTime time.Duration) (time.Duration, error) {
+// validateGCLifeTime checks whether life time is small than min gc life time.
+func (w *GCWorker) validateGCLifeTime(lifeTime time.Duration) (time.Duration, error) {
 	if lifeTime >= gcMinLifeTime {
 		return lifeTime, nil
 	}
@@ -476,7 +476,7 @@ func (w *GCWorker) calculateNewSafePoint(ctx context.Context, now time.Time) (*t
 	if err != nil {
 		return nil, 0, errors.Trace(err)
 	}
-	*lifeTime, err = w.validateGCLiftTime(*lifeTime)
+	*lifeTime, err = w.validateGCLifeTime(*lifeTime)
 	if err != nil {
 		return nil, 0, err
 	}
@@ -710,7 +710,7 @@ func (w *GCWorker) redoDeleteRanges(ctx context.Context, safePoint uint64, concu
 
 func (w *GCWorker) doUnsafeDestroyRangeRequest(ctx context.Context, startKey []byte, endKey []byte, concurrency int) error {
 	// Get all stores every time deleting a region. So the store list is less probably to be stale.
-	stores, err := w.getUpStoresForGC(ctx)
+	stores, err := w.getStoresForGC(ctx)
 	if err != nil {
 		logutil.Logger(ctx).Error("[gc worker] delete ranges: got an error while trying to get store list from PD",
 			zap.String("uuid", w.uuid),
@@ -786,8 +786,14 @@ const (
 // needsGCOperationForStore checks if the store-level requests related to GC needs to be sent to the store. The store-level
 // requests includes UnsafeDestroyRange, PhysicalScanLock, etc.
 func needsGCOperationForStore(store *metapb.Store) (bool, error) {
-	engineLabel := ""
+	// TombStone means the store has been removed from the cluster and there isn't any peer on the store, so needn't do GC for it.
+	// Offline means the store is being removed from the cluster and it becomes tombstone after all peers are removed from it,
+	// so we need to do GC for it.
+	if store.State == metapb.StoreState_Tombstone {
+		return false, nil
+	}
 
+	engineLabel := ""
 	for _, label := range store.GetLabels() {
 		if label.GetKey() == engineLabelKey {
 			engineLabel = label.GetValue()
@@ -816,8 +822,8 @@ func needsGCOperationForStore(store *metapb.Store) (bool, error) {
 	}
 }
-// getUpStoresForGC gets the list of stores that needs to be processed during GC.
-func (w *GCWorker) getUpStoresForGC(ctx context.Context) ([]*metapb.Store, error) {
+// getStoresForGC gets the list of stores that needs to be processed during GC.
+func (w *GCWorker) getStoresForGC(ctx context.Context) ([]*metapb.Store, error) {
 	stores, err := w.pdClient.GetAllStores(ctx)
 	if err != nil {
 		return nil, errors.Trace(err)
 	}
@@ -825,9 +831,6 @@ func (w *GCWorker) getUpStoresForGC(ctx context.Context) ([]*metapb.Store, error
 	upStores := make([]*metapb.Store, 0, len(stores))
 	for _, store := range stores {
-		if store.State != metapb.StoreState_Up {
-			continue
-		}
 		needsGCOp, err := needsGCOperationForStore(store)
 		if err != nil {
 			return nil, errors.Trace(err)
 		}
@@ -839,8 +842,8 @@ func (w *GCWorker) getUpStoresForGC(ctx context.Context) ([]*metapb.Store, error
 	return upStores, nil
 }
 
-func (w *GCWorker) getUpStoresMapForGC(ctx context.Context) (map[uint64]*metapb.Store, error) {
-	stores, err := w.getUpStoresForGC(ctx)
+func (w *GCWorker) getStoresMapForGC(ctx context.Context) (map[uint64]*metapb.Store, error) {
+	stores, err := w.getStoresForGC(ctx)
 	if err != nil {
 		return nil, err
 	}
@@ -1080,7 +1083,7 @@ func (w *GCWorker) resolveLocksPhysical(ctx context.Context, safePoint uint64) e
 	registeredStores := make(map[uint64]*metapb.Store)
 	defer w.removeLockObservers(ctx, safePoint, registeredStores)
 
-	dirtyStores, err := w.getUpStoresMapForGC(ctx)
+	dirtyStores, err := w.getStoresMapForGC(ctx)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -1101,7 +1104,7 @@ func (w *GCWorker) resolveLocksPhysical(ctx context.Context, safePoint uint64) e
 
 		failpoint.Inject("beforeCheckLockObservers", func() {})
 
-		stores, err := w.getUpStoresMapForGC(ctx)
+		stores, err := w.getStoresMapForGC(ctx)
 		if err != nil {
 			return errors.Trace(err)
 		}
diff --git a/store/tikv/gcworker/gc_worker_test.go b/store/tikv/gcworker/gc_worker_test.go
index 3c76d884830d5..504c5cc05e3ad 100644
--- a/store/tikv/gcworker/gc_worker_test.go
+++ b/store/tikv/gcworker/gc_worker_test.go
@@ -497,8 +497,9 @@ func (s *testGCWorkerSuite) TestCheckScanLockMode(c *C) {
 }
 
 func (s *testGCWorkerSuite) TestNeedsGCOperationForStore(c *C) {
-	newStore := func(hasEngineLabel bool, engineLabel string) *metapb.Store {
+	newStore := func(state metapb.StoreState, hasEngineLabel bool, engineLabel string) *metapb.Store {
 		store := &metapb.Store{}
+		store.State = state
 		if hasEngineLabel {
 			store.Labels = []*metapb.StoreLabel{{Key: engineLabelKey, Value: engineLabel}}
 		}
@@ -506,23 +507,25 @@ func (s *testGCWorkerSuite) TestNeedsGCOperationForStore(c *C) {
 	}
 
 	// TiKV needs to do the store-level GC operations.
-	res, err := needsGCOperationForStore(newStore(false, ""))
-	c.Assert(err, IsNil)
-	c.Assert(res, IsTrue)
-	res, err = needsGCOperationForStore(newStore(true, ""))
-	c.Assert(err, IsNil)
-	c.Assert(res, IsTrue)
-	res, err = needsGCOperationForStore(newStore(true, engineLabelTiKV))
-	c.Assert(err, IsNil)
-	c.Assert(res, IsTrue)
-
-	// TiFlash does not need these operations.
-	res, err = needsGCOperationForStore(newStore(true, engineLabelTiFlash))
-	c.Assert(err, IsNil)
-	c.Assert(res, IsFalse)
+	for _, state := range []metapb.StoreState{metapb.StoreState_Up, metapb.StoreState_Offline, metapb.StoreState_Tombstone} {
+		needGC := state != metapb.StoreState_Tombstone
+		res, err := needsGCOperationForStore(newStore(state, false, ""))
+		c.Assert(err, IsNil)
+		c.Assert(res, Equals, needGC)
+		res, err = needsGCOperationForStore(newStore(state, true, ""))
+		c.Assert(err, IsNil)
+		c.Assert(res, Equals, needGC)
+		res, err = needsGCOperationForStore(newStore(state, true, engineLabelTiKV))
+		c.Assert(err, IsNil)
+		c.Assert(res, Equals, needGC)
+		// TiFlash does not need these operations.
+		res, err = needsGCOperationForStore(newStore(state, true, engineLabelTiFlash))
+		c.Assert(err, IsNil)
+		c.Assert(res, IsFalse)
+	}
 
 	// Throw an error for unknown store types.
-	_, err = needsGCOperationForStore(newStore(true, "invalid"))
+	_, err := needsGCOperationForStore(newStore(metapb.StoreState_Up, true, "invalid"))
 	c.Assert(err, NotNil)
 }
 
@@ -569,7 +572,7 @@ func (s *testGCWorkerSuite) testDeleteRangesFailureImpl(c *C, failType int) {
 	c.Assert(err, IsNil)
 	c.Assert(preparedRanges, DeepEquals, ranges)
 
-	stores, err := s.gcWorker.getUpStoresForGC(context.Background())
+	stores, err := s.gcWorker.getStoresForGC(context.Background())
 	c.Assert(err, IsNil)
 	c.Assert(len(stores), Equals, 3)
 
@@ -977,7 +980,7 @@ func (s *testGCWorkerSuite) makeMergedMockClient(c *C, count int) (*mergeLockSca
 
 	const scanLockLimit = 3
 
-	storesMap, err := s.gcWorker.getUpStoresMapForGC(context.Background())
+	storesMap, err := s.gcWorker.getStoresMapForGC(context.Background())
 	c.Assert(err, IsNil)
 	scanner := newMergeLockScanner(100000, s.client, storesMap)
 	scanner.scanLockLimit = scanLockLimit
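For readers skimming the patch, the standalone Go sketch below (not part of the diff) illustrates the store-filtering rule it introduces: Up and Offline stores keep receiving store-level GC requests such as UnsafeDestroyRange, while Tombstone stores and TiFlash stores are skipped. It uses simplified stand-in types and literal label values ("engine", "tikv", "tiflash") in place of the real metapb types and the package's engineLabelKey/engineLabelTiKV/engineLabelTiFlash constants, so treat it as an approximation of needsGCOperationForStore rather than the actual implementation.

// Standalone sketch, not part of the patch: simplified stand-ins for the
// metapb store types are used instead of the real kvproto definitions.
package main

import (
	"errors"
	"fmt"
)

type storeState int

const (
	stateUp storeState = iota
	stateOffline
	stateTombstone
)

type store struct {
	state  storeState
	labels map[string]string // e.g. {"engine": "tiflash"}
}

// needsGCOperation mirrors the patched filtering logic: a tombstone store has
// no peers left, so it is skipped; an offline store is still being drained, so
// it must keep receiving store-level GC requests.
func needsGCOperation(s store) (bool, error) {
	if s.state == stateTombstone {
		return false, nil
	}
	switch s.labels["engine"] {
	case "", "tikv":
		// No engine label (or an explicit "tikv") means a TiKV node.
		return true, nil
	case "tiflash":
		// TiFlash does not serve these store-level requests.
		return false, nil
	default:
		return false, errors.New("unsupported store engine " + s.labels["engine"])
	}
}

func main() {
	fmt.Println(needsGCOperation(store{state: stateOffline}))   // true <nil>
	fmt.Println(needsGCOperation(store{state: stateTombstone})) // false <nil>
	fmt.Println(needsGCOperation(store{state: stateUp,
		labels: map[string]string{"engine": "tiflash"}})) // false <nil>
}

This matches the updated test table: for each of Up, Offline, and Tombstone, only the Tombstone case expects needsGCOperationForStore to return false for TiKV stores, and TiFlash stores always return false.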