From a6fb515dc040626ae733997599716b9913080c48 Mon Sep 17 00:00:00 2001 From: hehechen Date: Mon, 30 Jan 2023 18:05:50 +0800 Subject: [PATCH 1/3] skip down stores Signed-off-by: hehechen --- domain/infosync/tiflash_manager.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/domain/infosync/tiflash_manager.go b/domain/infosync/tiflash_manager.go index 4d01c64de002d..b4ea81f9941b4 100644 --- a/domain/infosync/tiflash_manager.go +++ b/domain/infosync/tiflash_manager.go @@ -31,6 +31,7 @@ import ( "github.com/gorilla/mux" "github.com/pingcap/errors" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb/ddl/placement" "github.com/pingcap/tidb/store/helper" "github.com/pingcap/tidb/tablecodec" @@ -92,7 +93,11 @@ func getTiFlashPeerWithoutLagCount(tiFlashStores map[int64]helper.StoreStat, tab if err != nil { logutil.BgLogger().Error("Fail to get peer status from TiFlash.", zap.Int64("tableID", tableID)) - return 0, err + // Just skip down or offline or tomestone stores, because PD will migrate regions from these stores. 
+ if store.Store.State == int64(metapb.StoreState_Up) && store.Store.StateName != "Down" { + return 0, err + } + continue } flashPeerCount += len(regionReplica) } From ad65a0c13f2f881c960ffc8505a9ebcda699cdd7 Mon Sep 17 00:00:00 2001 From: hehechen Date: Mon, 30 Jan 2023 19:44:26 +0800 Subject: [PATCH 2/3] add ut Signed-off-by: hehechen --- ddl/ddl_tiflash_api.go | 8 ++++++++ ddl/tiflashtest/ddl_tiflash_test.go | 20 ++++++++++++++++++++ domain/infosync/tiflash_manager.go | 6 ++++++ 3 files changed, 34 insertions(+) diff --git a/ddl/ddl_tiflash_api.go b/ddl/ddl_tiflash_api.go index 4b8fca2a91c0f..1ade909b93ee9 100644 --- a/ddl/ddl_tiflash_api.go +++ b/ddl/ddl_tiflash_api.go @@ -424,6 +424,14 @@ func (d *ddl) refreshTiFlashTicker(ctx sessionctx.Context, pollTiFlashContext *T return err } } + + failpoint.Inject("OneTiFlashStoreDown", func() { + for storeID, store := range pollTiFlashContext.TiFlashStores { + store.Store.StateName = "Down" + pollTiFlashContext.TiFlashStores[storeID] = store + break + } + }) pollTiFlashContext.PollCounter++ // Start to process every table. 
diff --git a/ddl/tiflashtest/ddl_tiflash_test.go b/ddl/tiflashtest/ddl_tiflash_test.go index d1d0368138b18..c3ec3a1d2b0fb 100644 --- a/ddl/tiflashtest/ddl_tiflash_test.go +++ b/ddl/tiflashtest/ddl_tiflash_test.go @@ -1334,3 +1334,23 @@ func TestTiFlashAvailableAfterAddPartition(t *testing.T) { require.NotNil(t, pi) require.Equal(t, len(pi.Definitions), 2) } + +func TestTiFlashAvailableAfterDownOneStore(t *testing.T) { + s, teardown := createTiFlashContext(t) + defer teardown() + tk := testkit.NewTestKit(t, s.store) + + tk.MustExec("use test") + tk.MustExec("drop table if exists ddltiflash") + tk.MustExec("create table ddltiflash(z int) PARTITION BY RANGE(z) (PARTITION p0 VALUES LESS THAN (10))") + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/ddl/OneTiFlashStoreDown", `return`)) + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/domain/infosync/OneTiFlashStoreDown", `return`)) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/ddl/OneTiFlashStoreDown")) + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/domain/infosync/OneTiFlashStoreDown")) + }() + + tk.MustExec("alter table ddltiflash set tiflash replica 1") + time.Sleep(ddl.PollTiFlashInterval * RoundToBeAvailable * 3) + CheckTableAvailable(s.dom, t, 1, []string{}) +} diff --git a/domain/infosync/tiflash_manager.go b/domain/infosync/tiflash_manager.go index b4ea81f9941b4..564e90990f585 100644 --- a/domain/infosync/tiflash_manager.go +++ b/domain/infosync/tiflash_manager.go @@ -31,6 +31,7 @@ import ( "github.com/gorilla/mux" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb/ddl/placement" "github.com/pingcap/tidb/store/helper" @@ -90,6 +91,11 @@ func getTiFlashPeerWithoutLagCount(tiFlashStores map[int64]helper.StoreStat, tab for _, store := range tiFlashStores { regionReplica := make(map[int64]int) err := helper.CollectTiFlashStatus(store.Store.StatusAddress, tableID, 
&regionReplica) + failpoint.Inject("OneTiFlashStoreDown", func() { + if store.Store.StateName == "Down" { + err = errors.New("mock TiFlasah down") + } + }) if err != nil { logutil.BgLogger().Error("Fail to get peer status from TiFlash.", zap.Int64("tableID", tableID)) From e4b24418c5421144172cc90592551502a4965e91 Mon Sep 17 00:00:00 2001 From: hehechen Date: Tue, 31 Jan 2023 12:09:13 +0800 Subject: [PATCH 3/3] address comment Signed-off-by: hehechen --- domain/infosync/tiflash_manager.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/domain/infosync/tiflash_manager.go b/domain/infosync/tiflash_manager.go index 564e90990f585..d5cc46f95db95 100644 --- a/domain/infosync/tiflash_manager.go +++ b/domain/infosync/tiflash_manager.go @@ -32,7 +32,6 @@ import ( "github.com/gorilla/mux" "github.com/pingcap/errors" "github.com/pingcap/failpoint" - "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb/ddl/placement" "github.com/pingcap/tidb/store/helper" "github.com/pingcap/tidb/tablecodec" @@ -100,7 +99,7 @@ func getTiFlashPeerWithoutLagCount(tiFlashStores map[int64]helper.StoreStat, tab logutil.BgLogger().Error("Fail to get peer status from TiFlash.", zap.Int64("tableID", tableID)) // Just skip down or offline or tomestone stores, because PD will migrate regions from these stores. - if store.Store.State == int64(metapb.StoreState_Up) && store.Store.StateName != "Down" { + if store.Store.StateName == "Up" || store.Store.StateName == "Disconnected" { return 0, err } continue