From 14608bff8032e00fdc010c84cffc549077a3ce10 Mon Sep 17 00:00:00 2001 From: weekface Date: Mon, 26 Nov 2018 18:02:04 +0800 Subject: [PATCH 1/2] fix pd-control: not rely on error text --- pkg/controller/pd_control.go | 42 +++++++++++++++++++--------------- pkg/controller/tidb_control.go | 1 + 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/pkg/controller/pd_control.go b/pkg/controller/pd_control.go index 9989adacef5..13093e9e421 100644 --- a/pkg/controller/pd_control.go +++ b/pkg/controller/pd_control.go @@ -33,10 +33,6 @@ import ( const ( timeout = 5 * time.Second - - // https://github.com/pingcap/pd/blob/master/server/coordinator.go#L42-L45 - schedulerExisted = "scheduler existed" - schedulerNotFound = "scheduler not found" ) // PDControlInterface is an interface that knows how to manage and get tidb cluster's PD client @@ -328,13 +324,6 @@ func (pc *pdClient) DeleteStore(storeID uint64) error { return err } - // FIXME: We should not rely on error text - // Remove a tombstone store should returns "store has been removed" - bodyStr := string(body) - if strings.Contains(bodyStr, "store has been removed") { - return nil - } - return fmt.Errorf("failed to delete store %d: %v", storeID, string(body)) } @@ -407,15 +396,24 @@ func (pc *pdClient) BeginEvictLeader(storeID uint64) error { if res.StatusCode == http.StatusOK { return nil } - err2 := readErrorBody(res.Body) - if strings.Contains(err2.Error(), schedulerExisted) { - return nil + + evictLeaderSchedulers, err := pc.GetEvictLeaderSchedulers() + if err != nil { + return err + } + for _, s := range evictLeaderSchedulers { + if s == getLeaderEvictSchedulerStr(storeID) { + return nil + } } + + err2 := readErrorBody(res.Body) return fmt.Errorf("failed %v to begin evict leader of store:[%d],error: %v", res.StatusCode, storeID, err2) } func (pc *pdClient) EndEvictLeader(storeID uint64) error { - apiURL := fmt.Sprintf("%s/%s/%s", pc.url, schedulersPrefix, getLeaderEvictSchedulerStr(storeID)) + sName := getLeaderEvictSchedulerStr(storeID) + apiURL := fmt.Sprintf("%s/%s/%s", pc.url, schedulersPrefix, sName) req, err := http.NewRequest("DELETE", apiURL, nil) if err != nil { return err @@ -428,11 +426,19 @@ func (pc *pdClient) EndEvictLeader(storeID uint64) error { if res.StatusCode == http.StatusOK || res.StatusCode == http.StatusNotFound { return nil } + err2 := readErrorBody(res.Body) - if strings.Contains(err2.Error(), schedulerNotFound) { - return nil + evictLeaderSchedulers, err := pc.GetEvictLeaderSchedulers() + if err != nil { + return err + } + for _, s := range evictLeaderSchedulers { + if s == sName { + return fmt.Errorf("failed %v to end leader evict scheduler of store [%d], error: %v", res.StatusCode, storeID, err2) + } } - return fmt.Errorf("failed %v to end leader evict scheduler of store [%d],error:%v", res.StatusCode, storeID, err2) + + return nil } func (pc *pdClient) GetEvictLeaderSchedulers() ([]string, error) { diff --git a/pkg/controller/tidb_control.go b/pkg/controller/tidb_control.go index 15b9dd0f62e..4c4f99461cf 100644 --- a/pkg/controller/tidb_control.go +++ b/pkg/controller/tidb_control.go @@ -22,6 +22,7 @@ import ( ) const ( + // https://github.com/pingcap/tidb/blob/master/owner/manager.go#L183 // NotDDLOwnerError is the error message which was returned when the tidb node is not a ddl owner NotDDLOwnerError = "This node is not a ddl owner, can't be resigned." ) From 2190372bf59ee7d39c828afeaba85d03e680aa73 Mon Sep 17 00:00:00 2001 From: weekface Date: Wed, 28 Nov 2018 15:04:46 +0800 Subject: [PATCH 2/2] address comment --- pkg/controller/pd_control.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pkg/controller/pd_control.go b/pkg/controller/pd_control.go index 13093e9e421..b43a30f81e8 100644 --- a/pkg/controller/pd_control.go +++ b/pkg/controller/pd_control.go @@ -397,6 +397,13 @@ func (pc *pdClient) BeginEvictLeader(storeID uint64) error { return nil } + // pd will return an error with the body contains "scheduler existed" if the scheduler already exists + // this is not the standard response. + // so these lines are just a workaround for now: + // - make a new request to get all schedulers + // - return nil if the scheduler already exists + // + // when PD returns standard json response, we should get rid of this verbose code. evictLeaderSchedulers, err := pc.GetEvictLeaderSchedulers() if err != nil { return err @@ -427,6 +434,13 @@ func (pc *pdClient) EndEvictLeader(storeID uint64) error { return nil } + // pd will return an error with the body contains "scheduler not found" if the scheduler is not found + // this is not the standard response. + // so these lines are just a workaround for now: + // - make a new request to get all schedulers + // - return nil if the scheduler is not found + // + // when PD returns standard json response, we should get rid of this verbose code. err2 := readErrorBody(res.Body) evictLeaderSchedulers, err := pc.GetEvictLeaderSchedulers() if err != nil {