Skip to content

Commit

Permalink
failover for tiflash (#2249)
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielZhangQD authored Apr 22, 2020
1 parent ab23098 commit 75c26b2
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 4 deletions.
3 changes: 3 additions & 0 deletions pkg/apis/pingcap/v1alpha1/defaulting/tidbcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,7 @@ func setTiFlashSpecDefault(tc *v1alpha1.TidbCluster) {
tc.Spec.TiFlash.BaseImage = defaultTiFlashImage
}
}
if tc.Spec.TiFlash.MaxFailoverCount == nil {
tc.Spec.TiFlash.MaxFailoverCount = pointer.Int32Ptr(3)
}
}
2 changes: 1 addition & 1 deletion pkg/controller/tidbcluster/tidb_cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func NewController(
pdFailover := mm.NewPDFailover(cli, pdControl, pdFailoverPeriod, podInformer.Lister(), podControl, pvcInformer.Lister(), pvcControl, pvInformer.Lister(), recorder)
tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod, recorder)
tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod, recorder)
tiflashFailover := mm.NewTiFlashFailover(tiflashFailoverPeriod)
tiflashFailover := mm.NewTiFlashFailover(tiflashFailoverPeriod, recorder)
pdUpgrader := mm.NewPDUpgrader(pdControl, podControl, podInformer.Lister())
tikvUpgrader := mm.NewTiKVUpgrader(pdControl, podControl, podInformer.Lister())
tiflashUpgrader := mm.NewTiFlashUpgrader(pdControl, podControl, podInformer.Lister())
Expand Down
47 changes: 44 additions & 3 deletions pkg/manager/member/tiflash_failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,63 @@
package member

import (
"fmt"
"time"

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
)

// tiflashFailover holds the settings used by the TiFlash Failover method.
// NOTE: NewTiFlashFailover fills this struct positionally, so the field order
// below must stay in sync with that constructor.
type tiflashFailover struct {
// tiflashFailoverPeriod is added to a store's LastTransitionTime to get the
// deadline after which a Down store is recorded as a failure store.
tiflashFailoverPeriod time.Duration
// recorder emits the warning event when a store is recorded as failed.
recorder record.EventRecorder
}

// NewTiFlashFailover returns a tiflash Failover
func NewTiFlashFailover(tiflashFailoverPeriod time.Duration) Failover {
return &tiflashFailover{tiflashFailoverPeriod}
func NewTiFlashFailover(tiflashFailoverPeriod time.Duration, recorder record.EventRecorder) Failover {
return &tiflashFailover{tiflashFailoverPeriod, recorder}
}

// Failover records TiFlash stores that have stayed Down for longer than
// tiflashFailoverPeriod in tc.Status.TiFlash.FailureStores and emits a warning
// event for every store it newly records.
//
// A store is skipped when its LastTransitionTime is unset, when it is not in
// the Down state, when the failover period has not elapsed yet, or when a
// failure store with the same pod name is already recorded. Nothing is
// recorded when Spec.TiFlash.MaxFailoverCount is nil or not positive. Once the
// number of recorded failure stores reaches MaxFailoverCount, a warning is
// logged and the method returns without looking at the remaining stores.
//
// The method mutates tc.Status in place and always returns nil.
//
// TODO: Finish the failover logic
func (tff *tiflashFailover) Failover(tc *v1alpha1.TidbCluster) error {
	ns := tc.GetNamespace()
	tcName := tc.GetName()

	for storeID, store := range tc.Status.TiFlash.Stores {
		podName := store.PodName
		if store.LastTransitionTime.IsZero() {
			continue
		}
		deadline := store.LastTransitionTime.Add(tff.tiflashFailoverPeriod)
		if store.State != v1alpha1.TiKVStateDown || !time.Now().After(deadline) {
			continue
		}

		// Failure stores are matched by pod name, not by store ID.
		exist := false
		for _, failureStore := range tc.Status.TiFlash.FailureStores {
			if failureStore.PodName == podName {
				exist = true
				break
			}
		}
		if exist {
			continue
		}

		// Writing to a nil map panics, so make sure the map exists first.
		if tc.Status.TiFlash.FailureStores == nil {
			tc.Status.TiFlash.FailureStores = map[string]v1alpha1.TiKVFailureStore{}
		}
		if tc.Spec.TiFlash.MaxFailoverCount == nil || *tc.Spec.TiFlash.MaxFailoverCount <= 0 {
			continue
		}

		maxFailoverCount := *tc.Spec.TiFlash.MaxFailoverCount
		if len(tc.Status.TiFlash.FailureStores) >= int(maxFailoverCount) {
			// Log the dereferenced value: passing the *int32 itself to %d
			// would print the pointer address rather than the limit.
			klog.Warningf("%s/%s TiFlash failure stores count reached the limit: %d", ns, tcName, maxFailoverCount)
			return nil
		}

		tc.Status.TiFlash.FailureStores[storeID] = v1alpha1.TiKVFailureStore{
			PodName:   podName,
			StoreID:   store.ID,
			CreatedAt: metav1.Now(),
		}
		msg := fmt.Sprintf("store [%s] is Down", store.ID)
		tff.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tiflash", podName, msg))
	}
	return nil
}

Expand Down

0 comments on commit 75c26b2

Please sign in to comment.