From 8a7e46b2d104754c9f190f9c8e9dfa3f68528a39 Mon Sep 17 00:00:00 2001 From: pingcap-github-bot Date: Thu, 30 Apr 2020 14:04:43 +0800 Subject: [PATCH] don't failover if TiDB pod is not scheduled and perform recovery no matter what state failover pods are in (#2263) (#2358) * skip auto failover if pod is not scheduled * fix tidb recover * share failover period in tests Co-authored-by: Yecheng Fu --- .../tidbcluster/tidb_cluster_controller.go | 2 +- pkg/manager/member/tidb_failover.go | 52 +++-- pkg/manager/member/tidb_failover_test.go | 196 ++++++++++++---- pkg/manager/member/tidb_member_manager.go | 34 ++- .../member/tidb_member_manager_test.go | 196 ++++++++++++++++ tests/actions.go | 19 ++ tests/e2e/tidbcluster/stability.go | 216 +++++++++++++++++- tests/e2e/tidbcluster/tidbcluster.go | 2 +- 8 files changed, 648 insertions(+), 69 deletions(-) diff --git a/pkg/controller/tidbcluster/tidb_cluster_controller.go b/pkg/controller/tidbcluster/tidb_cluster_controller.go index 498dea46a25..ff61dfe312c 100644 --- a/pkg/controller/tidbcluster/tidb_cluster_controller.go +++ b/pkg/controller/tidbcluster/tidb_cluster_controller.go @@ -108,7 +108,7 @@ func NewController( tiflashScaler := mm.NewTiFlashScaler(pdControl, pvcInformer.Lister(), pvcControl, podInformer.Lister()) pdFailover := mm.NewPDFailover(cli, pdControl, pdFailoverPeriod, podInformer.Lister(), podControl, pvcInformer.Lister(), pvcControl, pvInformer.Lister(), recorder) tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod, recorder) - tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod, recorder) + tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod, recorder, podInformer.Lister()) tiflashFailover := mm.NewTiFlashFailover(tiflashFailoverPeriod, recorder) pdUpgrader := mm.NewPDUpgrader(pdControl, podControl, podInformer.Lister()) tikvUpgrader := mm.NewTiKVUpgrader(pdControl, podControl, podInformer.Lister()) diff --git a/pkg/manager/member/tidb_failover.go b/pkg/manager/member/tidb_failover.go index 17df4353ee5..44437047d42 100644 --- a/pkg/manager/member/tidb_failover.go +++ b/pkg/manager/member/tidb_failover.go @@ -20,20 +20,24 @@ import ( "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/record" "k8s.io/klog" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" ) type tidbFailover struct { tidbFailoverPeriod time.Duration recorder record.EventRecorder + podLister corelisters.PodLister } // NewTiDBFailover returns a tidbFailover instance -func NewTiDBFailover(failoverPeriod time.Duration, recorder record.EventRecorder) Failover { +func NewTiDBFailover(failoverPeriod time.Duration, recorder record.EventRecorder, podLister corelisters.PodLister) Failover { return &tidbFailover{ tidbFailoverPeriod: failoverPeriod, recorder: recorder, + podLister: podLister, } } @@ -50,24 +54,38 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error { } } - if tc.Spec.TiDB.MaxFailoverCount != nil && *tc.Spec.TiDB.MaxFailoverCount > 0 { - maxFailoverCount := *tc.Spec.TiDB.MaxFailoverCount - if len(tc.Status.TiDB.FailureMembers) >= int(maxFailoverCount) { - klog.Warningf("the failure members count reached the limit:%d", tc.Spec.TiDB.MaxFailoverCount) - return nil - } - for _, tidbMember := range tc.Status.TiDB.Members { - _, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name] - deadline := tidbMember.LastTransitionTime.Add(tf.tidbFailoverPeriod) - if !tidbMember.Health && time.Now().After(deadline) && !exist { - tc.Status.TiDB.FailureMembers[tidbMember.Name] = v1alpha1.TiDBFailureMember{ - PodName: tidbMember.Name, - CreatedAt: metav1.Now(), - } - msg := fmt.Sprintf("tidb[%s] is unhealthy", tidbMember.Name) - tf.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tidb", tidbMember.Name, msg)) + if tc.Spec.TiDB.MaxFailoverCount == nil || *tc.Spec.TiDB.MaxFailoverCount <= 0 { + klog.Infof("tidb failover is disabled for %s/%s, skipped", tc.Namespace, tc.Name) + return nil + } + + maxFailoverCount := *tc.Spec.TiDB.MaxFailoverCount + for _, tidbMember := range tc.Status.TiDB.Members { + _, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name] + deadline := tidbMember.LastTransitionTime.Add(tf.tidbFailoverPeriod) + if !tidbMember.Health && time.Now().After(deadline) && !exist { + if len(tc.Status.TiDB.FailureMembers) >= int(maxFailoverCount) { + klog.Warningf("the failover count reachs the limit (%d), no more failover pods will be created", maxFailoverCount) break } + pod, err := tf.podLister.Pods(tc.Namespace).Get(tidbMember.Name) + if err != nil { + return err + } + _, condition := podutil.GetPodCondition(&pod.Status, corev1.PodScheduled) + if condition == nil || condition.Status != corev1.ConditionTrue { + // if a member is unheathy because it's not scheduled yet, we + // should not create failover pod for it + klog.Warningf("pod %s/%s is not scheduled yet, skipping failover", pod.Namespace, pod.Name) + continue + } + tc.Status.TiDB.FailureMembers[tidbMember.Name] = v1alpha1.TiDBFailureMember{ + PodName: tidbMember.Name, + CreatedAt: metav1.Now(), + } + msg := fmt.Sprintf("tidb[%s] is unhealthy", tidbMember.Name) + tf.recorder.Event(tc, corev1.EventTypeWarning, unHealthEventReason, fmt.Sprintf(unHealthEventMsgPattern, "tidb", tidbMember.Name, msg)) + break } } diff --git a/pkg/manager/member/tidb_failover_test.go b/pkg/manager/member/tidb_failover_test.go index 1b15420fa6c..aea54194436 100644 --- a/pkg/manager/member/tidb_failover_test.go +++ b/pkg/manager/member/tidb_failover_test.go @@ -14,40 +14,31 @@ package member import ( + "context" "testing" "time" . "github.com/onsi/gomega" "k8s.io/apimachinery/pkg/types" + kubeinformers "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "k8s.io/utils/pointer" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" ) -func TestFakeTiDBFailoverFailover(t *testing.T) { - type testcase struct { +func TestTiDBFailoverFailover(t *testing.T) { + tests := []struct { name string + pods []*corev1.Pod update func(*v1alpha1.TidbCluster) errExpectFn func(*GomegaWithT, error) expectFn func(*GomegaWithT, *v1alpha1.TidbCluster) - } - - testFn := func(test *testcase, t *testing.T) { - t.Logf(test.name) - g := NewGomegaWithT(t) - tidbFailover := newTiDBFailover() - tc := newTidbClusterForTiDBFailover() - test.update(tc) - - err := tidbFailover.Failover(tc) - test.errExpectFn(g, err) - test.expectFn(g, tc) - } - - tests := []testcase{ + }{ { name: "all tidb members are ready", update: func(tc *v1alpha1.TidbCluster) { @@ -72,6 +63,36 @@ func TestFakeTiDBFailoverFailover(t *testing.T) { }, { name: "one tidb member failed", + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: corev1.NamespaceDefault, + Name: "failover-tidb-0", + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: corev1.NamespaceDefault, + Name: "failover-tidb-1", + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + }, update: func(tc *v1alpha1.TidbCluster) { tc.Status.TiDB.Members = map[string]v1alpha1.TiDBMember{ "failover-tidb-0": { @@ -92,8 +113,90 @@ func TestFakeTiDBFailoverFailover(t *testing.T) { t.Expect(int(tc.Spec.TiDB.Replicas)).To(Equal(2)) }, }, + { + name: "one tidb member failed but not scheduled yet", + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: corev1.NamespaceDefault, + Name: "failover-tidb-0", + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionUnknown, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: corev1.NamespaceDefault, + Name: "failover-tidb-1", + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + }, + update: func(tc *v1alpha1.TidbCluster) { + tc.Status.TiDB.Members = map[string]v1alpha1.TiDBMember{ + "failover-tidb-0": { + Name: "failover-tidb-0", + Health: false, + }, + "failover-tidb-1": { + Name: "failover-tidb-1", + Health: true, + }, + } + }, + errExpectFn: func(t *GomegaWithT, err error) { + t.Expect(err).NotTo(HaveOccurred()) + }, + expectFn: func(t *GomegaWithT, tc *v1alpha1.TidbCluster) { + t.Expect(len(tc.Status.TiDB.FailureMembers)).To(Equal(0)) + t.Expect(int(tc.Spec.TiDB.Replicas)).To(Equal(2)) + }, + }, { name: "two tidb members failed", + pods: []*corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: corev1.NamespaceDefault, + Name: "failover-tidb-0", + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: corev1.NamespaceDefault, + Name: "failover-tidb-1", + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + }, update: func(tc *v1alpha1.TidbCluster) { tc.Status.TiDB.Members = map[string]v1alpha1.TiDBMember{ "failover-tidb-0": { @@ -207,30 +310,31 @@ func TestFakeTiDBFailoverFailover(t *testing.T) { }, } - for i := range tests { - testFn(&tests[i], t) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + g := NewGomegaWithT(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + fakeClient := fake.NewSimpleClientset() + for _, pod := range test.pods { + fakeClient.CoreV1().Pods(pod.Namespace).Create(pod) + } + tidbFailover := newTiDBFailover(ctx, fakeClient) + tc := newTidbClusterForTiDBFailover() + test.update(tc) + err := tidbFailover.Failover(tc) + test.errExpectFn(g, err) + test.expectFn(g, tc) + }) } } -func TestFakeTiDBFailoverRecover(t *testing.T) { - type testcase struct { +func TestTiDBFailoverRecover(t *testing.T) { + tests := []struct { name string update func(*v1alpha1.TidbCluster) expectFn func(*GomegaWithT, *v1alpha1.TidbCluster) - } - - testFn := func(test *testcase, t *testing.T) { - t.Log(test.name) - g := NewGomegaWithT(t) - tidbFailover := newTiDBFailover() - tc := newTidbClusterForTiDBFailover() - test.update(tc) - - tidbFailover.Recover(tc) - test.expectFn(g, tc) - } - - tests := []testcase{ + }{ { name: "have not failure tidb member to recover", update: func(tc *v1alpha1.TidbCluster) { @@ -377,14 +481,28 @@ func TestFakeTiDBFailoverRecover(t *testing.T) { }, } - for i := range tests { - testFn(&tests[i], t) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + g := NewGomegaWithT(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + fakeClient := fake.NewSimpleClientset() + tidbFailover := newTiDBFailover(ctx, fakeClient) + tc := newTidbClusterForTiDBFailover() + test.update(tc) + tidbFailover.Recover(tc) + test.expectFn(g, tc) + }) } } -func newTiDBFailover() Failover { +func newTiDBFailover(ctx context.Context, client kubernetes.Interface) Failover { + kubeInformerFactory := kubeinformers.NewSharedInformerFactory(client, 0) + podLister := kubeInformerFactory.Core().V1().Pods().Lister() + kubeInformerFactory.Start(ctx.Done()) + kubeInformerFactory.WaitForCacheSync(ctx.Done()) recorder := record.NewFakeRecorder(100) - return &tidbFailover{tidbFailoverPeriod: time.Duration(5 * time.Minute), recorder: recorder} + return NewTiDBFailover(time.Duration(5*time.Minute), recorder, podLister) } func newTidbClusterForTiDBFailover() *v1alpha1.TidbCluster { diff --git a/pkg/manager/member/tidb_member_manager.go b/pkg/manager/member/tidb_member_manager.go index 9676a40b38b..831aebf0e18 100644 --- a/pkg/manager/member/tidb_member_manager.go +++ b/pkg/manager/member/tidb_member_manager.go @@ -35,6 +35,7 @@ import ( v1 "k8s.io/client-go/listers/apps/v1" corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/klog" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/utils/pointer" ) @@ -234,11 +235,8 @@ func (tmm *tidbMemberManager) syncTiDBStatefulSetForTidbCluster(tc *v1alpha1.Tid } } - if tmm.autoFailover && tc.Spec.TiDB.MaxFailoverCount != nil { - if tc.Spec.TiDB.Replicas == int32(0) && tc.Status.TiDB.FailureMembers != nil { - tmm.tidbFailover.Recover(tc) - } - if tc.TiDBAllPodsStarted() && tc.TiDBAllMembersReady() && tc.Status.TiDB.FailureMembers != nil { + if tmm.autoFailover { + if tmm.shouldRecover(tc) { tmm.tidbFailover.Recover(tc) } else if tc.TiDBAllPodsStarted() && !tc.TiDBAllMembersReady() { if err := tmm.tidbFailover.Failover(tc); err != nil { @@ -250,6 +248,32 @@ func (tmm *tidbMemberManager) syncTiDBStatefulSetForTidbCluster(tc *v1alpha1.Tid return updateStatefulSet(tmm.setControl, tc, newTiDBSet, oldTiDBSet) } +func (tmm *tidbMemberManager) shouldRecover(tc *v1alpha1.TidbCluster) bool { + if tc.Status.TiDB.FailureMembers == nil { + return false + } + // If all desired replicas (excluding failover pods) of tidb cluster are + // healthy, we can perform our failover recovery operation. + // Note that failover pods may fail (e.g. lack of resources) and we don't care + // about them because we're going to delete them. + for ordinal := range tc.TiDBStsDesiredOrdinals(true) { + name := fmt.Sprintf("%s-%d", controller.TiDBMemberName(tc.GetName()), ordinal) + pod, err := tmm.podLister.Pods(tc.Namespace).Get(name) + if err != nil { + klog.Errorf("pod %s/%s does not exist: %v", tc.Namespace, name, err) + return false + } + if !podutil.IsPodReady(pod) { + return false + } + status, ok := tc.Status.TiDB.Members[pod.Name] + if !ok || !status.Health { + return false + } + } + return true +} + func (tmm *tidbMemberManager) syncTiDBService(tc *v1alpha1.TidbCluster) error { if tc.Spec.Paused { klog.V(4).Infof("tidb cluster %s/%s is paused, skip syncing for tidb service", tc.GetNamespace(), tc.GetName()) diff --git a/pkg/manager/member/tidb_member_manager_test.go b/pkg/manager/member/tidb_member_manager_test.go index 1512c89d491..58cc1dc8677 100644 --- a/pkg/manager/member/tidb_member_manager_test.go +++ b/pkg/manager/member/tidb_member_manager_test.go @@ -14,6 +14,7 @@ package member import ( + "context" "fmt" "strings" "testing" @@ -1796,3 +1797,198 @@ func TestTiDBMemberManagerScaleToZeroReplica(t *testing.T) { testFn(&tests[i], t) } } + +func TestTiDBShouldRecover(t *testing.T) { + pods := []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "failover-tidb-0", + Namespace: v1.NamespaceDefault, + }, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "failover-tidb-1", + Namespace: v1.NamespaceDefault, + }, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + } + podsWithFailover := append(pods, &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "failover-tidb-2", + Namespace: v1.NamespaceDefault, + }, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionFalse, + }, + }, + }, + }) + tests := []struct { + name string + tc *v1alpha1.TidbCluster + pods []*v1.Pod + want bool + }{ + { + name: "should not recover if no failure members", + tc: &v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "failover", + Namespace: v1.NamespaceDefault, + }, + Status: v1alpha1.TidbClusterStatus{}, + }, + pods: pods, + want: false, + }, + { + name: "should not recover if a member is not healthy", + tc: &v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "failover", + Namespace: v1.NamespaceDefault, + }, + Spec: v1alpha1.TidbClusterSpec{ + TiDB: v1alpha1.TiDBSpec{ + Replicas: 2, + }, + }, + Status: v1alpha1.TidbClusterStatus{ + TiDB: v1alpha1.TiDBStatus{ + Members: map[string]v1alpha1.TiDBMember{ + "failover-tidb-0": { + Name: "failover-tidb-0", + Health: false, + }, + "failover-tidb-1": { + Name: "failover-tidb-1", + Health: true, + }, + }, + FailureMembers: map[string]v1alpha1.TiDBFailureMember{ + "failover-tidb-0": { + PodName: "failover-tidb-0", + }, + }, + }, + }, + }, + pods: pods, + want: false, + }, + { + name: "should recover if all members are ready and healthy", + tc: &v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "failover", + Namespace: v1.NamespaceDefault, + }, + Spec: v1alpha1.TidbClusterSpec{ + TiDB: v1alpha1.TiDBSpec{ + Replicas: 2, + }, + }, + Status: v1alpha1.TidbClusterStatus{ + TiDB: v1alpha1.TiDBStatus{ + Members: map[string]v1alpha1.TiDBMember{ + "failover-tidb-0": { + Name: "failover-tidb-0", + Health: true, + }, + "failover-tidb-1": { + Name: "failover-tidb-1", + Health: true, + }, + }, + FailureMembers: map[string]v1alpha1.TiDBFailureMember{ + "failover-tidb-0": { + PodName: "failover-tidb-0", + }, + }, + }, + }, + }, + pods: pods, + want: true, + }, + { + name: "should recover if all members are ready and healthy (ignore auto-created failover pods)", + tc: &v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "failover", + Namespace: v1.NamespaceDefault, + }, + Spec: v1alpha1.TidbClusterSpec{ + TiDB: v1alpha1.TiDBSpec{ + Replicas: 2, + }, + }, + Status: v1alpha1.TidbClusterStatus{ + TiDB: v1alpha1.TiDBStatus{ + Members: map[string]v1alpha1.TiDBMember{ + "failover-tidb-0": { + Name: "failover-tidb-0", + Health: true, + }, + "failover-tidb-1": { + Name: "failover-tidb-1", + Health: true, + }, + "failover-tidb-2": { + Name: "failover-tidb-1", + Health: false, + }, + }, + FailureMembers: map[string]v1alpha1.TiDBFailureMember{ + "failover-tidb-0": { + PodName: "failover-tidb-0", + }, + }, + }, + }, + }, + pods: podsWithFailover, + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + client := kubefake.NewSimpleClientset() + for _, pod := range tt.pods { + client.CoreV1().Pods(pod.Namespace).Create(pod) + } + kubeInformerFactory := kubeinformers.NewSharedInformerFactory(client, 0) + podLister := kubeInformerFactory.Core().V1().Pods().Lister() + kubeInformerFactory.Start(ctx.Done()) + kubeInformerFactory.WaitForCacheSync(ctx.Done()) + tidbMemberManager := &tidbMemberManager{podLister: podLister} + got := tidbMemberManager.shouldRecover(tt.tc) + if got != tt.want { + t.Fatalf("wants %v, got %v", tt.want, got) + } + }) + } +} diff --git a/tests/actions.go b/tests/actions.go index 0a52dc950af..51d7482d20a 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -296,6 +296,8 @@ type OperatorConfig struct { Cabundle string BackupImage string AutoFailover *bool + // Additional STRING values, set via --set-string flag. + StringValues map[string]string } type TidbClusterConfig struct { @@ -447,6 +449,11 @@ func (oi *OperatorConfig) OperatorHelmSetString(m map[string]string) string { set["controllerManager.autoFailover"] = strconv.FormatBool(*oi.AutoFailover) } + // merge with additional STRING values + for k, v := range oi.StringValues { + set[k] = v + } + arr := make([]string, 0, len(set)) for k, v := range set { arr = append(arr, fmt.Sprintf("%s=%s", k, v)) @@ -1398,6 +1405,10 @@ func (oa *operatorActions) pdMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, err return false, nil } + if pdSet.Status.CurrentRevision != pdSet.Status.UpdateRevision { + return false, nil + } + if !utilstatefulset.IsAllDesiredPodsRunningAndReady(helper.NewHijackClient(oa.kubeCli, oa.asCli), pdSet) { return false, nil } @@ -1475,6 +1486,10 @@ func (oa *operatorActions) tikvMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e return false, nil } + if tikvSet.Status.CurrentRevision != tikvSet.Status.UpdateRevision { + return false, nil + } + if !utilstatefulset.IsAllDesiredPodsRunningAndReady(helper.NewHijackClient(oa.kubeCli, oa.asCli), tikvSet) { return false, nil } @@ -1546,6 +1561,10 @@ func (oa *operatorActions) tidbMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e return false, nil } + if tidbSet.Status.CurrentRevision != tidbSet.Status.UpdateRevision { + return false, nil + } + if !utilstatefulset.IsAllDesiredPodsRunningAndReady(helper.NewHijackClient(oa.kubeCli, oa.asCli), tidbSet) { return false, nil } diff --git a/tests/e2e/tidbcluster/stability.go b/tests/e2e/tidbcluster/stability.go index 2549f046d0f..858aa1c6dbc 100644 --- a/tests/e2e/tidbcluster/stability.go +++ b/tests/e2e/tidbcluster/stability.go @@ -44,15 +44,18 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" restclient "k8s.io/client-go/rest" aggregatorclient "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" storageutils "k8s.io/kubernetes/test/e2e/storage/utils" + testutils "k8s.io/kubernetes/test/utils" "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -129,11 +132,6 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() { framework.ExpectNoError(err, "failed to create clientset") }) - ginkgo.AfterEach(func() { - ginkgo.By("Uninstall tidb-operator") - oa.CleanOperatorOrDie(ocfg) - }) - testCases := []struct { name string fn func() @@ -555,6 +553,40 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() { }) framework.ExpectNoError(err) }) + }) + + ginkgo.Context("operator with short auto-failover periods", func() { + var ocfg *tests.OperatorConfig + var oa tests.OperatorActions + var genericCli client.Client + failoverPeriod := time.Minute + + ginkgo.BeforeEach(func() { + ocfg = &tests.OperatorConfig{ + Namespace: ns, + ReleaseName: "operator", + Image: cfg.OperatorImage, + Tag: cfg.OperatorTag, + LogLevel: "4", + TestMode: true, + StringValues: map[string]string{ + "controllerManager.pdFailoverPeriod": failoverPeriod.String(), + "controllerManager.tidbFailoverPeriod": failoverPeriod.String(), + "controllerManager.tikvFailoverPeriod": failoverPeriod.String(), + "controllerManager.tiflashFailoverPeriod": failoverPeriod.String(), + }, + } + oa = tests.NewOperatorActions(cli, c, asCli, aggrCli, apiExtCli, tests.DefaultPollInterval, ocfg, e2econfig.TestConfig, nil, fw, f) + ginkgo.By("Installing CRDs") + oa.CleanCRDOrDie() + oa.InstallCRDOrDie(ocfg) + ginkgo.By("Installing tidb-operator") + oa.CleanOperatorOrDie(ocfg) + oa.DeployOperatorOrDie(ocfg) + var err error + genericCli, err = client.New(config, client.Options{Scheme: scheme.Scheme}) + framework.ExpectNoError(err, "failed to create clientset") + }) ginkgo.It("[Feature: AutoFailover] PD: one replacement for one failed member and replacements should be deleted when failed members are recovered", func() { // TODO support aws (eks), kind @@ -618,7 +650,7 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() { ginkgo.By("Wait for a replacement to be created") podName := controller.PDMemberName(clusterName) + "-3" - err = wait.PollImmediate(time.Second*10, 10*time.Minute /* 5 minutes failover period + 5 extra minute */, func() (bool, error) { + err = wait.PollImmediate(time.Second*10, 2*failoverPeriod, func() (bool, error) { _, err := c.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{}) if err != nil && !apierrors.IsNotFound(err) { return false, nil @@ -651,6 +683,178 @@ var _ = ginkgo.Describe("[tidb-operator][Stability]", func() { err = e2epod.WaitForPodNotFoundInNamespace(c, podName, ns, time.Minute*5) framework.ExpectNoError(err) }) + + ginkgo.It("[Feature: AutoFailover] TiDB: one replacement for one failed member and replacements should be deleted when failed members are recovered", func() { + ginkgo.By("Make sure we have at least 3 schedulable nodes") + nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet) + gomega.Expect(len(nodeList.Items)).To(gomega.BeNumerically(">=", 3)) + + clusterName := "failover" + tc := fixture.GetTidbCluster(ns, clusterName, utilimage.TiDBV3Version) + tc.Spec.PD.Replicas = 1 + tc.Spec.TiKV.Replicas = 1 + tc.Spec.TiDB.Replicas = 2 + // We use special affinity requiremnets to make sure only 2 tidb pods can be scheduled. + tc.Spec.TiDB.Affinity = &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: v1.LabelHostname, + Operator: v1.NodeSelectorOpIn, + Values: []string{ + nodeList.Items[0].Name, + nodeList.Items[1].Name, + }, + }, + }, + }, + }, + }, + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app.kubernetes.io/instance": clusterName, + "app.kubernetes.io/component": label.TiDBLabelVal, + }, + }, + TopologyKey: v1.LabelHostname, + }, + }, + }, + } + err := genericCli.Create(context.TODO(), tc) + framework.ExpectNoError(err) + err = oa.WaitForTidbClusterReady(tc, 30*time.Minute, 15*time.Second) + framework.ExpectNoError(err) + + ginkgo.By("Increase replicas of TiDB from 2 to 3") + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.TiDB.Replicas = 3 + return nil + }) + framework.ExpectNoError(err) + + ginkgo.By("Wait for the new pod to be created") + podName := controller.TiDBMemberName(clusterName) + "-2" + err = wait.PollImmediate(time.Second*10, 1*time.Minute, func() (bool, error) { + _, err := c.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return false, nil + } + return !apierrors.IsNotFound(err), nil + }) + framework.ExpectNoError(err) + + ginkgo.By("Make sure the new pod will not be scheduled") + err = wait.PollImmediate(time.Second*10, 1*time.Minute, func() (bool, error) { + pod, err := c.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{}) + if err != nil { + if testutils.IsRetryableAPIError(err) { + return false, nil + } + return false, err + } + _, condition := podutil.GetPodCondition(&pod.Status, v1.PodScheduled) + if condition == nil || condition.Status != v1.ConditionTrue { + return false, nil + } + return true, nil + }) + framework.ExpectEqual(err, wait.ErrWaitTimeout) + + listOptions := metav1.ListOptions{ + LabelSelector: labels.SelectorFromSet( + label.New().Instance(clusterName).Component(label.TiDBLabelVal).Labels()).String(), + } + ginkgo.By("Wait for no new replacement will be created for non-scheduled TiDB pod") + err = wait.PollImmediate(time.Second*10, 2*time.Minute, func() (bool, error) { + pdPodList, err := c.CoreV1().Pods(ns).List(listOptions) + if err != nil && !apierrors.IsNotFound(err) { + return false, nil + } + if len(pdPodList.Items) != 3 { + return true, nil + } + return false, nil + }) + framework.ExpectEqual(err, wait.ErrWaitTimeout) + + ginkgo.By("Fix the TiDB scheduling requirements") + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.TiDB.Affinity = nil + return nil + }) + framework.ExpectNoError(err) + + err = oa.WaitForTidbClusterReady(tc, 30*time.Minute, 15*time.Second) + framework.ExpectNoError(err) + + ginkgo.By(fmt.Sprintf("Fail the TiDB pod %q", podName)) + patch := []byte(` +{ + "spec": { + "containers": [ + { + "name": "tidb", + "image": "pingcap/does-not-exist:latest" + } + ] + } +}`) + _, err = c.CoreV1().Pods(ns).Patch(podName, types.StrategicMergePatchType, patch) + framework.ExpectNoError(err) + + err = wait.PollImmediate(time.Second*10, 1*time.Minute, func() (bool, error) { + pod, err := c.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{}) + if err != nil { + if testutils.IsRetryableAPIError(err) { + return false, nil + } + return false, err + } + return !podutil.IsPodReady(pod), nil + }) + framework.ExpectNoError(err) + + ginkgo.By("Wait for a replacement to be created") + newPodName := controller.TiDBMemberName(clusterName) + "-3" + err = wait.PollImmediate(time.Second*10, 2*failoverPeriod, func() (bool, error) { + _, err := c.CoreV1().Pods(ns).Get(newPodName, metav1.GetOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return false, nil + } + return !apierrors.IsNotFound(err), nil + }) + framework.ExpectNoError(err) + + ginkgo.By("Wait for only one replacement to be created") + err = wait.PollImmediate(time.Second*10, 1*time.Minute, func() (bool, error) { + podList, err := c.CoreV1().Pods(ns).List(listOptions) + if err != nil && !apierrors.IsNotFound(err) { + return false, nil + } + if len(podList.Items) != 4 { + return true, nil + } + return false, nil + }) + framework.ExpectEqual(err, wait.ErrWaitTimeout) + + ginkgo.By(fmt.Sprintf("Fix the TiDB pod %q", podName)) + err = c.CoreV1().Pods(ns).Delete(podName, &metav1.DeleteOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Wait for the replacement to be gone") + err = e2epod.WaitForPodNotFoundInNamespace(c, newPodName, ns, time.Minute*5) + framework.ExpectNoError(err) + }) + }) }) diff --git a/tests/e2e/tidbcluster/tidbcluster.go b/tests/e2e/tidbcluster/tidbcluster.go index cff395d58ff..cc0d97162b5 100644 --- a/tests/e2e/tidbcluster/tidbcluster.go +++ b/tests/e2e/tidbcluster/tidbcluster.go @@ -1081,7 +1081,7 @@ var _ = ginkgo.Describe("[tidb-operator] TiDBCluster", func() { framework.ExpectNoError(err) }) - ginkgo.It("tidb-scale: clear TiDB failureMembers when scale TiDB to zero", func() { + ginkgo.It("[Feature: AutoFailover] clear TiDB failureMembers when scale TiDB to zero", func() { cluster := newTidbClusterConfig(e2econfig.TestConfig, ns, "tidb-scale", "admin", utilimage.TiDBV3Version) cluster.Resources["pd.replicas"] = "3" cluster.Resources["tikv.replicas"] = "1"