diff --git a/pkg/controller/tidbcluster/tidb_cluster_controller.go b/pkg/controller/tidbcluster/tidb_cluster_controller.go index 277214bafa..60f7a78d89 100644 --- a/pkg/controller/tidbcluster/tidb_cluster_controller.go +++ b/pkg/controller/tidbcluster/tidb_cluster_controller.go @@ -76,11 +76,11 @@ func NewController( tikvFailoverPeriod time.Duration, tidbFailoverPeriod time.Duration, ) *Controller { - eventBroadcaster := record.NewBroadcaster() - eventBroadcaster.StartLogging(glog.Infof) + eventBroadcaster := record.NewBroadcasterWithCorrelatorOptions(record.CorrelatorOptions{QPS: 1}) + eventBroadcaster.StartLogging(glog.V(2).Infof) eventBroadcaster.StartRecordingToSink(&eventv1.EventSinkImpl{ Interface: eventv1.New(kubeCli.CoreV1().RESTClient()).Events("")}) - recorder := eventBroadcaster.NewRecorder(v1alpha1.Scheme, corev1.EventSource{Component: "tidbcluster"}) + recorder := eventBroadcaster.NewRecorder(v1alpha1.Scheme, corev1.EventSource{Component: "tidb-controller-manager"}) tcInformer := informerFactory.Pingcap().V1alpha1().TidbClusters() setInformer := kubeInformerFactory.Apps().V1().StatefulSets() @@ -108,7 +108,7 @@ func NewController( typedControl := controller.NewTypedControl(controller.NewRealGenericControl(genericCli, recorder)) pdScaler := mm.NewPDScaler(pdControl, pvcInformer.Lister(), pvcControl) tikvScaler := mm.NewTiKVScaler(pdControl, pvcInformer.Lister(), pvcControl, podInformer.Lister()) - pdFailover := mm.NewPDFailover(cli, pdControl, pdFailoverPeriod, podInformer.Lister(), podControl, pvcInformer.Lister(), pvcControl, pvInformer.Lister()) + pdFailover := mm.NewPDFailover(cli, pdControl, pdFailoverPeriod, podInformer.Lister(), podControl, pvcInformer.Lister(), pvcControl, pvInformer.Lister(), recorder) tikvFailover := mm.NewTiKVFailover(tikvFailoverPeriod) tidbFailover := mm.NewTiDBFailover(tidbFailoverPeriod) pdUpgrader := mm.NewPDUpgrader(pdControl, podControl, podInformer.Lister()) diff --git a/pkg/manager/member/pd_failover.go b/pkg/manager/member/pd_failover.go index fca380fd98..449474d28a 100644 --- a/pkg/manager/member/pd_failover.go +++ b/pkg/manager/member/pd_failover.go @@ -23,9 +23,11 @@ import ( "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/pdapi" "github.com/pingcap/tidb-operator/pkg/util" + apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/record" glog "k8s.io/klog" ) @@ -39,6 +41,7 @@ type pdFailover struct { pvcLister corelisters.PersistentVolumeClaimLister pvcControl controller.PVCControlInterface pvLister corelisters.PersistentVolumeLister + recorder record.EventRecorder } // NewPDFailover returns a pd Failover @@ -49,7 +52,8 @@ func NewPDFailover(cli versioned.Interface, podControl controller.PodControlInterface, pvcLister corelisters.PersistentVolumeClaimLister, pvcControl controller.PVCControlInterface, - pvLister corelisters.PersistentVolumeLister) Failover { + pvLister corelisters.PersistentVolumeLister, + recorder record.EventRecorder) Failover { return &pdFailover{ cli, pdControl, @@ -58,7 +62,8 @@ func NewPDFailover(cli versioned.Interface, podControl, pvcLister, pvcControl, - pvLister} + pvLister, + recorder} } func (pf *pdFailover) Failover(tc *v1alpha1.TidbCluster) error { @@ -73,9 +78,12 @@ func (pf *pdFailover) Failover(tc *v1alpha1.TidbCluster) error { } healthCount := 0 - for _, pdMember := range tc.Status.PD.Members { + for podName, pdMember := range tc.Status.PD.Members { if pdMember.Health { healthCount++ + } else { + pf.recorder.Eventf(tc, apiv1.EventTypeWarning, "PDMemberUnhealthy", + "%s(%s) is unhealthy", podName, pdMember.ID) } } inQuorum := healthCount > len(tc.Status.PD.Members)/2 @@ -132,6 +140,9 @@ func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error { return err } + pf.recorder.Eventf(tc, apiv1.EventTypeWarning, "PDMemberMarkedAsFailure", + "%s(%s) marked as a failure member", podName, pdMember.ID) + tc.Status.PD.FailureMembers[podName] = v1alpha1.PDFailureMember{ PodName: podName, MemberID: pdMember.ID, @@ -173,6 +184,8 @@ func (pf *pdFailover) tryToDeleteAFailureMember(tc *v1alpha1.TidbCluster) error return err } glog.Infof("pd failover: delete member: %d successfully", memberID) + pf.recorder.Eventf(tc, apiv1.EventTypeWarning, "PDMemberDeleted", + "%s(%d) deleted from cluster", failurePodName, memberID) // The order of old PVC deleting and the new Pod creating is not guaranteed by Kubernetes. // If new Pod is created before old PVC deleted, new Pod will reuse old PVC. diff --git a/pkg/manager/member/pd_failover_test.go b/pkg/manager/member/pd_failover_test.go index f19c71f90b..cd9ca847d1 100644 --- a/pkg/manager/member/pd_failover_test.go +++ b/pkg/manager/member/pd_failover_test.go @@ -15,6 +15,7 @@ package member import ( "fmt" + "sort" "strings" "testing" "time" @@ -31,11 +32,13 @@ import ( kubeinformers "k8s.io/client-go/informers" kubefake "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" ) func TestPDFailoverFailover(t *testing.T) { g := NewGomegaWithT(t) + recorder := record.NewFakeRecorder(100) type testcase struct { name string update func(*v1alpha1.TidbCluster) @@ -57,6 +60,7 @@ func TestPDFailoverFailover(t *testing.T) { pdFailover, pvcIndexer, podIndexer, fakePDControl, fakePodControl, fakePVCControl := newFakePDFailover() pdClient := controller.NewFakePDClient(fakePDControl, tc) + pdFailover.recorder = recorder pdClient.AddReaction(pdapi.DeleteMemberByIDActionType, func(action *pdapi.Action) (interface{}, error) { if test.delMemberFailed { @@ -107,6 +111,8 @@ func TestPDFailoverFailover(t *testing.T) { expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) { g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3)) g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(0)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(0)) }, }, { @@ -122,6 +128,8 @@ func TestPDFailoverFailover(t *testing.T) { errExpectFn: errExpectNotNil, expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) { g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(0)) }, }, { @@ -141,6 +149,11 @@ func TestPDFailoverFailover(t *testing.T) { expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) { g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3)) g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(0)) + events := collectEvents(recorder.Events) + sort.Strings(events) + g.Expect(events).To(HaveLen(2)) + g.Expect(events[0]).To(ContainSubstring("test-pd-0(0) is unhealthy")) + g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) }, }, { @@ -160,6 +173,9 @@ func TestPDFailoverFailover(t *testing.T) { pd1, ok := tc.Status.PD.FailureMembers[pd1Name] g.Expect(ok).To(Equal(true)) g.Expect(pd1.MemberDeleted).To(Equal(true)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(1)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) deleted from cluster")) }, }, { @@ -182,6 +198,9 @@ func TestPDFailoverFailover(t *testing.T) { expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) { g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3)) g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(0)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(1)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) }, }, { @@ -204,6 +223,9 @@ func TestPDFailoverFailover(t *testing.T) { expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) { g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3)) g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(0)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(1)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) }, }, { @@ -223,6 +245,9 @@ func TestPDFailoverFailover(t *testing.T) { expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) { g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3)) g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(0)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(1)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) }, }, { @@ -247,6 +272,10 @@ func TestPDFailoverFailover(t *testing.T) { g.Expect(failureMembers.MemberID).To(Equal("12891273174085095651")) g.Expect(string(failureMembers.PVCUID)).To(Equal("pvc-1-uid")) g.Expect(failureMembers.MemberDeleted).To(BeFalse()) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(2)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) + g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) marked as a failure member")) }, }, { @@ -266,6 +295,10 @@ func TestPDFailoverFailover(t *testing.T) { pd1, ok := tc.Status.PD.FailureMembers[pd1Name] g.Expect(ok).To(Equal(true)) g.Expect(pd1.MemberDeleted).To(Equal(true)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(2)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) + g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) deleted from cluster")) }, }, { @@ -294,6 +327,9 @@ func TestPDFailoverFailover(t *testing.T) { pd1, ok := tc.Status.PD.FailureMembers[pd1Name] g.Expect(ok).To(Equal(true)) g.Expect(pd1.MemberDeleted).To(Equal(false)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(1)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) }, }, { @@ -316,6 +352,9 @@ func TestPDFailoverFailover(t *testing.T) { pd1, ok := tc.Status.PD.FailureMembers[pd1Name] g.Expect(ok).To(Equal(true)) g.Expect(pd1.MemberDeleted).To(Equal(false)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(1)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) }, }, { @@ -338,6 +377,10 @@ func TestPDFailoverFailover(t *testing.T) { pd1, ok := tc.Status.PD.FailureMembers[pd1Name] g.Expect(ok).To(Equal(true)) g.Expect(pd1.MemberDeleted).To(Equal(false)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(2)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) + g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) deleted from cluster")) }, }, { @@ -360,6 +403,10 @@ func TestPDFailoverFailover(t *testing.T) { pd1, ok := tc.Status.PD.FailureMembers[pd1Name] g.Expect(ok).To(Equal(true)) g.Expect(pd1.MemberDeleted).To(Equal(false)) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(2)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) + g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) deleted from cluster")) }, }, { @@ -385,6 +432,10 @@ func TestPDFailoverFailover(t *testing.T) { _, err = pf.pvcLister.PersistentVolumeClaims(metav1.NamespaceDefault).Get(pvcName) g.Expect(err).To(HaveOccurred()) g.Expect(errors.IsNotFound(err)).To(BeTrue()) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(2)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) + g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) deleted from cluster")) }, }, { @@ -411,6 +462,10 @@ func TestPDFailoverFailover(t *testing.T) { g.Expect(errors.IsNotFound(err)).To(BeTrue()) _, err = pf.pvcLister.PersistentVolumeClaims(metav1.NamespaceDefault).Get(pvcName) g.Expect(err).NotTo(HaveOccurred()) + events := collectEvents(recorder.Events) + g.Expect(events).To(HaveLen(2)) + g.Expect(events[0]).To(ContainSubstring("test-pd-1(12891273174085095651) is unhealthy")) + g.Expect(events[1]).To(ContainSubstring("test-pd-1(12891273174085095651) deleted from cluster")) }, }, } @@ -541,7 +596,8 @@ func newFakePDFailover() (*pdFailover, cache.Indexer, cache.Indexer, *pdapi.Fake podControl, pvcInformer.Lister(), pvcControl, - pvInformer.Lister()}, + pvInformer.Lister(), + nil}, pvcInformer.Informer().GetIndexer(), podInformer.Informer().GetIndexer(), pdControl, podControl, pvcControl @@ -649,3 +705,17 @@ func newPodForPDFailover(tc *v1alpha1.TidbCluster, memberType v1alpha1.MemberTyp }, } } + +func collectEvents(source <-chan string) []string { + done := false + events := make([]string, 0) + for !done { + select { + case event := <-source: + events = append(events, event) + default: + done = true + } + } + return events +}