Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compare k8s pod running info with pd client health info, improve inspection mechanism #1484

Merged
merged 11 commits into from
Jan 8, 2020
20 changes: 14 additions & 6 deletions pkg/apis/pingcap/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,12 +499,13 @@ type Service struct {

// PDStatus is PD status
type PDStatus struct {
Synced bool `json:"synced,omitempty"`
Phase MemberPhase `json:"phase,omitempty"`
StatefulSet *apps.StatefulSetStatus `json:"statefulSet,omitempty"`
Members map[string]PDMember `json:"members,omitempty"`
Leader PDMember `json:"leader,omitempty"`
FailureMembers map[string]PDFailureMember `json:"failureMembers,omitempty"`
Synced bool `json:"synced,omitempty"`
Phase MemberPhase `json:"phase,omitempty"`
StatefulSet *apps.StatefulSetStatus `json:"statefulSet,omitempty"`
Members map[string]PDMember `json:"members,omitempty"`
Leader PDMember `json:"leader,omitempty"`
FailureMembers map[string]PDFailureMember `json:"failureMembers,omitempty"`
UnjoinedMembers map[string]UnjoinedMember `json:"unJoinedMembers,omitempty"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
UnjoinedMembers map[string]UnjoinedMember `json:"unJoinedMembers,omitempty"`
UnjoinedMembers map[string]UnjoinedMember `json:"unjoinedMembers,omitempty"`

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}

// PDMember is PD member
Expand All @@ -528,6 +529,13 @@ type PDFailureMember struct {
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}

// UnjoinedMember records information about a PD pod that has not joined the PD cluster
type UnjoinedMember struct {
Copy link
Contributor

@weekface weekface Jan 6, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of adding an attribute to TidbCluster, would it make sense to just emit UnjoinedMember and JoinedMember events on the TidbCluster?

Users can run `kubectl describe` to see these messages easily. ref: #1466

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think an event stream can express this concisely: as a user, I would have to read through the events and piece together each step myself. Showing the current state in the status field is a more concise way to present it.

PodName string `json:"podName,omitempty"`
PVCUID types.UID `json:"pvcUID,omitempty"`
CreatedAt metav1.Time `json:"createdAt,omitempty"`
}

// TiDBStatus is TiDB status
type TiDBStatus struct {
Phase MemberPhase `json:"phase,omitempty"`
Expand Down
24 changes: 24 additions & 0 deletions pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions pkg/manager/member/pd_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package member

import (
"fmt"
"github.com/pingcap/tidb-operator/pkg/util"
"strconv"
"strings"

Expand Down Expand Up @@ -407,6 +408,11 @@ func (pmm *pdMemberManager) syncTidbClusterStatus(tc *v1alpha1.TidbCluster, set
tc.Status.PD.Members = pdStatus
tc.Status.PD.Leader = tc.Status.PD.Members[leader.GetName()]

// k8s check
err = pmm.collectUnjoinedMembers(tc, set, pdStatus)
if err != nil {
return err
}
return nil
}

Expand Down Expand Up @@ -756,6 +762,52 @@ func getPDConfigMap(tc *v1alpha1.TidbCluster) (*corev1.ConfigMap, error) {
return cm, nil
}

// collectUnjoinedMembers reconciles tc.Status.PD.UnjoinedMembers against the
// pods currently selected by the PD StatefulSet.
//
// It lists the pods in tc.Namespace that match set.Spec.Selector and compares
// each pod name with the keys of pdStatus (the map the caller also stores in
// tc.Status.PD.Members). A pod whose name is not a key of pdStatus is recorded
// in tc.Status.PD.UnjoinedMembers together with the UID of its PVC; a pod whose
// name is a key of pdStatus has any existing entry removed.
//
// An error is returned if the selector cannot be converted, the pods cannot be
// listed, the ordinal cannot be parsed from a pod name, or the PVC lookup fails.
//
// NOTE(review): entries are only removed for pods returned by the lister, so an
// entry whose pod no longer exists is not pruned here.
func (pmm *pdMemberManager) collectUnjoinedMembers(tc *v1alpha1.TidbCluster, set *apps.StatefulSet, pdStatus map[string]v1alpha1.PDMember) error {
// Build a label selector from the StatefulSet spec and list the matching pods.
podSelector, podSelectErr := metav1.LabelSelectorAsSelector(set.Spec.Selector)
if podSelectErr != nil {
return podSelectErr
}
pods, podErr := pmm.podLister.Pods(tc.Namespace).List(podSelector)
if podErr != nil {
return podErr
}
for _, pod := range pods {
// A pod counts as joined when its name matches a pdStatus key; the
// comparison is case-insensitive (strings.EqualFold).
var joined = false
for podName := range pdStatus {
if strings.EqualFold(pod.Name, podName) {
joined = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
joined = true
joined = true
break

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}
}
if !joined {
// Create the status map lazily; writing to a nil map would panic.
if tc.Status.PD.UnjoinedMembers == nil {
tc.Status.PD.UnjoinedMembers = map[string]v1alpha1.UnjoinedMember{}
}
// Derive the PVC name from the pod's ordinal so the PVC UID can be recorded.
ordinal, err := util.GetOrdinalFromPodName(pod.Name)
if err != nil {
return err
}
pvcName := ordinalPVCName(v1alpha1.PDMemberType, controller.PDMemberName(tc.Name), ordinal)
pvc, err := pmm.pvcLister.PersistentVolumeClaims(tc.Namespace).Get(pvcName)
if err != nil {
return err
}
// NOTE(review): this assignment is unconditional, so CreatedAt is reset to
// the current time on every call while the pod stays unjoined.
tc.Status.PD.UnjoinedMembers[pod.Name] = v1alpha1.UnjoinedMember{
PodName: pod.Name,
PVCUID: pvc.UID,
CreatedAt: metav1.Now(),
}
} else {
// The pod is known to PD: drop any stale unjoined record for it.
if tc.Status.PD.UnjoinedMembers != nil {
if _, ok := tc.Status.PD.UnjoinedMembers[pod.Name]; ok {
delete(tc.Status.PD.UnjoinedMembers, pod.Name)
}

}
}
}
return nil
}

type FakePDMemberManager struct {
err error
}
Expand Down
138 changes: 138 additions & 0 deletions pkg/manager/member/pd_member_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1408,3 +1408,141 @@ func TestGetNewPdServiceForTidbCluster(t *testing.T) {
})
}
}

func TestPDMemberManagerSyncPDStsWhenPdNotJoinCluster(t *testing.T) {
g := NewGomegaWithT(t)
type testcase struct {
name string
modify func(cluster *v1alpha1.TidbCluster, podIndexer cache.Indexer, pvcIndexer cache.Indexer)
pdHealth *pdapi.HealthInfo
tcStatusChange func(cluster *v1alpha1.TidbCluster)
err bool
expectTidbClusterFn func(*GomegaWithT, *v1alpha1.TidbCluster)
}

testFn := func(test *testcase, t *testing.T) {
tc := newTidbClusterForPD()
ns := tc.Namespace
tcName := tc.Name

pmm, _, _, fakePDControl, podIndexer, pvcIndexer, _ := newFakePDMemberManager()
pdClient := controller.NewFakePDClient(fakePDControl, tc)

pdClient.AddReaction(pdapi.GetHealthActionType, func(action *pdapi.Action) (interface{}, error) {
return test.pdHealth, nil
})
pdClient.AddReaction(pdapi.GetClusterActionType, func(action *pdapi.Action) (interface{}, error) {
return &metapb.Cluster{Id: uint64(1)}, nil
})

err := pmm.Sync(tc)
g.Expect(controller.IsRequeueError(err)).To(BeTrue())
_, err = pmm.svcLister.Services(ns).Get(controller.PDMemberName(tcName))
g.Expect(err).NotTo(HaveOccurred())
_, err = pmm.svcLister.Services(ns).Get(controller.PDPeerMemberName(tcName))
g.Expect(err).NotTo(HaveOccurred())
_, err = pmm.setLister.StatefulSets(ns).Get(controller.PDMemberName(tcName))
g.Expect(err).NotTo(HaveOccurred())
if test.tcStatusChange != nil {
test.tcStatusChange(tc)
}
test.modify(tc, podIndexer, pvcIndexer)
err = pmm.syncPDStatefulSetForTidbCluster(tc)
if test.err {
g.Expect(err).To(HaveOccurred())
} else {
g.Expect(err).NotTo(HaveOccurred())
}
if test.expectTidbClusterFn != nil {
test.expectTidbClusterFn(g, tc)
}
}
tests := []testcase{
{
name: "add pd unjoin cluster member info ",
modify: func(cluster *v1alpha1.TidbCluster, podIndexer cache.Indexer, pvcIndexer cache.Indexer) {
for ordinal := 0; ordinal < 3; ordinal++ {
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: ordinalPodName(v1alpha1.PDMemberType, cluster.GetName(), int32(ordinal)),
Namespace: metav1.NamespaceDefault,
Annotations: map[string]string{},
Labels: label.New().Instance(cluster.GetInstanceName()).PD().Labels(),
},
}
podIndexer.Add(pod)
}
for ordinal := 0; ordinal < 3; ordinal++ {
pvc := &corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: ordinalPVCName(v1alpha1.PDMemberType, controller.PDMemberName(cluster.GetName()), int32(ordinal)),
Namespace: metav1.NamespaceDefault,
Annotations: map[string]string{},
Labels: label.New().Instance(cluster.GetInstanceName()).PD().Labels(),
},
}
pvcIndexer.Add(pvc)
}

},
pdHealth: &pdapi.HealthInfo{Healths: []pdapi.MemberHealth{
{Name: "test-pd-0", MemberID: uint64(1), ClientUrls: []string{"http://test-pd-0:2379"}, Health: false},
{Name: "test-pd-1", MemberID: uint64(2), ClientUrls: []string{"http://test-pd-1:2379"}, Health: false},
}},
err: false,
expectTidbClusterFn: func(g *GomegaWithT, tc *v1alpha1.TidbCluster) {
g.Expect(tc.Status.PD.UnjoinedMembers["test-pd-2"]).NotTo(BeNil())
},
},
{
name: "clear unjoin cluster member info when the member join the cluster ",
tcStatusChange: func(cluster *v1alpha1.TidbCluster) {
cluster.Status.PD.UnjoinedMembers = map[string]v1alpha1.UnjoinedMember{
"test-pd-0": {
PodName: "test-pd-0",
CreatedAt: metav1.Now(),
},
}
},
modify: func(cluster *v1alpha1.TidbCluster, podIndexer cache.Indexer, pvcIndexer cache.Indexer) {
for ordinal := 0; ordinal < 3; ordinal++ {
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: ordinalPodName(v1alpha1.PDMemberType, cluster.GetName(), int32(ordinal)),
Namespace: metav1.NamespaceDefault,
Annotations: map[string]string{},
Labels: label.New().Instance(cluster.GetInstanceName()).PD().Labels(),
},
}
podIndexer.Add(pod)
}
for ordinal := 0; ordinal < 3; ordinal++ {
pvc := &corev1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: ordinalPVCName(v1alpha1.PDMemberType, controller.PDMemberName(cluster.GetName()), int32(ordinal)),
Namespace: metav1.NamespaceDefault,
Annotations: map[string]string{},
Labels: label.New().Instance(cluster.GetInstanceName()).PD().Labels(),
},
}
pvcIndexer.Add(pvc)
}

},
pdHealth: &pdapi.HealthInfo{Healths: []pdapi.MemberHealth{
{Name: "test-pd-0", MemberID: uint64(1), ClientUrls: []string{"http://test-pd-0:2379"}, Health: false},
{Name: "test-pd-1", MemberID: uint64(2), ClientUrls: []string{"http://test-pd-1:2379"}, Health: false},
{Name: "test-pd-2", MemberID: uint64(2), ClientUrls: []string{"http://test-pd-2:2379"}, Health: false},
}},
err: false,
expectTidbClusterFn: func(g *GomegaWithT, tc *v1alpha1.TidbCluster) {
g.Expect(tc.Status.PD.UnjoinedMembers).To(BeEmpty())
},
},
}
for i := range tests {
t.Logf("begin: %s", tests[i].name)
testFn(&tests[i], t)
t.Logf("end: %s", tests[i].name)
}
}