From fc490eb778235f61912b044dd8bdce8beafa14b1 Mon Sep 17 00:00:00 2001 From: weekface Date: Mon, 22 Jul 2019 16:13:43 +0800 Subject: [PATCH 1/4] add more logs --- pkg/apis/pingcap.com/v1alpha1/types.go | 17 +++++++------ .../v1alpha1/zz_generated.deepcopy.go | 9 ++++--- pkg/manager/member/orphan_pods_cleaner.go | 3 +++ pkg/manager/member/pd_failover.go | 9 +++++++ pkg/manager/member/pd_failover_test.go | 8 ++++--- pkg/manager/member/pd_scaler.go | 10 +++++++- pkg/manager/member/pd_upgrader.go | 9 +++---- pkg/manager/member/scaler.go | 14 ++++++++++- pkg/manager/member/tidb_failover.go | 7 +++++- pkg/manager/member/tidb_upgrader.go | 3 +++ pkg/manager/member/tikv_failover.go | 6 +++-- pkg/manager/member/tikv_scaler.go | 9 ++++++- pkg/manager/member/tikv_upgrader.go | 23 +++++++++++++----- pkg/manager/member/utils.go | 1 + pkg/scheduler/predicates/ha.go | 24 +++++++++++++++++-- 15 files changed, 119 insertions(+), 33 deletions(-) diff --git a/pkg/apis/pingcap.com/v1alpha1/types.go b/pkg/apis/pingcap.com/v1alpha1/types.go index a539b4c123f..2ef114530c0 100644 --- a/pkg/apis/pingcap.com/v1alpha1/types.go +++ b/pkg/apis/pingcap.com/v1alpha1/types.go @@ -199,10 +199,11 @@ type PDMember struct { // PDFailureMember is the pd failure member information type PDFailureMember struct { - PodName string `json:"podName,omitempty"` - MemberID string `json:"memberID,omitempty"` - PVCUID types.UID `json:"pvcUID,omitempty"` - MemberDeleted bool `json:"memberDeleted,omitempty"` + PodName string `json:"podName,omitempty"` + MemberID string `json:"memberID,omitempty"` + PVCUID types.UID `json:"pvcUID,omitempty"` + MemberDeleted bool `json:"memberDeleted,omitempty"` + CreatedAt metav1.Time `json:"createdAt,omitempty"` } // TiDBStatus is TiDB status @@ -226,7 +227,8 @@ type TiDBMember struct { // TiDBFailureMember is the tidb failure member information type TiDBFailureMember struct { - PodName string `json:"podName,omitempty"` + PodName string `json:"podName,omitempty"` + CreatedAt metav1.Time `json:"createdAt,omitempty"` } // TiKVStatus is TiKV status @@ -254,6 +256,7 @@ type TiKVStore struct { // TiKVFailureStore is the tikv failure store information type TiKVFailureStore struct { - PodName string `json:"podName,omitempty"` - StoreID string `json:"storeID,omitempty"` + PodName string `json:"podName,omitempty"` + StoreID string `json:"storeID,omitempty"` + CreatedAt metav1.Time `json:"createdAt,omitempty"` } diff --git a/pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go index 1bf3b6d605d..231c46ff993 100644 --- a/pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/pingcap.com/v1alpha1/zz_generated.deepcopy.go @@ -55,6 +55,7 @@ func (in *ContainerSpec) DeepCopy() *ContainerSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PDFailureMember) DeepCopyInto(out *PDFailureMember) { *out = *in + in.CreatedAt.DeepCopyInto(&out.CreatedAt) return } @@ -148,7 +149,7 @@ func (in *PDStatus) DeepCopyInto(out *PDStatus) { in, out := &in.FailureMembers, &out.FailureMembers *out = make(map[string]PDFailureMember, len(*in)) for key, val := range *in { - (*out)[key] = val + (*out)[key] = *val.DeepCopy() } } return @@ -199,6 +200,7 @@ func (in *Service) DeepCopy() *Service { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TiDBFailureMember) DeepCopyInto(out *TiDBFailureMember) { *out = *in + in.CreatedAt.DeepCopyInto(&out.CreatedAt) return } @@ -309,7 +311,7 @@ func (in *TiDBStatus) DeepCopyInto(out *TiDBStatus) { in, out := &in.FailureMembers, &out.FailureMembers *out = make(map[string]TiDBFailureMember, len(*in)) for key, val := range *in { - (*out)[key] = val + (*out)[key] = *val.DeepCopy() } } return @@ -328,6 +330,7 @@ func (in *TiDBStatus) DeepCopy() *TiDBStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TiKVFailureStore) DeepCopyInto(out *TiKVFailureStore) { *out = *in + in.CreatedAt.DeepCopyInto(&out.CreatedAt) return } @@ -427,7 +430,7 @@ func (in *TiKVStatus) DeepCopyInto(out *TiKVStatus) { in, out := &in.FailureStores, &out.FailureStores *out = make(map[string]TiKVFailureStore, len(*in)) for key, val := range *in { - (*out)[key] = val + (*out)[key] = *val.DeepCopy() } } return diff --git a/pkg/manager/member/orphan_pods_cleaner.go b/pkg/manager/member/orphan_pods_cleaner.go index 3ea725a024a..fcd70d9678e 100644 --- a/pkg/manager/member/orphan_pods_cleaner.go +++ b/pkg/manager/member/orphan_pods_cleaner.go @@ -14,6 +14,7 @@ package member import ( + "github.com/golang/glog" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/label" @@ -90,8 +91,10 @@ func (opc *orphanPodsCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string err = opc.podControl.DeletePod(tc, pod) if err != nil { + glog.Errorf("orphan pods cleaner: failed to clean orphan pod: %s/%s, %v", ns, podName, err) return skipReason, err } + glog.Infof("orphan pods cleaner: clean orphan pod: %s/%s successfully", ns, podName) } return skipReason, nil diff --git a/pkg/manager/member/pd_failover.go b/pkg/manager/member/pd_failover.go index 19773c7f70e..89437500532 100644 --- a/pkg/manager/member/pd_failover.go +++ b/pkg/manager/member/pd_failover.go @@ -18,12 +18,14 @@ import ( "strconv" "time" + "github.com/golang/glog" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" "github.com/pingcap/tidb-operator/pkg/client/clientset/versioned" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/pdapi" "github.com/pingcap/tidb-operator/pkg/util" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" corelisters "k8s.io/client-go/listers/core/v1" ) @@ -99,6 +101,7 @@ func (pf *pdFailover) Failover(tc *v1alpha1.TidbCluster) error { func (pf *pdFailover) Recover(tc *v1alpha1.TidbCluster) { tc.Status.PD.FailureMembers = nil + glog.Infof("pd failover: clearing pd failoverMembers, %s/%s", tc.GetNamespace(), tc.GetName()) } func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error { @@ -134,6 +137,7 @@ func (pf *pdFailover) tryToMarkAPeerAsFailure(tc *v1alpha1.TidbCluster) error { MemberID: pdMember.ID, PVCUID: pvc.UID, MemberDeleted: false, + CreatedAt: metav1.Now(), } return controller.RequeueErrorf("marking Pod: %s/%s pd member: %s as failure", ns, podName, pdMember.Name) } @@ -165,8 +169,10 @@ func (pf *pdFailover) tryToDeleteAFailureMember(tc *v1alpha1.TidbCluster) error // invoke deleteMember api to delete a member from the pd cluster err = controller.GetPDClient(pf.pdControl, tc).DeleteMemberByID(memberID) if err != nil { + glog.Errorf("pd failover: failed to delete member: %d, %v", memberID, err) return err } + glog.Infof("pd failover: delete member: %d successfully", memberID) // The order of old PVC deleting and the new Pod creating is not guaranteed by Kubernetes. // If new Pod is created before old PVC deleted, new Pod will reuse old PVC. @@ -196,8 +202,10 @@ func (pf *pdFailover) tryToDeleteAFailureMember(tc *v1alpha1.TidbCluster) error if pvc != nil && pvc.DeletionTimestamp == nil && pvc.GetUID() == failureMember.PVCUID { err = pf.pvcControl.DeletePVC(tc, pvc) if err != nil { + glog.Errorf("pd failover: failed to delete pvc: %s/%s, %v", ns, pvcName, err) return err } + glog.Infof("pd failover: pvc: %s/%s successfully", ns, pvcName) } setMemberDeleted(tc, failurePodName) @@ -208,6 +216,7 @@ func setMemberDeleted(tc *v1alpha1.TidbCluster, podName string) { failureMember := tc.Status.PD.FailureMembers[podName] failureMember.MemberDeleted = true tc.Status.PD.FailureMembers[podName] = failureMember + glog.Infof("pd failover: set pd member: %s/%s deleted", tc.GetName(), podName) } type fakePDFailover struct{} diff --git a/pkg/manager/member/pd_failover_test.go b/pkg/manager/member/pd_failover_test.go index 6ecba278747..28bf7e635dc 100644 --- a/pkg/manager/member/pd_failover_test.go +++ b/pkg/manager/member/pd_failover_test.go @@ -242,9 +242,11 @@ func TestPDFailoverFailover(t *testing.T) { expectFn: func(tc *v1alpha1.TidbCluster, _ *pdFailover) { g.Expect(int(tc.Spec.PD.Replicas)).To(Equal(3)) g.Expect(len(tc.Status.PD.FailureMembers)).To(Equal(1)) - g.Expect(tc.Status.PD.FailureMembers).To(Equal(map[string]v1alpha1.PDFailureMember{ - "test-pd-1": {PodName: "test-pd-1", MemberID: "12891273174085095651", PVCUID: "pvc-1-uid", MemberDeleted: false}, - })) + failureMembers := tc.Status.PD.FailureMembers["test-pd-1"] + g.Expect(failureMembers.PodName).To(Equal("test-pd-1")) + g.Expect(failureMembers.MemberID).To(Equal("12891273174085095651")) + g.Expect(string(failureMembers.PVCUID)).To(Equal("pvc-1-uid")) + g.Expect(failureMembers.MemberDeleted).To(BeFalse()) }, }, { diff --git a/pkg/manager/member/pd_scaler.go b/pkg/manager/member/pd_scaler.go index 2c4fe6be781..8a2c24f51e8 100644 --- a/pkg/manager/member/pd_scaler.go +++ b/pkg/manager/member/pd_scaler.go @@ -17,6 +17,7 @@ import ( "fmt" "time" + "github.com/golang/glog" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/label" @@ -102,9 +103,11 @@ func (psd *pdScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, err := controller.GetPDClient(psd.pdControl, tc).DeleteMember(memberName) if err != nil { + glog.Errorf("pd scale in: failed to delete member %s, %v", memberName, err) resetReplicas(newSet, oldSet) return err } + glog.Infof("pd scale in: delete member %s successfully", memberName) pvcName := ordinalPVCName(v1alpha1.PDMemberType, setName, ordinal) pvc, err := psd.pvcLister.PersistentVolumeClaims(ns).Get(pvcName) @@ -116,13 +119,18 @@ func (psd *pdScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, if pvc.Annotations == nil { pvc.Annotations = map[string]string{} } - pvc.Annotations[label.AnnPVCDeferDeleting] = time.Now().Format(time.RFC3339) + now := time.Now().Format(time.RFC3339) + pvc.Annotations[label.AnnPVCDeferDeleting] = now _, err = psd.pvcControl.UpdatePVC(tc, pvc) if err != nil { + glog.Errorf("pd scale in: failed to set pvc %s/%s annotation: %s to %s", + ns, pvcName, label.AnnPVCDeferDeleting, now) resetReplicas(newSet, oldSet) return err } + glog.Infof("pd scale in: set pvc %s/%s annotation: %s to %s", + ns, pvcName, label.AnnPVCDeferDeleting, now) decreaseReplicas(newSet, oldSet) return nil diff --git a/pkg/manager/member/pd_upgrader.go b/pkg/manager/member/pd_upgrader.go index bea952f1306..ea1d45ef1ad 100644 --- a/pkg/manager/member/pd_upgrader.go +++ b/pkg/manager/member/pd_upgrader.go @@ -16,6 +16,7 @@ package member import ( "fmt" + "github.com/golang/glog" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/pdapi" @@ -44,12 +45,6 @@ func (pu *pdUpgrader) Upgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet return pu.gracefulUpgrade(tc, oldSet, newSet) } -func (pu *pdUpgrader) forceUpgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, newSet *apps.StatefulSet) error { - tc.Status.PD.Phase = v1alpha1.UpgradePhase - setUpgradePartition(newSet, 0) - return nil -} - func (pu *pdUpgrader) gracefulUpgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, newSet *apps.StatefulSet) error { ns := tc.GetNamespace() tcName := tc.GetName() @@ -102,8 +97,10 @@ func (pu *pdUpgrader) upgradePDPod(tc *v1alpha1.TidbCluster, ordinal int32, newS } err := pu.transferPDLeaderTo(tc, targetName) if err != nil { + glog.Errorf("pd upgrader: failed to transfer pd leader to: %s, %v", targetName, err) return err } + glog.Infof("pd upgrader: transfer pd leader to: %s successfully", targetName) return controller.RequeueErrorf("tidbcluster: [%s/%s]'s pd member: [%s] is transferring leader to pd member: [%s]", ns, tcName, upgradePodName, targetName) } diff --git a/pkg/manager/member/scaler.go b/pkg/manager/member/scaler.go index eb93a1ff328..4ebbe1fb3d9 100644 --- a/pkg/manager/member/scaler.go +++ b/pkg/manager/member/scaler.go @@ -16,6 +16,7 @@ package member import ( "fmt" + "github.com/golang/glog" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/label" @@ -70,7 +71,14 @@ func (gs *generalScaler) deleteDeferDeletingPVC(tc *v1alpha1.TidbCluster, return skipReason, nil } - return skipReason, gs.pvcControl.DeletePVC(tc, pvc) + err = gs.pvcControl.DeletePVC(tc, pvc) + if err != nil { + glog.Errorf("scale out: failed to delete pvc %s/%s, %v", ns, pvcName, err) + return skipReason, err + } + glog.Infof("scale out: delete pvc %s/%s successfully", ns, pvcName) + + return skipReason, nil } func resetReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) { @@ -78,9 +86,13 @@ func resetReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) { } func increaseReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) { *newSet.Spec.Replicas = *oldSet.Spec.Replicas + 1 + glog.Infof("pd scale out: increase pd statefulset: %s/%s replicas to %d", + newSet.GetNamespace(), newSet.GetName(), newSet.Spec.Replicas) } func decreaseReplicas(newSet *apps.StatefulSet, oldSet *apps.StatefulSet) { *newSet.Spec.Replicas = *oldSet.Spec.Replicas - 1 + glog.Infof("pd scale in: decrease pd statefulset: %s/%s replicas to %d", + newSet.GetNamespace(), newSet.GetName(), newSet.Spec.Replicas) } func ordinalPVCName(memberType v1alpha1.MemberType, setName string, ordinal int32) string { diff --git a/pkg/manager/member/tidb_failover.go b/pkg/manager/member/tidb_failover.go index d100d12c635..1acdb2a7e72 100644 --- a/pkg/manager/member/tidb_failover.go +++ b/pkg/manager/member/tidb_failover.go @@ -18,6 +18,7 @@ import ( "github.com/golang/glog" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) type tidbFailover struct { @@ -40,6 +41,7 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error { _, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name] if exist && tidbMember.Health { delete(tc.Status.TiDB.FailureMembers, tidbMember.Name) + glog.Errorf("tidb failover: delete %s from tidb failoverMembers", tidbMember.Name) } } @@ -51,7 +53,10 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error { _, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name] deadline := tidbMember.LastTransitionTime.Add(tf.tidbFailoverPeriod) if !tidbMember.Health && time.Now().After(deadline) && !exist { - tc.Status.TiDB.FailureMembers[tidbMember.Name] = v1alpha1.TiDBFailureMember{PodName: tidbMember.Name} + tc.Status.TiDB.FailureMembers[tidbMember.Name] = v1alpha1.TiDBFailureMember{ + PodName: tidbMember.Name, + CreatedAt: metav1.Now(), + } break } } diff --git a/pkg/manager/member/tidb_upgrader.go b/pkg/manager/member/tidb_upgrader.go index 7e22705ffa0..0faba1a69ef 100644 --- a/pkg/manager/member/tidb_upgrader.go +++ b/pkg/manager/member/tidb_upgrader.go @@ -14,6 +14,7 @@ package member import ( + "github.com/golang/glog" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" apps "k8s.io/api/apps/v1beta1" @@ -86,9 +87,11 @@ func (tdu *tidbUpgrader) upgradeTiDBPod(tc *v1alpha1.TidbCluster, ordinal int32, if member, exist := tc.Status.TiDB.Members[tidbPodName(tcName, ordinal)]; exist && member.Health { hasResign, err := tdu.tidbControl.ResignDDLOwner(tc, ordinal) if (!hasResign || err != nil) && tc.Status.TiDB.ResignDDLOwnerRetryCount < MaxResignDDLOwnerCount { + glog.Errorf("tidb upgrader: failed to resign ddl owner to %s, %v", member.Name, err) tc.Status.TiDB.ResignDDLOwnerRetryCount++ return err } + glog.Infof("tidb upgrader: resign ddl owner to %s successfully", member.Name) } } diff --git a/pkg/manager/member/tikv_failover.go b/pkg/manager/member/tikv_failover.go index 8f39e1d8fa8..9481d991f35 100644 --- a/pkg/manager/member/tikv_failover.go +++ b/pkg/manager/member/tikv_failover.go @@ -17,6 +17,7 @@ import ( "time" "github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) type tikvFailover struct { @@ -47,8 +48,9 @@ func (tf *tikvFailover) Failover(tc *v1alpha1.TidbCluster) error { tc.Status.TiKV.FailureStores = map[string]v1alpha1.TiKVFailureStore{} } tc.Status.TiKV.FailureStores[storeID] = v1alpha1.TiKVFailureStore{ - PodName: podName, - StoreID: store.ID, + PodName: podName, + StoreID: store.ID, + CreatedAt: metav1.Now(), } } } diff --git a/pkg/manager/member/tikv_scaler.go b/pkg/manager/member/tikv_scaler.go index 5c96abc14d0..4160d2edd8a 100644 --- a/pkg/manager/member/tikv_scaler.go +++ b/pkg/manager/member/tikv_scaler.go @@ -88,9 +88,11 @@ func (tsd *tikvScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSe } if state != v1alpha1.TiKVStateOffline { if err := controller.GetPDClient(tsd.pdControl, tc).DeleteStore(id); err != nil { + glog.Errorf("tikv scale in: failed to delete store %d, %v", id, err) resetReplicas(newSet, oldSet) return err } + glog.Infof("tikv scale in: delete store %d successfully", id) } resetReplicas(newSet, oldSet) return controller.RequeueErrorf("TiKV %s/%s store %d still in cluster, state: %s", ns, podName, id, state) @@ -116,12 +118,17 @@ func (tsd *tikvScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSe if pvc.Annotations == nil { pvc.Annotations = map[string]string{} } - pvc.Annotations[label.AnnPVCDeferDeleting] = time.Now().Format(time.RFC3339) + now := time.Now().Format(time.RFC3339) + pvc.Annotations[label.AnnPVCDeferDeleting] = now _, err = tsd.pvcControl.UpdatePVC(tc, pvc) if err != nil { + glog.Errorf("tikv scale in: failed to set pvc %s/%s annotation: %s to %s", + ns, pvcName, label.AnnPVCDeferDeleting, now) resetReplicas(newSet, oldSet) return err } + glog.Infof("tikv scale in: set pvc %s/%s annotation: %s to %s", + ns, pvcName, label.AnnPVCDeferDeleting, now) decreaseReplicas(newSet, oldSet) return nil diff --git a/pkg/manager/member/tikv_upgrader.go b/pkg/manager/member/tikv_upgrader.go index e00139e0c36..fc9e05ba645 100644 --- a/pkg/manager/member/tikv_upgrader.go +++ b/pkg/manager/member/tikv_upgrader.go @@ -123,7 +123,6 @@ func (tku *tikvUpgrader) upgradeTiKVPod(tc *v1alpha1.TidbCluster, ordinal int32, } _, evicting := upgradePod.Annotations[EvictLeaderBeginTime] if !evicting { - glog.Infof("start to evict leader:index:%d,upgradePodName:%s,storeID:%d", ordinal, upgradePodName, storeID) return tku.beginEvictLeader(tc, storeID, upgradePod) } @@ -161,16 +160,29 @@ func (tku *tikvUpgrader) readyToUpgrade(upgradePod *corev1.Pod, store v1alpha1.T } func (tku *tikvUpgrader) beginEvictLeader(tc *v1alpha1.TidbCluster, storeID uint64, pod *corev1.Pod) error { + ns := tc.GetNamespace() + podName := pod.GetName() err := controller.GetPDClient(tku.pdControl, tc).BeginEvictLeader(storeID) if err != nil { + glog.Errorf("tikv upgrader: failed to begin evict leader: %d, %s/%s, %v", + storeID, ns, podName, err) return err } + glog.Infof("tikv upgrader: begin evict leader: %d, %s/%s successfully", storeID, ns, podName) if pod.Annotations == nil { pod.Annotations = map[string]string{} } - pod.Annotations[EvictLeaderBeginTime] = time.Now().Format(time.RFC3339) + now := time.Now().Format(time.RFC3339) + pod.Annotations[EvictLeaderBeginTime] = now _, err = tku.podControl.UpdatePod(tc, pod) - return err + if err != nil { + glog.Errorf("tikv upgrader: failed to set pod %s/%s annotation %s to %s, %v", + ns, podName, EvictLeaderBeginTime, now, err) + return err + } + glog.Infof("tikv upgrader: set pod %s/%s annotation %s to %s successfully", + ns, podName, EvictLeaderBeginTime, now) + return nil } func (tku *tikvUpgrader) endEvictLeader(tc *v1alpha1.TidbCluster, ordinal int32) error { @@ -186,11 +198,10 @@ func (tku *tikvUpgrader) endEvictLeader(tc *v1alpha1.TidbCluster, ordinal int32) err = tku.pdControl.GetPDClient(pdapi.Namespace(tc.GetNamespace()), tc.GetName()).EndEvictLeader(storeID) if err != nil { + glog.Errorf("tikv upgrader: failed to end evict leader storeID: %d ordinal: %d, %v", storeID, ordinal, err) return err } - - glog.Infof("successed to remove evict leader,ordinal:%d,storeID:%d", ordinal, storeID) - + glog.Infof("tikv upgrader: end evict leader storeID: %d ordinal: %d successfully", storeID, ordinal) return nil } diff --git a/pkg/manager/member/utils.go b/pkg/manager/member/utils.go index 7a1d061635d..55eaa6537da 100644 --- a/pkg/manager/member/utils.go +++ b/pkg/manager/member/utils.go @@ -182,6 +182,7 @@ func serviceEqual(new, old *corev1.Service) (bool, error) { // setUpgradePartition set statefulSet's rolling update partition func setUpgradePartition(set *apps.StatefulSet, upgradeOrdinal int32) { set.Spec.UpdateStrategy.RollingUpdate = &apps.RollingUpdateStatefulSetStrategy{Partition: &upgradeOrdinal} + glog.Infof("set %s/%s partition to %d successfully", set.GetNamespace(), set.GetName(), upgradeOrdinal) } func imagePullFailed(pod *corev1.Pod) bool { diff --git a/pkg/scheduler/predicates/ha.go b/pkg/scheduler/predicates/ha.go index 708b4cc6d95..ddd7ae5fa6e 100644 --- a/pkg/scheduler/predicates/ha.go +++ b/pkg/scheduler/predicates/ha.go @@ -106,6 +106,7 @@ func (h *ha) Filter(instanceName string, pod *apiv1.Pod, nodes []apiv1.Node) ([] return nil, err } replicas := getReplicasFrom(tc, component) + glog.Infof("ha: tidbcluster %s/%s component %s replicas %d", ns, tcName, component, replicas) nodeMap := make(map[string][]string) for _, node := range nodes { @@ -128,11 +129,13 @@ func (h *ha) Filter(instanceName string, pod *apiv1.Pod, nodes []apiv1.Node) ([] // replicas less than 3 cannot achieve high availability if replicas < 3 { minNodeNames = append(minNodeNames, nodeName) + glog.Infof("replicas is %d, add node %s to minNodeNames", replicas, nodeName) continue } podsCount := len(podNames) if podsCount+1 >= int(replicas+1)/2 { + glog.Infof("node %s podsCount is %d, skipping", nodeName, podsCount) continue } if min == -1 { @@ -140,6 +143,7 @@ func (h *ha) Filter(instanceName string, pod *apiv1.Pod, nodes []apiv1.Node) ([] } if podsCount > min { + glog.Infof("node %s podsCount %d > min %d, skipping", nodeName, podsCount, min) continue } if podsCount < min { @@ -151,6 +155,7 @@ func (h *ha) Filter(instanceName string, pod *apiv1.Pod, nodes []apiv1.Node) ([] if len(minNodeNames) == 0 { msg := fmt.Sprintf("can't schedule to nodes: %v, because these pods had been scheduled to nodes: %v", GetNodeNames(nodes), nodeMap) + glog.Info(msg) h.recorder.Event(pod, apiv1.EventTypeWarning, "FailedScheduling", msg) return nil, errors.New(msg) } @@ -209,8 +214,12 @@ func (h *ha) realAcquireLock(pod *apiv1.Pod) (*apiv1.PersistentVolumeClaim, *api delete(schedulingPVC.Annotations, label.AnnPVCPodScheduling) err = h.updatePVCFn(schedulingPVC) if err != nil { + glog.Errorf("ha: failed to delete pvc %s/%s annotation %s, %v", + ns, schedulingPVC.GetName(), label.AnnPVCPodScheduling, err) return schedulingPVC, currentPVC, err } + glog.Infof("ha: delete pvc %s/%s annotation %s successfully", + ns, schedulingPVC.GetName(), label.AnnPVCPodScheduling) return schedulingPVC, currentPVC, h.setCurrentPodScheduling(currentPVC) } @@ -246,11 +255,22 @@ func (h *ha) realTCGetFn(ns, tcName string) (*v1alpha1.TidbCluster, error) { } func (h *ha) setCurrentPodScheduling(pvc *apiv1.PersistentVolumeClaim) error { + ns := pvc.GetNamespace() + pvcName := pvc.GetName() if pvc.Annotations == nil { pvc.Annotations = map[string]string{} } - pvc.Annotations[label.AnnPVCPodScheduling] = time.Now().Format(time.RFC3339) - return h.updatePVCFn(pvc) + now := time.Now().Format(time.RFC3339) + pvc.Annotations[label.AnnPVCPodScheduling] = now + err := h.updatePVCFn(pvc) + if err != nil { + glog.Errorf("ha: failed to set pvc %s/%s annotation %s to %s, %v", + ns, pvcName, label.AnnPVCPodScheduling, now, err) + return err + } + glog.Infof("ha: set pvc %s/%s annotation %s to %s successfully", + ns, pvcName, label.AnnPVCPodScheduling, now) + return nil } func getTCNameFromPod(pod *apiv1.Pod, component string) string { From 1c0e652da2ab7348bb334a33f374159534f3e929 Mon Sep 17 00:00:00 2001 From: weekface Date: Mon, 22 Jul 2019 18:59:01 +0800 Subject: [PATCH 2/4] address comment --- pkg/manager/member/tidb_failover.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/manager/member/tidb_failover.go b/pkg/manager/member/tidb_failover.go index 1acdb2a7e72..4fd8e5cefd6 100644 --- a/pkg/manager/member/tidb_failover.go +++ b/pkg/manager/member/tidb_failover.go @@ -41,7 +41,7 @@ func (tf *tidbFailover) Failover(tc *v1alpha1.TidbCluster) error { _, exist := tc.Status.TiDB.FailureMembers[tidbMember.Name] if exist && tidbMember.Health { delete(tc.Status.TiDB.FailureMembers, tidbMember.Name) - glog.Errorf("tidb failover: delete %s from tidb failoverMembers", tidbMember.Name) + glog.Infof("tidb failover: delete %s from tidb failoverMembers", tidbMember.Name) } } From fb292372d9e2775ad0f2f10e1dc5afe6f963e2c1 Mon Sep 17 00:00:00 2001 From: weekface Date: Mon, 22 Jul 2019 19:26:02 +0800 Subject: [PATCH 3/4] address comment --- pkg/manager/member/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/manager/member/utils.go b/pkg/manager/member/utils.go index 55eaa6537da..e69f1c6ffa9 100644 --- a/pkg/manager/member/utils.go +++ b/pkg/manager/member/utils.go @@ -182,7 +182,7 @@ func serviceEqual(new, old *corev1.Service) (bool, error) { // setUpgradePartition set statefulSet's rolling update partition func setUpgradePartition(set *apps.StatefulSet, upgradeOrdinal int32) { set.Spec.UpdateStrategy.RollingUpdate = &apps.RollingUpdateStatefulSetStrategy{Partition: &upgradeOrdinal} - glog.Infof("set %s/%s partition to %d successfully", set.GetNamespace(), set.GetName(), upgradeOrdinal) + glog.Infof("set %s/%s partition to %d", set.GetNamespace(), set.GetName(), upgradeOrdinal) } func imagePullFailed(pod *corev1.Pod) bool { From d4f1f693014d8a9bf0c093ac6227a984361d71ab Mon Sep 17 00:00:00 2001 From: weekface Date: Mon, 22 Jul 2019 20:05:46 +0800 Subject: [PATCH 4/4] address comment --- pkg/scheduler/predicates/ha.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/predicates/ha.go b/pkg/scheduler/predicates/ha.go index ddd7ae5fa6e..e72792c29a9 100644 --- a/pkg/scheduler/predicates/ha.go +++ b/pkg/scheduler/predicates/ha.go @@ -135,7 +135,8 @@ func (h *ha) Filter(instanceName string, pod *apiv1.Pod, nodes []apiv1.Node) ([] podsCount := len(podNames) if podsCount+1 >= int(replicas+1)/2 { - glog.Infof("node %s podsCount is %d, skipping", nodeName, podsCount) + glog.Infof("node %s podsCount+1 is %d, int(replicas+1)/2 is %d, skipping", + nodeName, podsCount+1, int(replicas+1)/2) continue } if min == -1 {