Skip to content

Commit

Permalink
Clean pod schedule annotation (#790) (#909)
Browse files Browse the repository at this point in the history
* add retry logic for update pvc

* add pvc cleaner for clean the pod-scheduling annotation

* add e2e test
  • Loading branch information
sre-bot authored and onlymellb committed Sep 13, 2019
1 parent 1f28b2f commit 566f991
Show file tree
Hide file tree
Showing 8 changed files with 593 additions and 5 deletions.
11 changes: 10 additions & 1 deletion pkg/controller/tidbcluster/tidb_cluster_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func NewDefaultTidbClusterControl(
reclaimPolicyManager manager.Manager,
metaManager manager.Manager,
orphanPodsCleaner member.OrphanPodsCleaner,
pvcCleaner member.PVCCleanerInterface,
recorder record.EventRecorder) ControlInterface {
return &defaultTidbClusterControl{
tcControl,
Expand All @@ -50,6 +51,7 @@ func NewDefaultTidbClusterControl(
reclaimPolicyManager,
metaManager,
orphanPodsCleaner,
pvcCleaner,
recorder,
}
}
Expand All @@ -62,6 +64,7 @@ type defaultTidbClusterControl struct {
reclaimPolicyManager manager.Manager
metaManager manager.Manager
orphanPodsCleaner member.OrphanPodsCleaner
pvcCleaner member.PVCCleanerInterface
recorder record.EventRecorder
}

Expand Down Expand Up @@ -138,5 +141,11 @@ func (tcc *defaultTidbClusterControl) updateTidbCluster(tc *v1alpha1.TidbCluster
// - label.StoreIDLabelKey
// - label.MemberIDLabelKey
// - label.NamespaceLabelKey
return tcc.metaManager.Sync(tc)
if err := tcc.metaManager.Sync(tc); err != nil {
return err
}

// cleaning the pod scheduling annotation for pd and tikv
_, err := tcc.pvcCleaner.Clean(tc)
return err
}
3 changes: 2 additions & 1 deletion pkg/controller/tidbcluster/tidb_cluster_control_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ func newFakeTidbClusterControl() (ControlInterface, *meta.FakeReclaimPolicyManag
reclaimPolicyManager := meta.NewFakeReclaimPolicyManager()
metaManager := meta.NewFakeMetaManager()
opc := mm.NewFakeOrphanPodsCleaner()
control := NewDefaultTidbClusterControl(tcControl, pdMemberManager, tikvMemberManager, tidbMemberManager, reclaimPolicyManager, metaManager, opc, recorder)
pcc := mm.NewFakePVCCleaner()
control := NewDefaultTidbClusterControl(tcControl, pdMemberManager, tikvMemberManager, tidbMemberManager, reclaimPolicyManager, metaManager, opc, pcc, recorder)

return control, reclaimPolicyManager, pdMemberManager, tikvMemberManager, tidbMemberManager, metaManager
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/tidbcluster/tidb_cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ func NewController(
pvcInformer.Lister(),
kubeCli,
),
mm.NewRealPVCCleaner(
podInformer.Lister(),
pvcControl,
pvcInformer.Lister(),
),
recorder,
),
queue: workqueue.NewNamedRateLimitingQueue(
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/tidbcluster/tidb_cluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ func newFakeTidbClusterController() (*Controller, cache.Indexer, cache.Indexer)
podControl,
),
mm.NewFakeOrphanPodsCleaner(),
mm.NewFakePVCCleaner(),
recorder,
)

Expand Down
152 changes: 152 additions & 0 deletions pkg/manager/member/pvc_cleaner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package member

import (
"fmt"

"github.com/golang/glog"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap.com/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/label"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
corelisters "k8s.io/client-go/listers/core/v1"
)

const (
skipReasonPVCCleanerIsNotPDOrTiKV = "pvc cleaner: member type is not pd or tikv"
skipReasonPVCCleanerDeferDeletePVCNotHasLock = "pvc cleaner: defer delete PVC not has schedule lock"
skipReasonPVCCleanerPVCNotHasLock = "pvc cleaner: pvc not has schedule lock"
skipReasonPVCCleanerPodWaitingForScheduling = "pvc cleaner: waiting for pod scheduling"
skipReasonPVCCleanerPodNotFound = "pvc cleaner: the corresponding pod of pvc has not been found"
skipReasonPVCCleanerWaitingForPVCSync = "pvc cleaner: waiting for pvc's meta info to be synced"
)

// PVCCleaner implements the logic for cleaning the pvc related resource
type PVCCleanerInterface interface {
Clean(*v1alpha1.TidbCluster) (map[string]string, error)
}

type realPVCCleaner struct {
podLister corelisters.PodLister
pvcControl controller.PVCControlInterface
pvcLister corelisters.PersistentVolumeClaimLister
}

// NewRealPVCCleaner returns a realPVCCleaner
func NewRealPVCCleaner(
podLister corelisters.PodLister,
pvcControl controller.PVCControlInterface,
pvcLister corelisters.PersistentVolumeClaimLister) PVCCleanerInterface {
return &realPVCCleaner{
podLister,
pvcControl,
pvcLister,
}
}

func (rpc *realPVCCleaner) Clean(tc *v1alpha1.TidbCluster) (map[string]string, error) {
ns := tc.GetNamespace()
tcName := tc.GetName()
// for unit test
skipReason := map[string]string{}

selector, err := label.New().Instance(tc.GetLabels()[label.InstanceLabelKey]).Selector()
if err != nil {
return skipReason, fmt.Errorf("cluster %s/%s assemble label selector failed, err: %v", ns, tcName, err)
}

pvcs, err := rpc.pvcLister.PersistentVolumeClaims(ns).List(selector)
if err != nil {
return skipReason, fmt.Errorf("cluster %s/%s list pvc failed, selector: %s, err: %v", ns, tcName, selector, err)
}

for _, pvc := range pvcs {
pvcName := pvc.GetName()
l := label.Label(pvc.Labels)
if !(l.IsPD() || l.IsTiKV()) {
skipReason[pvcName] = skipReasonPVCCleanerIsNotPDOrTiKV
continue
}

if pvc.Annotations[label.AnnPVCDeferDeleting] != "" {
if _, exist := pvc.Annotations[label.AnnPVCPodScheduling]; !exist {
// The defer deleting PVC without pod scheduling annotation, do nothing
glog.V(4).Infof("cluster %s/%s defer delete pvc %s has not pod scheduling annotation, skip clean", ns, tcName, pvcName)
skipReason[pvcName] = skipReasonPVCCleanerDeferDeletePVCNotHasLock
continue
}
// The defer deleting PVC has pod scheduling annotation, so we need to delete the pod scheduling annotation
delete(pvc.Annotations, label.AnnPVCPodScheduling)
if _, err := rpc.pvcControl.UpdatePVC(tc, pvc); err != nil {
return skipReason, fmt.Errorf("cluster %s/%s remove pvc %s pod scheduling annotation faild, err: %v", ns, tcName, pvcName, err)
}
continue
}

podName, exist := pvc.Annotations[label.AnnPodNameKey]
if !exist {
// waiting for pvc's meta info to be synced
skipReason[pvcName] = skipReasonPVCCleanerWaitingForPVCSync
continue
}

pod, err := rpc.podLister.Pods(ns).Get(podName)
if err != nil {
if !errors.IsNotFound(err) {
return skipReason, fmt.Errorf("cluster %s/%s get pvc %s pod %s failed, err: %v", ns, tcName, pvcName, podName, err)
}
skipReason[pvcName] = skipReasonPVCCleanerPodNotFound
continue
}

if _, exist := pvc.Annotations[label.AnnPVCPodScheduling]; !exist {
// The PVC without pod scheduling annotation, do nothing
glog.V(4).Infof("cluster %s/%s pvc %s has not pod scheduling annotation, skip clean", ns, tcName, pvcName)
skipReason[pvcName] = skipReasonPVCCleanerPVCNotHasLock
continue
}

if pvc.Status.Phase != corev1.ClaimBound || pod.Spec.NodeName == "" {
// This pod has not been scheduled yet, no need to clean up the pvc pod schedule annotation
glog.V(4).Infof("cluster %s/%s pod %s has not been scheduled yet, skip clean pvc %s pod schedule annotation", ns, tcName, podName, pvcName)
skipReason[pvcName] = skipReasonPVCCleanerPodWaitingForScheduling
continue
}

delete(pvc.Annotations, label.AnnPVCPodScheduling)
if _, err := rpc.pvcControl.UpdatePVC(tc, pvc); err != nil {
return skipReason, fmt.Errorf("cluster %s/%s remove pvc %s pod scheduling annotation faild, err: %v", ns, tcName, pvcName, err)
}
glog.Infof("cluster %s/%s, clean pvc %s pod scheduling annotation successfully", ns, tcName, pvcName)
}

return skipReason, nil
}

var _ PVCCleanerInterface = &realPVCCleaner{}

type fakePVCCleaner struct{}

// NewFakePVCCleaner returns a fake PVC cleaner
func NewFakePVCCleaner() PVCCleanerInterface {
return &fakePVCCleaner{}
}

func (fpc *fakePVCCleaner) Clean(_ *v1alpha1.TidbCluster) (map[string]string, error) {
return nil, nil
}

var _ PVCCleanerInterface = &fakePVCCleaner{}
Loading

0 comments on commit 566f991

Please sign in to comment.