-
Notifications
You must be signed in to change notification settings - Fork 501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add scheduled-backup test case #322
Merged
weekface
merged 7 commits into
pingcap:stability
from
shuijing198799:yinliang/backup-and-restore
Mar 19, 2019
Merged
Changes from 2 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
a7e16bf
add scheduler test case
shuijing198799 2746c82
delete unnecessary step and add scheduled-backup-job check step
shuijing198799 79dfe9a
change name to backupPVC
shuijing198799 3dbddde
return a string instead of int in getbackupdir
shuijing198799 2eb505d
resolve conflict
shuijing198799 84d4698
resolve some merge error and naming conflict
shuijing198799 aa70d18
resolve some merge error and naming conflict
shuijing198799 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,8 +31,10 @@ import ( | |
"github.com/pingcap/tidb-operator/pkg/label" | ||
batchv1 "k8s.io/api/batch/v1" | ||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/api/errors" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/labels" | ||
"k8s.io/apimachinery/pkg/types" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
"k8s.io/client-go/kubernetes" | ||
) | ||
|
@@ -47,7 +49,8 @@ func NewOperatorActions(cli versioned.Interface, kubeCli kubernetes.Interface) O | |
|
||
const ( | ||
DefaultPollTimeout time.Duration = 10 * time.Minute | ||
DefaultPollInterval time.Duration = 1 * time.Minute | ||
DefaultPollInterval time.Duration = 10 * time.Second | ||
getBackupDirPodName = "get-backup-dir" | ||
) | ||
|
||
type OperatorActions interface { | ||
|
@@ -74,6 +77,7 @@ type OperatorActions interface { | |
CleanMonitor(info *TidbClusterInfo) error | ||
ForceDeploy(info *TidbClusterInfo) error | ||
CreateSecret(info *TidbClusterInfo) error | ||
getBackupDir(info *TidbClusterInfo) (int, error) | ||
} | ||
|
||
type FaultTriggerActions interface { | ||
|
@@ -113,6 +117,7 @@ type OperatorInfo struct { | |
} | ||
|
||
type TidbClusterInfo struct { | ||
Name string | ||
Namespace string | ||
ClusterName string | ||
OperatorTag string | ||
|
@@ -121,6 +126,7 @@ type TidbClusterInfo struct { | |
TiDBImage string | ||
StorageClassName string | ||
Password string | ||
InitSql string | ||
RecordCount string | ||
InsertBetchSize string | ||
Resources map[string]string | ||
|
@@ -129,9 +135,6 @@ type TidbClusterInfo struct { | |
|
||
func (tc *TidbClusterInfo) HelmSetString() string { | ||
|
||
// add a database and table for test | ||
initSql := `"create database record;use record;create table test(t char(32));"` | ||
|
||
set := map[string]string{ | ||
"clusterName": tc.ClusterName, | ||
"pd.storageClassName": tc.StorageClassName, | ||
|
@@ -143,7 +146,7 @@ func (tc *TidbClusterInfo) HelmSetString() string { | |
"tikv.image": tc.TiKVImage, | ||
"tidb.image": tc.TiDBImage, | ||
"tidb.passwordSecretName": "set-secret", | ||
"tidb.initSql": initSql, | ||
"tidb.initSql": tc.InitSql, | ||
} | ||
|
||
for k, v := range tc.Resources { | ||
|
@@ -243,6 +246,7 @@ func (oa *operatorActions) CleanTidbCluster(info *TidbClusterInfo) error { | |
info.ClusterName, | ||
fmt.Sprintf("%s-backup", info.ClusterName), | ||
fmt.Sprintf("%s-restore", info.ClusterName), | ||
fmt.Sprintf("%s-scheduler-backup", info.ClusterName), | ||
} | ||
for _, chartName := range charts { | ||
res, err := exec.Command("helm", "del", "--purge", chartName).CombinedOutput() | ||
|
@@ -252,6 +256,12 @@ func (oa *operatorActions) CleanTidbCluster(info *TidbClusterInfo) error { | |
} | ||
} | ||
|
||
err := oa.kubeCli.CoreV1().Pods(info.Namespace).Delete(getBackupDirPodName, &metav1.DeleteOptions{}) | ||
|
||
if err != nil && !errors.IsNotFound(err) { | ||
return fmt.Errorf("failed to delete dir pod %v", err) | ||
} | ||
|
||
setStr := label.New().Instance(info.ClusterName).String() | ||
|
||
resources := []string{"pvc"} | ||
|
@@ -849,7 +859,7 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterInfo) error { | |
}() | ||
sets := map[string]string{ | ||
"clusterName": info.ClusterName, | ||
"name": "test-backup", | ||
"name": info.Name, | ||
"mode": "backup", | ||
"user": "root", | ||
"password": info.Password, | ||
|
@@ -873,6 +883,7 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterInfo) error { | |
if err != nil { | ||
return fmt.Errorf("failed to launch adhoc backup job: %v, %s", err, string(res)) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
|
@@ -882,7 +893,7 @@ func (oa *operatorActions) CheckAdHocBackup(info *TidbClusterInfo) error { | |
glog.Infof("deploy clean backup end cluster[%s] namespace[%s]", info.ClusterName, info.Namespace) | ||
}() | ||
|
||
jobName := fmt.Sprintf("%s-%s", info.ClusterName, "test-backup") | ||
jobName := fmt.Sprintf("%s-%s", info.ClusterName, info.Name) | ||
fn := func() (bool, error) { | ||
job, err := oa.kubeCli.BatchV1().Jobs(info.Namespace).Get(jobName, metav1.GetOptions{}) | ||
if err != nil { | ||
|
@@ -901,6 +912,7 @@ func (oa *operatorActions) CheckAdHocBackup(info *TidbClusterInfo) error { | |
if err != nil { | ||
return fmt.Errorf("failed to launch scheduler backup job: %v", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
|
@@ -911,7 +923,7 @@ func (oa *operatorActions) Restore(from *TidbClusterInfo, to *TidbClusterInfo) e | |
}() | ||
sets := map[string]string{ | ||
"clusterName": to.ClusterName, | ||
"name": "test-backup", | ||
"name": to.Name, | ||
"mode": "restore", | ||
"user": "root", | ||
"password": to.Password, | ||
|
@@ -945,7 +957,7 @@ func (oa *operatorActions) CheckRestore(from *TidbClusterInfo, to *TidbClusterIn | |
glog.Infof("check restore end cluster[%s] namespace[%s]", to.ClusterName, to.Namespace) | ||
}() | ||
|
||
jobName := fmt.Sprintf("%s-restore-test-backup", to.ClusterName) | ||
jobName := fmt.Sprintf("%s-restore-%s", to.ClusterName, from.Name) | ||
fn := func() (bool, error) { | ||
job, err := oa.kubeCli.BatchV1().Jobs(to.Namespace).Get(jobName, metav1.GetOptions{}) | ||
if err != nil { | ||
|
@@ -1065,13 +1077,203 @@ func releaseIsExist(err error) bool { | |
} | ||
|
||
func (oa *operatorActions) DeployScheduledBackup(info *TidbClusterInfo) error { | ||
glog.Infof("begin to deploy scheduled backup") | ||
defer func() { | ||
glog.Infof("deploy shceduled backup end") | ||
}() | ||
|
||
cron := fmt.Sprintf("'*/1 * * * *'") | ||
sets := map[string]string{ | ||
"clusterName": info.ClusterName, | ||
"scheduledBackup.create": "true", | ||
"scheduledBackup.user": "root", | ||
"scheduledBackup.password": info.Password, | ||
"scheduledBackup.schedule": cron, | ||
"scheduledBackup.storage": "10Gi", | ||
} | ||
var buffer bytes.Buffer | ||
for k, v := range sets { | ||
set := fmt.Sprintf(" --set %s=%s", k, v) | ||
_, err := buffer.WriteString(set) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
|
||
setStr := buffer.String() | ||
|
||
cmd := fmt.Sprintf("helm upgrade %s /charts/%s/tidb-cluster %s", | ||
info.ClusterName, info.OperatorTag, setStr) | ||
|
||
glog.Infof("scheduled-backup delploy [%s]", cmd) | ||
res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() | ||
if err != nil { | ||
return fmt.Errorf("failed to launch scheduler backup job: %v, %s", err, string(res)) | ||
} | ||
return nil | ||
} | ||
|
||
func (oa *operatorActions) CheckScheduledBackup(info *TidbClusterInfo) error { | ||
glog.Infof("begin to check scheduler backup cluster[%s] namespace[%s]", info.ClusterName, info.Namespace) | ||
defer func() { | ||
glog.Infof("deploy check scheduler end cluster[%s] namespace[%s]", info.ClusterName, info.Namespace) | ||
}() | ||
|
||
jobName := fmt.Sprintf("%s-scheduled-backup", info.ClusterName) | ||
fn := func() (bool, error) { | ||
job, err := oa.kubeCli.BatchV1beta1().CronJobs(info.Namespace).Get(jobName, metav1.GetOptions{}) | ||
if err != nil { | ||
glog.Errorf("failed to get cronjobs %s ,%v", jobName, err) | ||
return false, nil | ||
} | ||
|
||
jobs, err := oa.kubeCli.BatchV1().Jobs(info.Namespace).List(metav1.ListOptions{}) | ||
if err != nil { | ||
glog.Errorf("failed to list jobs %s ,%v", info.Namespace, err) | ||
return false, nil | ||
} | ||
|
||
backupJobs := []batchv1.Job{} | ||
for _, j := range jobs.Items { | ||
if pid, found := getParentUIDFromJob(j); found && pid == job.UID { | ||
backupJobs = append(backupJobs, j) | ||
} | ||
} | ||
|
||
if len(backupJobs) == 0 { | ||
glog.Errorf("cluster [%s] scheduler jobs is creating, please wait!", info.ClusterName) | ||
return false, nil | ||
} | ||
|
||
for _, j := range backupJobs { | ||
if j.Status.Succeeded == 0 { | ||
glog.Errorf("cluster [%s] back up job is not completed, please wait! ", info.ClusterName) | ||
return false, nil | ||
} | ||
} | ||
|
||
return true, nil | ||
} | ||
|
||
err := wait.Poll(DefaultPollInterval, DefaultPollTimeout, fn) | ||
if err != nil { | ||
return fmt.Errorf("failed to launch scheduler backup job: %v", err) | ||
} | ||
|
||
// sleep 1 minute for cronjob | ||
time.Sleep(60 * time.Second) | ||
|
||
dirs, err := oa.getBackupDir(info) | ||
if err != nil { | ||
return fmt.Errorf("failed to get backup dir: %v", err) | ||
} | ||
|
||
if dirs != 3 { | ||
return fmt.Errorf("scheduler job failed!") | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func getParentUIDFromJob(j batchv1.Job) (types.UID, bool) { | ||
controllerRef := metav1.GetControllerOf(&j) | ||
|
||
if controllerRef == nil { | ||
return types.UID(""), false | ||
} | ||
|
||
if controllerRef.Kind != "CronJob" { | ||
glog.Infof("Job with non-CronJob parent, name %s namespace %s", j.Name, j.Namespace) | ||
return types.UID(""), false | ||
} | ||
|
||
return controllerRef.UID, true | ||
} | ||
|
||
func (oa *operatorActions) getBackupDir(info *TidbClusterInfo) (int, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. return a |
||
pod := &corev1.Pod{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: getBackupDirPodName, | ||
Namespace: info.Namespace, | ||
}, | ||
Spec: corev1.PodSpec{ | ||
Containers: []corev1.Container{ | ||
{ | ||
Name: getBackupDirPodName, | ||
Image: "pingcap/tidb-cloud-backup:latest", | ||
Command: []string{"sleep", "3000"}, | ||
VolumeMounts: []corev1.VolumeMount{ | ||
{ | ||
Name: "data", | ||
MountPath: "/data", | ||
}, | ||
}, | ||
}, | ||
}, | ||
Volumes: []corev1.Volume{ | ||
{ | ||
Name: "data", | ||
VolumeSource: corev1.VolumeSource{ | ||
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ | ||
ClaimName: info.Name, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
fn := func() (bool, error) { | ||
_, err := oa.kubeCli.CoreV1().Pods(info.Namespace).Get(getBackupDirPodName, metav1.GetOptions{}) | ||
if !errors.IsNotFound(err) { | ||
return false, nil | ||
} | ||
return true, nil | ||
} | ||
|
||
err := wait.Poll(DefaultPollInterval, DefaultPollTimeout, fn) | ||
|
||
if err != nil { | ||
return 0, fmt.Errorf("failed to delete pod %s", getBackupDirPodName) | ||
} | ||
|
||
_, err = oa.kubeCli.CoreV1().Pods(info.Namespace).Create(pod) | ||
if err != nil && !errors.IsAlreadyExists(err) { | ||
glog.Errorf("cluster: [%s/%s] create get backup dir pod failed, error :%v", info.Namespace, info.ClusterName, err) | ||
return 0, err | ||
} | ||
|
||
fn = func() (bool, error) { | ||
_, err := oa.kubeCli.CoreV1().Pods(info.Namespace).Get(getBackupDirPodName, metav1.GetOptions{}) | ||
if errors.IsNotFound(err) { | ||
return false, nil | ||
} | ||
return true, nil | ||
} | ||
|
||
err = wait.Poll(DefaultPollInterval, DefaultPollTimeout, fn) | ||
|
||
if err != nil { | ||
return 0, fmt.Errorf("failed to create pod %s", getBackupDirPodName) | ||
} | ||
|
||
cmd := fmt.Sprintf("kubectl exec %s -n %s ls /data", getBackupDirPodName, info.Namespace) | ||
glog.Infof(cmd) | ||
res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() | ||
if err != nil { | ||
glog.Errorf("cluster:[%s/%s] exec :%s failed,error:%v,result:%s", info.Namespace, info.ClusterName, cmd, err, res) | ||
return 0, err | ||
} | ||
|
||
dirs := strings.Split(string(res), "\n") | ||
glog.Infof("dirs in pod info name [%s] dir name [%s]", info.Name, strings.Join(dirs, ",")) | ||
return len(dirs), nil | ||
} | ||
|
||
func (info *TidbClusterInfo) FullName() string { | ||
return fmt.Sprintf("%s/%s", info.Namespace, info.ClusterName) | ||
} | ||
|
||
func (oa *operatorActions) DeployIncrementalBackup(from *TidbClusterInfo, to *TidbClusterInfo) error { | ||
return nil | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why change this to 10 seconds? It will make the logs very noisy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Xiaojing and I need to check the log, and the scheduled-backup CronJob launches a job every minute.