Skip to content

Commit

Permalink
refine fullbackup (#570)
Browse files Browse the repository at this point in the history
  • Loading branch information
weekface authored Jun 18, 2019
1 parent fe6bef7 commit ddcb27c
Show file tree
Hide file tree
Showing 11 changed files with 93 additions and 73 deletions.
16 changes: 0 additions & 16 deletions charts/tidb-backup/templates/backup-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,6 @@ spec:
app.kubernetes.io/component: backup
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
spec:
initContainers:
- name: get-ts
image: {{ .Values.image.binlog }}
imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" }}
command:
- /binlogctl
- -pd-urls=http://{{ .Values.clusterName }}-pd:2379
- -cmd=generate_meta
- -data-dir=/savepoint-dir
volumeMounts:
- name: savepoint-dir
mountPath: "/savepoint-dir"
containers:
- name: backup
image: {{ .Values.image.backup }}
Expand All @@ -41,8 +29,6 @@ spec:
- |-
{{ tuple "scripts/_start_backup.sh.tpl" . | include "helm-toolkit.utils.template" | indent 10 }}
volumeMounts:
- name: savepoint-dir
mountPath: "/savepoint-dir"
- name: data
mountPath: "/data"
{{- if .Values.gcp }}
Expand Down Expand Up @@ -81,8 +67,6 @@ spec:
key: password
restartPolicy: OnFailure
volumes:
- name: savepoint-dir
emptyDir: {}
- name: data
persistentVolumeClaim:
claimName: {{ .Values.name }}
Expand Down
31 changes: 22 additions & 9 deletions charts/tidb-backup/templates/scripts/_start_backup.sh.tpl
Original file line number Diff line number Diff line change
@@ -1,25 +1,38 @@
set -euo pipefail

host=`echo {{ .Values.clusterName }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'`
host=$(getent hosts {{ .Values.clusterName }}-tidb | head | awk '{print $1}')

dirname=/data/${BACKUP_NAME}
echo "making dir ${dirname}"
mkdir -p ${dirname}
cp /savepoint-dir/savepoint ${dirname}/

# the content of savepoint file is:
# commitTS = 408824443621605409
savepoint=`cat ${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'`
gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"`
echo "Old TiKV GC life time is ${gc_life_time}"

cat ${dirname}/savepoint
echo "Increase TiKV GC life time to 3h"
/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';"
/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"

# snapshot_args carries the optional --tidb-snapshot flag for mydumper.
# It is initialised to an empty string so that the later ${snapshot_args}
# expansion does not abort the script under `set -u` (nounset) when
# .Values.initialCommitTs is left empty.
snapshot_args=""
if [ -n "{{ .Values.initialCommitTs }}" ];
then
  snapshot_args="--tidb-snapshot={{ .Values.initialCommitTs }}"
  # Record the snapshot timestamp alongside the dump; the savepoint file
  # has the form "commitTS = <ts>".
  echo "commitTS = {{ .Values.initialCommitTs }}" > ${dirname}/savepoint
  cat ${dirname}/savepoint
fi

/mydumper \
--outputdir=${dirname} \
--host=`eval echo '${'$host'}'` \
--host=${host} \
--port=4000 \
--user=${TIDB_USER} \
--password=${TIDB_PASSWORD} \
--tidb-snapshot=${savepoint} \
{{ .Values.backupOptions }}
--long-query-guard=3600 \
--tidb-force-priority=LOW_PRIORITY \
{{ .Values.backupOptions }} ${snapshot_args}

echo "Reset TiKV GC life time to ${gc_life_time}"
/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='${gc_life_time}' where variable_name='tikv_gc_life_time';"
/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"

{{- if .Values.gcp }}
uploader \
Expand Down
16 changes: 13 additions & 3 deletions charts/tidb-backup/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ mode: backup # backup | restore
name: fullbackup-{{ date "200601021504" .Release.Time }}
image:
pullPolicy: IfNotPresent
binlog: pingcap/tidb-binlog:v3.0.0-rc.1
# https://github.com/tennix/tidb-cloud-backup
backup: pingcap/tidb-cloud-backup:latest
# https://github.com/pingcap/tidb-cloud-backup
backup: pingcap/tidb-cloud-backup:20190610

# secretName is the name of the secret which stores user and password used for backup/restore
# Note: you must grant this user sufficient privileges to perform the backup and restore
Expand All @@ -26,6 +25,17 @@ storage:

# backupOptions are the command-line options passed to mydumper: https://github.com/maxbube/mydumper/blob/master/docs/mydumper_usage.rst#options
backupOptions: "--verbose=3"
# initialCommitTs sets the tidb_snapshot (commit timestamp) to be used for the backup.
# Use `show master status` to get the timestamp; it is the value in the Position column:
# MySQL [(none)]> show master status;
# +-------------+--------------------+--------------+------------------+-------------------+
# | File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
# +-------------+--------------------+--------------+------------------+-------------------+
# | tidb-binlog | 409076965619269635 | | | |
# +-------------+--------------------+--------------+------------------+-------------------+
# 1 row in set (0.01 sec)
# For this example, "409076965619269635" is the initialCommitTs
initialCommitTs: ""
# restoreOptions are the command-line options passed to loader: https://www.pingcap.com/docs-cn/tools/loader/
restoreOptions: "-t 16"

Expand Down
16 changes: 0 additions & 16 deletions charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,6 @@ spec:
app.kubernetes.io/component: scheduled-backup
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
spec:
initContainers:
- name: get-ts
image: {{ .Values.scheduledBackup.binlogImage }}
imagePullPolicy: {{ .Values.scheduledBackup.binlogImagePullPolicy | default "IfNotPresent" }}
command:
- /binlogctl
- -pd-urls=http://{{ template "cluster.name" . }}-pd:2379
- -cmd=generate_meta
- -data-dir=/savepoint-dir
volumeMounts:
- name: savepoint-dir
mountPath: "/savepoint-dir"
containers:
- name: scheduled-backup
image: {{ .Values.scheduledBackup.mydumperImage }}
Expand All @@ -54,8 +42,6 @@ spec:
- |-
{{ tuple "scripts/_start_scheduled_backup.sh.tpl" . | include "helm-toolkit.utils.template" | indent 14 }}
volumeMounts:
- name: savepoint-dir
mountPath: "/savepoint-dir"
- name: data
mountPath: "/data"
{{- if .Values.scheduledBackup.gcp }}
Expand Down Expand Up @@ -84,8 +70,6 @@ spec:
key: password
restartPolicy: OnFailure
volumes:
- name: savepoint-dir
emptyDir: {}
- name: data
persistentVolumeClaim:
claimName: {{ template "cluster.name" . }}-scheduled-backup
Expand Down
Original file line number Diff line number Diff line change
@@ -1,36 +1,43 @@
set -euo pipefail
dirname=scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME}
host=`echo {{ template "cluster.name" . }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'`

mkdir -p /data/${dirname}/
cp /savepoint-dir/savepoint /data/${dirname}/
host=$(getent hosts {{ template "cluster.name" . }}-tidb | head | awk '{print $1}')

# the content of savepoint file is:
# commitTS = 408824443621605409
savepoint=`cat /data/${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'`
dirname=/data/scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME}
echo "making dir ${dirname}"
mkdir -p ${dirname}

cat /data/${dirname}/savepoint
gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"`
echo "Old TiKV GC life time is ${gc_life_time}"

echo "Increase TiKV GC life time to 3h"
/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';"
/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"

/mydumper \
--outputdir=/data/${dirname} \
--host=`eval echo '${'$host'}'` \
--outputdir=${dirname} \
--host=${host} \
--port=4000 \
--user={{ .Values.scheduledBackup.user }} \
--password=${TIDB_PASSWORD} \
--tidb-snapshot=${savepoint} \
--long-query-guard=3600 \
--tidb-force-priority=LOW_PRIORITY \
{{ .Values.scheduledBackup.options }}

echo "Reset TiKV GC life time to ${gc_life_time}"
/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='${gc_life_time}' where variable_name='tikv_gc_life_time';"
/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"

{{- if .Values.scheduledBackup.gcp }}
uploader \
--cloud=gcp \
--bucket={{ .Values.scheduledBackup.gcp.bucket }} \
--backup-dir=/data/${dirname}
--backup-dir=${dirname}
{{- end }}

{{- if .Values.scheduledBackup.ceph }}
uploader \
--cloud=ceph \
--bucket={{ .Values.scheduledBackup.ceph.bucket }} \
--endpoint={{ .Values.scheduledBackup.ceph.endpoint }} \
--backup-dir=/data/${dirname}
--backup-dir=${dirname}
{{- end }}
6 changes: 2 additions & 4 deletions charts/tidb-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -437,10 +437,8 @@ binlog:

scheduledBackup:
create: false
binlogImage: pingcap/tidb-binlog:v3.0.0-rc.1
binlogImagePullPolicy: IfNotPresent
# https://github.com/tennix/tidb-cloud-backup
mydumperImage: pingcap/tidb-cloud-backup:latest
# https://github.com/pingcap/tidb-cloud-backup
mydumperImage: pingcap/tidb-cloud-backup:20190610
mydumperImagePullPolicy: IfNotPresent
# storageClassName is the name of a StorageClass, which provides a way for administrators to describe the "classes" of storage they offer.
# different classes might map to quality-of-service levels, or to backup policies,
Expand Down
37 changes: 31 additions & 6 deletions tests/actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -1594,13 +1594,38 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterConfig) error {
oa.EmitEvent(info, "DeployAdHocBackup")
glog.Infof("begin to deploy adhoc backup cluster[%s] namespace[%s]", info.ClusterName, info.Namespace)

getTSCmd := fmt.Sprintf("set -euo pipefail; mysql -u%s -p%s -h%s-tidb.%s -P 4000 -Nse 'show master status;' | awk '{print $2}'",
info.UserName,
info.Password,
info.ClusterName,
info.Namespace,
)
glog.Info(getTSCmd)

var tsStr string
getTSFn := func() (bool, error) {
res, err := exec.Command("/bin/sh", "-c", getTSCmd).CombinedOutput()
if err != nil {
glog.Errorf("failed to get ts %v, %s", err, string(res))
return false, nil
}
tsStr = string(res)
return true, nil
}

err := wait.Poll(DefaultPollInterval, BackupAndRestorePollTimeOut, getTSFn)
if err != nil {
return err
}

sets := map[string]string{
"name": info.BackupName,
"mode": "backup",
"user": "root",
"password": info.Password,
"storage.size": "10Gi",
"backupOptions": "\"--verbose=3\"",
"name": info.BackupName,
"mode": "backup",
"user": "root",
"password": info.Password,
"storage.size": "10Gi",
"backupOptions": "\"--verbose=3\"",
"initialCommitTs": strings.TrimSpace(tsStr),
}

setString := info.BackupHelmSetString(sets)
Expand Down
5 changes: 2 additions & 3 deletions tests/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ type Nodes struct {
// NewConfig creates a new config.
func NewConfig() (*Config, error) {
cfg := &Config{
OperatorRepoUrl: "https://github.com/pingcap/tidb-operator.git",

PDMaxReplicas: 5,
TiDBTokenLimit: 1024,
TiKVGrpcConcurrency: 8,
Expand All @@ -78,10 +76,11 @@ func NewConfig() (*Config, error) {
flag.StringVar(&cfg.configFile, "config", "", "Config file")
flag.StringVar(&cfg.LogDir, "log-dir", "/logDir", "log directory")
flag.IntVar(&cfg.FaultTriggerPort, "fault-trigger-port", 23332, "the http port of fault trigger service")
flag.StringVar(&cfg.TidbVersions, "tidb-versions", "v3.0.0-beta.1,v3.0.0-rc.1", "tidb versions")
flag.StringVar(&cfg.TidbVersions, "tidb-versions", "v3.0.0-rc.1,v3.0.0-rc.2", "tidb versions")
flag.StringVar(&cfg.OperatorTag, "operator-tag", "master", "operator tag used to choose charts")
flag.StringVar(&cfg.OperatorImage, "operator-image", "pingcap/tidb-operator:latest", "operator image")
flag.StringVar(&cfg.OperatorRepoDir, "operator-repo-dir", "/tidb-operator", "local directory to which tidb-operator cloned")
flag.StringVar(&cfg.OperatorRepoUrl, "operator-repo-url", "https://github.com/pingcap/tidb-operator.git", "tidb-operator repo url used")
flag.StringVar(&cfg.ChartDir, "chart-dir", "", "chart dir")
flag.StringVar(&slack.WebhookURL, "slack-webhook-url", "", "slack webhook url")
flag.Parse()
Expand Down
2 changes: 1 addition & 1 deletion tests/failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ func (oa *operatorActions) tikvFailover(pod *corev1.Pod, tc *v1alpha1.TidbCluste
healthCount++
}
}
if tc.Status.TiKV.Synced && healthCount == int(tc.Spec.TiKV.Replicas) {
if tc.Status.TiKV.Synced && healthCount >= int(tc.Spec.TiKV.Replicas) {
return true
}

Expand Down
2 changes: 1 addition & 1 deletion tests/images/e2e/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM alpine:3.5
ENV KUBECTL_VERSION=v1.12.2
ENV HELM_VERSION=v2.9.1

RUN apk update && apk add --no-cache ca-certificates curl git openssl bash
RUN apk update && apk add --no-cache ca-certificates curl git openssl bash mysql-client
RUN curl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl \
-o /usr/local/bin/kubectl && \
chmod +x /usr/local/bin/kubectl && \
Expand Down
2 changes: 1 addition & 1 deletion tests/images/stability-test/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM alpine:3.5
ENV KUBECTL_VERSION=v1.12.2
ENV HELM_VERSION=v2.9.1

RUN apk update && apk add --no-cache ca-certificates curl git openssl bash
RUN apk update && apk add --no-cache ca-certificates curl git openssl bash mysql-client
RUN curl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl \
-o /usr/local/bin/kubectl && \
chmod +x /usr/local/bin/kubectl && \
Expand Down

0 comments on commit ddcb27c

Please sign in to comment.