Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix scheduled backup to ceph object storage #576

Merged
merged 15 commits into from
Jun 18, 2019
3 changes: 3 additions & 0 deletions charts/tidb-backup/templates/backup-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ spec:
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: backup
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
{{- if .Values.extraLabels }}
{{ toYaml .Values.extraLabels | indent 8 }}
{{- end }}
spec:
containers:
- name: backup
Expand Down
4 changes: 2 additions & 2 deletions charts/tidb-backup/templates/backup-pvc.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if eq .Values.mode "backup" }}
{{- if (or (eq .Values.mode "backup") (eq .Values.mode "scheduled-restore")) }}
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
Expand All @@ -7,7 +7,7 @@ metadata:
app.kubernetes.io/name: {{ template "chart.name" . }}
app.kubernetes.io/managed-by: tidb-operator
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: backup
app.kubernetes.io/component: {{ .Values.mode }}
pingcap.com/backup-cluster-name: {{ .Values.clusterName }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
spec:
Expand Down
5 changes: 4 additions & 1 deletion charts/tidb-backup/templates/restore-job.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if eq .Values.mode "restore" }}
{{- if (or (eq .Values.mode "restore") (eq .Values.mode "scheduled-restore")) }}
apiVersion: batch/v1
kind: Job
metadata:
Expand All @@ -16,6 +16,9 @@ spec:
app.kubernetes.io/name: {{ template "chart.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: restore
{{- if .Values.extraLabels }}
{{ toYaml .Values.extraLabels | indent 8 }}
{{- end }}
spec:
restartPolicy: OnFailure
containers:
Expand Down
16 changes: 14 additions & 2 deletions charts/tidb-backup/templates/scripts/_start_restore.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ downloader \
--cloud=gcp \
--bucket={{ .Values.gcp.bucket }} \
--srcDir=${BACKUP_NAME} \
--destDir=${dirname}
--destDir=/data
LinuxGit marked this conversation as resolved.
Show resolved Hide resolved
{{- end }}

{{- if .Values.ceph }}
Expand All @@ -18,9 +18,21 @@ downloader \
--bucket={{ .Values.ceph.bucket }} \
--endpoint={{ .Values.ceph.endpoint }} \
--srcDir=${BACKUP_NAME} \
--destDir=${dirname}
--destDir=/data
LinuxGit marked this conversation as resolved.
Show resolved Hide resolved
{{- end }}

# Wait until the mysql client can run a query against TiDB (port 4000) before
# /loader is invoked. The host passed to -h is produced by
# `eval echo '${'$host'}'`, which prints the value of the variable whose name
# is stored in $host.
count=1
while ! mysql -u ${TIDB_USER} -h `eval echo '${'$host'}'` -P 4000 -p${TIDB_PASSWORD} -e 'select version();'
do
echo "waiting for tidb, retry ${count} times ..."
# Pause 10 seconds between connection attempts.
sleep 10
# Give up once 180 attempts have failed (180 x 10 s = 30 minutes) and exit
# with a non-zero status.
if [ ${count} -ge 180 ];then
echo "30 minutes timeout"
exit 1
fi
let "count++"
done

/loader \
-d=${dirname} \
-h=`eval echo '${'$host'}'` \
Expand Down
6 changes: 5 additions & 1 deletion charts/tidb-backup/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@
# clusterName is the name of the TiDB cluster to back up from or restore to.
clusterName: demo

mode: backup # backup | restore
mode: backup # backup | restore | scheduled-restore
# name is the backup name
name: fullbackup-{{ date "200601021504" .Release.Time }}
image:
pullPolicy: IfNotPresent
# https://github.com/pingcap/tidb-cloud-backup
backup: pingcap/tidb-cloud-backup:20190610

# Additional labels to attach to the pod of the backup/restore job
# ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
extraLabels: {}

# secretName is the name of the secret which stores user and password used for backup/restore
# Note: the user must be granted sufficient privileges to perform the backup and restore
# you can create the secret by:
Expand Down
20 changes: 19 additions & 1 deletion charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ spec:
schedule: {{ .Values.scheduledBackup.schedule | quote }}
concurrencyPolicy: Forbid
suspend: {{ .Values.scheduledBackup.suspend }}
successfulJobsHistoryLimit: {{ .Values.scheduledBackup.successfulJobsHistoryLimit }}
failedJobsHistoryLimit: {{ .Values.scheduledBackup.failedJobsHistoryLimit }}
startingDeadlineSeconds: {{ .Values.scheduledBackup.startingDeadlineSeconds }}
jobTemplate:
metadata:
Expand Down Expand Up @@ -50,13 +52,29 @@ spec:
readOnly: true
{{- end }}
env:
- name: MY_POD_NAME
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
{{- if .Values.scheduledBackup.gcp }}
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /gcp/credentials.json
{{- end }}
{{- if .Values.scheduledBackup.ceph }}
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: {{ .Values.scheduledBackup.ceph.secretName }}
key: access_key
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: {{ .Values.scheduledBackup.ceph.secretName }}
key: secret_key
{{- end }}
- name: TIDB_USER
valueFrom:
Expand Down
19 changes: 12 additions & 7 deletions charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ set -euo pipefail

host=$(getent hosts {{ template "cluster.name" . }}-tidb | head | awk '{print $1}')

dirname=/data/scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME}
echo "making dir ${dirname}"
mkdir -p ${dirname}
timestamp=$(echo ${POD_NAME}|awk -F- '{print $(NF-1)}')
## Interpret the timestamp in UTC so the resulting backup name does not depend on the container's default time zone
backupName=scheduled-backup-`date -u -d @${timestamp} "+%Y%m%d-%H%M%S"`
backupPath=/data/${backupName}

echo "making dir ${backupPath}"
mkdir -p ${backupPath}

gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"`
echo "Old TiKV GC life time is ${gc_life_time}"
Expand All @@ -14,13 +18,14 @@ echo "Increase TiKV GC life time to 3h"
/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"

/mydumper \
--outputdir=${dirname} \
--outputdir=${backupPath} \
--host=${host} \
--port=4000 \
--user={{ .Values.scheduledBackup.user }} \
--user=${TIDB_USER} \
--password=${TIDB_PASSWORD} \
--long-query-guard=3600 \
--tidb-force-priority=LOW_PRIORITY \
--regex '^(?!(mysql\.))' \
{{ .Values.scheduledBackup.options }}

echo "Reset TiKV GC life time to ${gc_life_time}"
Expand All @@ -31,13 +36,13 @@ echo "Reset TiKV GC life time to ${gc_life_time}"
uploader \
--cloud=gcp \
--bucket={{ .Values.scheduledBackup.gcp.bucket }} \
--backup-dir=${dirname}
--backup-dir=${backupPath}
{{- end }}

{{- if .Values.scheduledBackup.ceph }}
uploader \
--cloud=ceph \
--bucket={{ .Values.scheduledBackup.ceph.bucket }} \
--endpoint={{ .Values.scheduledBackup.ceph.endpoint }} \
--backup-dir=${dirname}
--backup-dir=${backupPath}
{{- end }}