Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix scheduled backup to ceph object storage #576

Merged
merged 15 commits into from
Jun 18, 2019
3 changes: 3 additions & 0 deletions charts/tidb-backup/templates/backup-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ spec:
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: backup
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
{{- if .Values.extraLabels }}
{{ toYaml .Values.extraLabels | indent 8 }}
{{- end }}
spec:
initContainers:
- name: get-ts
Expand Down
5 changes: 4 additions & 1 deletion charts/tidb-backup/templates/restore-job.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if eq .Values.mode "restore" }}
{{- if (or (eq .Values.mode "restore") (eq .Values.mode "scheduled-restore")) }}
apiVersion: batch/v1
kind: Job
metadata:
Expand All @@ -16,6 +16,9 @@ spec:
app.kubernetes.io/name: {{ template "chart.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: restore
{{- if .Values.extraLabels }}
{{ toYaml .Values.extraLabels | indent 8 }}
{{- end }}
spec:
restartPolicy: OnFailure
containers:
Expand Down
21 changes: 21 additions & 0 deletions charts/tidb-backup/templates/scheduled-restore-pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- if eq .Values.mode "scheduled-restore" }}
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: {{ tpl .Values.name . }}
labels:
app.kubernetes.io/name: {{ template "chart.name" . }}
app.kubernetes.io/managed-by: tidb-operator
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: scheduled-restore
pingcap.com/backup-cluster-name: {{ .Values.clusterName }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
spec:
accessModes:
- ReadWriteOnce
volumeMode: Filesystem
resources:
requests:
storage: {{ .Values.storage.size }}
storageClassName: {{ .Values.storage.className }}
{{- end }}
16 changes: 14 additions & 2 deletions charts/tidb-backup/templates/scripts/_start_restore.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ downloader \
--cloud=gcp \
--bucket={{ .Values.gcp.bucket }} \
--srcDir=${BACKUP_NAME} \
--destDir=${dirname}
--destDir=/data
{{- end }}

{{- if .Values.ceph }}
Expand All @@ -18,9 +18,21 @@ downloader \
--bucket={{ .Values.ceph.bucket }} \
--endpoint={{ .Values.ceph.endpoint }} \
--srcDir=${BACKUP_NAME} \
--destDir=${dirname}
--destDir=/data
{{- end }}

count=1
while ! mysql -u ${TIDB_USER} -h `eval echo '${'$host'}'` -P 4000 -p${TIDB_PASSWORD} -e 'select version();'
do
echo "waiting for tidb, retry ${count} times ..."
sleep 10
if [ ${count} -ge 180 ];then
echo "30 minutes timeout"
exit 1
fi
let "count++"
done

/loader \
-d=${dirname} \
-h=`eval echo '${'$host'}'` \
Expand Down
6 changes: 5 additions & 1 deletion charts/tidb-backup/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# clusterName is the TiDB cluster name that should backup from or restore to.
clusterName: demo

mode: backup # backup | restore
mode: backup # backup | restore | scheduled-restore
# name is the backup name
name: fullbackup-{{ date "200601021504" .Release.Time }}
image:
Expand All @@ -14,6 +14,10 @@ image:
# https://github.com/tennix/tidb-cloud-backup
backup: pingcap/tidb-cloud-backup:latest

# Add additional labels for backup/restore job's pod
# ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
extraLabels: {}

# secretName is the name of the secret which stores user and password used for backup/restore
# Note: you must give the user enough privilege to do the backup and restore
# you can create the secret by:
Expand Down
20 changes: 19 additions & 1 deletion charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ spec:
schedule: {{ .Values.scheduledBackup.schedule | quote }}
concurrencyPolicy: Forbid
suspend: {{ .Values.scheduledBackup.suspend }}
successfulJobsHistoryLimit: {{ .Values.scheduledBackup.successfulJobsHistoryLimit }}
failedJobsHistoryLimit: {{ .Values.scheduledBackup.failedJobsHistoryLimit }}
startingDeadlineSeconds: {{ .Values.scheduledBackup.startingDeadlineSeconds }}
jobTemplate:
metadata:
Expand Down Expand Up @@ -64,13 +66,29 @@ spec:
readOnly: true
{{- end }}
env:
- name: MY_POD_NAME
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
{{- if .Values.scheduledBackup.gcp }}
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /gcp/credentials.json
{{- end }}
{{- if .Values.scheduledBackup.ceph }}
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: {{ .Values.scheduledBackup.ceph.secretName }}
key: access_key
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: {{ .Values.scheduledBackup.ceph.secretName }}
key: secret_key
{{- end }}
- name: TIDB_USER
valueFrom:
Expand Down
23 changes: 14 additions & 9 deletions charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,36 +1,41 @@
set -euo pipefail
dirname=scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME}

timestamp=$(echo ${POD_NAME}|awk -F- '{print $(NF-1)}')
## use UTC time zone to resolve timestamp, avoiding different parsing results due to different default time zones
backupName=scheduled-backup-`date -u -d @${timestamp} "+%Y%m%d-%H%M%S"`
backupPath=/data/${backupName}
host=`echo {{ template "cluster.name" . }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'`

mkdir -p /data/${dirname}/
cp /savepoint-dir/savepoint /data/${dirname}/
mkdir -p ${backupPath}
cp /savepoint-dir/savepoint ${backupPath}

# the content of savepoint file is:
# commitTS = 408824443621605409
savepoint=`cat /data/${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'`
savepoint=`cat ${backupPath}/savepoint | cut -d "=" -f2 | sed 's/ *//g'`

cat /data/${dirname}/savepoint
cat ${backupPath}/savepoint

/mydumper \
--outputdir=/data/${dirname} \
--outputdir=${backupPath} \
--host=`eval echo '${'$host'}'` \
--port=4000 \
--user={{ .Values.scheduledBackup.user }} \
--user=${TIDB_USER} \
--password=${TIDB_PASSWORD} \
--tidb-snapshot=${savepoint} \
--regex '^(?!(mysql\.))' \
{{ .Values.scheduledBackup.options }}

{{- if .Values.scheduledBackup.gcp }}
uploader \
--cloud=gcp \
--bucket={{ .Values.scheduledBackup.gcp.bucket }} \
--backup-dir=/data/${dirname}
--backup-dir=${backupPath}
{{- end }}

{{- if .Values.scheduledBackup.ceph }}
uploader \
--cloud=ceph \
--bucket={{ .Values.scheduledBackup.ceph.bucket }} \
--endpoint={{ .Values.scheduledBackup.ceph.endpoint }} \
--backup-dir=/data/${dirname}
--backup-dir=${backupPath}
{{- end }}
4 changes: 2 additions & 2 deletions charts/tidb-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -439,8 +439,8 @@ scheduledBackup:
create: false
binlogImage: pingcap/tidb-binlog:v3.0.0-rc.1
binlogImagePullPolicy: IfNotPresent
# https://github.com/tennix/tidb-cloud-backup
mydumperImage: pingcap/tidb-cloud-backup:latest
# https://github.com/pingcap/tidb-cloud-backup
mydumperImage: pingcap/tidb-cloud-backup:20190610
mydumperImagePullPolicy: IfNotPresent
# storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer.
# different classes might map to quality-of-service levels, or to backup policies,
Expand Down