diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000000..79e62b6891 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,50 @@ +name: ci + +on: + pull_request: + branches: + - master + - release-* + +jobs: + + pull: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + job: + - verify + - build + - test + - e2e-examples + steps: + - name: Set up Go 1.13 + uses: actions/setup-go@v1 + with: + go-version: 1.13 + id: go + + - name: Check out code into the Go module directory + uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.sha }} + path: go/src/github.com/${{ github.repository }} + + - name: ${{ matrix.job }} + run: | + # workaround for https://github.com/actions/setup-go/issues/14 + export GOPATH=${GITHUB_WORKSPACE}/go + export PATH=$PATH:$GOPATH/bin + if [[ "$job" == "verify" ]]; then + make check-setup check + elif [[ "$job" == "build" ]]; then + make docker e2e-docker cli debug-build-docker + elif [[ "$job" == "test" ]]; then + make test GOFLAGS=-race + else + make $job + fi + working-directory: ${{ github.workspace }}/go/src/github.com/${{ github.repository }} + env: + job: ${{ matrix.job }} diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 0000000000..d51da31113 --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,20 @@ +name: "Close stale issues/prs" +on: + schedule: + - cron: "0 0 * * *" + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v1.1.0 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: 'This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 15 days' + stale-pr-message: 'This pr is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 15 days' + days-before-stale: 60 + days-before-close: 15 + stale-issue-label: 'lifecycle/stale' + stale-pr-label: 'lifecycle/stale' + exempt-issue-label: 'lifecycle/frozen' + exempt-pr-label: 'lifecycle/frozen' diff --git a/.gitignore b/.gitignore index 4d93436715..9976dd0eef 100644 --- a/.gitignore +++ b/.gitignore @@ -37,5 +37,7 @@ kubeconfig # local output directory /output/ -# local artifacts directory -/artifacts/ +# kubetest2 default artifacts directory +/_artifacts/ + +.DS_Store diff --git a/CHANGELOG-1.1.md b/CHANGELOG-1.1.md index 1166452ee5..75203c9d48 100644 --- a/CHANGELOG-1.1.md +++ b/CHANGELOG-1.1.md @@ -1,10 +1,121 @@ +# TiDB Operator v1.1.0-rc.2 Release Notes + +This is the second release candidate of `v1.1.0`, which focuses on the usability, extensibility and security of TiDB Operator. While we encourage usage in non-critical environments, it is **NOT** recommended to use this version in critical environments. 
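The CI matrix in the workflow above only dispatches to existing Makefile targets, so the same jobs can be reproduced locally; a rough sketch, assuming Go 1.13 and the repository checked out under `$GOPATH/src/github.com/pingcap/tidb-operator` as in the GOPATH workaround above:

```shell
# Sketch: run the CI matrix jobs locally (paths follow the GOPATH workaround above)
export GOPATH=${GOPATH:-$HOME/go}
export PATH=$PATH:$GOPATH/bin
cd "$GOPATH/src/github.com/pingcap/tidb-operator"

make check-setup check                          # "verify" job
make docker e2e-docker cli debug-build-docker   # "build" job
make test GOFLAGS=-race                         # "test" job
make e2e-examples                               # "e2e-examples" job
```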
+ +## Notable Changes + +- Add `status` field for `TidbAutoScaler` CR ([#2182](https://github.com/pingcap/tidb-operator/pull/2182), [@Yisaer](https://github.com/Yisaer)) +- Add `spec.pd.maxFailoverCount` field to limit max failover replicas for PD ([#2184](https://github.com/pingcap/tidb-operator/pull/2184), [@cofyc](https://github.com/cofyc)) +- Emit more events for `TidbCluster` and `TidbClusterAutoScaler` to help users know TiDB running status ([#2150](https://github.com/pingcap/tidb-operator/pull/2150), [@Yisaer](https://github.com/Yisaer)) +- Add the `AGE` column to show creation timestamp for all CRDs ([#2168](https://github.com/pingcap/tidb-operator/pull/2168), [@cofyc](https://github.com/cofyc)) +- Add a switch to skip PD Dashboard TLS configuration ([#2143](https://github.com/pingcap/tidb-operator/pull/2143), [@weekface](https://github.com/weekface)) +- Change TiDB pod `readiness` probe from `HTTPGet` to `TCPSocket` 4000 port ([#2139](https://github.com/pingcap/tidb-operator/pull/2139), [@weekface](https://github.com/weekface)) +- Support deploying TiFlash with TidbCluster CR ([#2157](https://github.com/pingcap/tidb-operator/pull/2157), [@DanielZhangQD](https://github.com/DanielZhangQD)) +- Add TLS support for TiKV metrics API ([#2137](https://github.com/pingcap/tidb-operator/pull/2137), [@weekface](https://github.com/weekface)) +- Set PD DashboardConfig when TLS between the MySQL client and TiDB server is enabled ([#2085](https://github.com/pingcap/tidb-operator/pull/2085), [@weekface](https://github.com/weekface)) +- Remove unnecessary informer caches to reduce the memory footprint of tidb-controller-manager ([#1504](https://github.com/pingcap/tidb-operator/pull/1504), [@aylei](https://github.com/aylei)) +- Fix the failure that Helm cannot load the kubeconfig file when deleting the tidb-operator release during `terraform destroy` ([#2148](https://github.com/pingcap/tidb-operator/pull/2148), [@DanielZhangQD](https://github.com/DanielZhangQD)) +- Support configuring the Webhook TLS setting by loading a secret ([#2135](https://github.com/pingcap/tidb-operator/pull/2135), [@Yisaer](https://github.com/Yisaer)) +- Support TiFlash in TidbCluster CR ([#2122](https://github.com/pingcap/tidb-operator/pull/2122), [@DanielZhangQD](https://github.com/DanielZhangQD)) +- Fix the error that alertmanager couldn't be set in `TidbMonitor` ([#2108](https://github.com/pingcap/tidb-operator/pull/2108), [@Yisaer](https://github.com/Yisaer)) + + +# TiDB Operator v1.1.0-rc.1 Release Notes + +This is a release candidate of `v1.1.0`, which focuses on the usability, extensibility and security of TiDB Operator. While we encourage usage in non-critical environments, it is **NOT** recommended to use this version in critical environments. + +## Action Required + +- `--advertise-address` will be configured for `tidb-server`, which would trigger rolling-upgrade for the `tidb-server` component. 
You can set `spec.paused` to `true` before upgrading tidb-operator to avoid the rolling upgrade, and set it back to `false` when you are ready to upgrade your tidb server ([#2076](https://github.com/pingcap/tidb-operator/pull/2076), [@cofyc](https://github.com/cofyc)) +- Add the `tlsClient.tlsSecret` field in the backup and restore spec, which supports specifying a secret name that includes the cert ([#2003](https://github.com/pingcap/tidb-operator/pull/2003), [@shuijing198799](https://github.com/shuijing198799)) + + +## Other Notable Changes + +- Use `tidb-lightning` in `Restore` instead of `loader` ([#2068](https://github.com/pingcap/tidb-operator/pull/2068), [@Yisaer](https://github.com/Yisaer)) +- Add `cert-allowed-cn` support to TiDB components ([#2061](https://github.com/pingcap/tidb-operator/pull/2061), [@weekface](https://github.com/weekface)) +- Fix the PD `location-labels` configuration ([#1941](https://github.com/pingcap/tidb-operator/pull/1941), [@aylei](https://github.com/aylei)) +- Able to pause and unpause tidb cluster deployment via `spec.paused` ([#2013](https://github.com/pingcap/tidb-operator/pull/2013), [@cofyc](https://github.com/cofyc)) +- Default the `max-backups` for TiDB server configuration to `3` if the TiDB cluster is deployed by CR ([#2045](https://github.com/pingcap/tidb-operator/pull/2045), [@Yisaer](https://github.com/Yisaer)) +- Able to configure custom environments for components ([#2052](https://github.com/pingcap/tidb-operator/pull/2052), [@cofyc](https://github.com/cofyc)) +- Fix the error that `kubectl get tc` cannot show correct images ([#2031](https://github.com/pingcap/tidb-operator/pull/2031), [@Yisaer](https://github.com/Yisaer)) +- 1. Default the `spec.tikv.maxFailoverCount` and `spec.tidb.maxFailoverCount` to `3` when they are not defined + 2. Disable auto-failover when `maxFailoverCount` is set to `0` ([#2015](https://github.com/pingcap/tidb-operator/pull/2015), [@Yisaer](https://github.com/Yisaer)) +- Support deploying TiDB clusters with TidbCluster and TidbMonitor CRs via Terraform on ACK ([#2012](https://github.com/pingcap/tidb-operator/pull/2012), [@DanielZhangQD](https://github.com/DanielZhangQD)) +- Update PDConfig for TidbCluster to PD v3.1.0 ([#1928](https://github.com/pingcap/tidb-operator/pull/1928), [@Yisaer](https://github.com/Yisaer)) +- Support deploying TiDB clusters with TidbCluster and TidbMonitor CRs via Terraform on AWS ([#2004](https://github.com/pingcap/tidb-operator/pull/2004), [@DanielZhangQD](https://github.com/DanielZhangQD)) +- Update TidbConfig for TidbCluster to TiDB v3.1.0 ([#1906](https://github.com/pingcap/tidb-operator/pull/1906), [@Yisaer](https://github.com/Yisaer)) +- Allow users to define resources for initContainers in TiDB initializer job ([#1938](https://github.com/pingcap/tidb-operator/pull/1938), [@tfulcrand](https://github.com/tfulcrand)) +- Add TLS support for Pump and Drainer ([#1979](https://github.com/pingcap/tidb-operator/pull/1979), [@weekface](https://github.com/weekface)) +- Add documents and examples for auto-scaler and initializer ([#1772](https://github.com/pingcap/tidb-operator/pull/1772), [@Yisaer](https://github.com/Yisaer)) +- 1. Add check to guarantee the NodePort won't be changed if the serviceType of TidbMonitor is NodePort + 2. Add EnvVar sort to avoid the monitor rendering different results from the same TidbMonitor spec + 3. 
Fix the problem that the TidbMonitor LoadBalancer IP is not used ([#1962](https://github.com/pingcap/tidb-operator/pull/1962), [@Yisaer](https://github.com/Yisaer)) +- Make tidb-initializer support TLS ([#1931](https://github.com/pingcap/tidb-operator/pull/1931), [@weekface](https://github.com/weekface)) +- 1. Fix the problem that Advanced StatefulSet cannot work with webhook + 2. Change the Reaction for the Down State TiKV pod during deleting request in webhook from admit to reject ([#1963](https://github.com/pingcap/tidb-operator/pull/1963), [@Yisaer](https://github.com/Yisaer)) +- Fix the drainer installation error when `drainerName` is set ([#1961](https://github.com/pingcap/tidb-operator/pull/1961), [@DanielZhangQD](https://github.com/DanielZhangQD)) +- Fix some TiKV configuration keys in toml ([#1887](https://github.com/pingcap/tidb-operator/pull/1887), [@aylei](https://github.com/aylei)) +- Support using a remote directory as data source for tidb-lightning ([#1629](https://github.com/pingcap/tidb-operator/pull/1629), [@aylei](https://github.com/aylei)) +- Add the API document and a script that generates documentation ([#1945](https://github.com/pingcap/tidb-operator/pull/1945), [@Yisaer](https://github.com/Yisaer)) +- Add the tikv-importer chart ([#1910](https://github.com/pingcap/tidb-operator/pull/1910), [@shonge](https://github.com/shonge)) +- Fix the Prometheus scrape config issue while TLS is enabled ([#1919](https://github.com/pingcap/tidb-operator/pull/1919), [@weekface](https://github.com/weekface)) +- Enable TLS between TiDB components ([#1870](https://github.com/pingcap/tidb-operator/pull/1870), [@weekface](https://github.com/weekface)) +- Fix the timeout error when `.Values.admission.validation.pods` is `true` during the TiKV upgrade ([#1875](https://github.com/pingcap/tidb-operator/pull/1875), [@Yisaer](https://github.com/Yisaer)) +- Enable TLS for MySQL clients ([#1878](https://github.com/pingcap/tidb-operator/pull/1878), [@weekface](https://github.com/weekface)) +- Fix the bug which would cause broken TiDB image property ([#1860](https://github.com/pingcap/tidb-operator/pull/1860), [@Yisaer](https://github.com/Yisaer)) +- TidbMonitor would use its namespace for the targetRef if it is not defined ([#1834](https://github.com/pingcap/tidb-operator/pull/1834), [@Yisaer](https://github.com/Yisaer)) +- Support starting tidb-server with `--advertise-address` parameter ([#1859](https://github.com/pingcap/tidb-operator/pull/1859), [@LinuxGit](https://github.com/LinuxGit)) +- Backup/Restore: support configuring TiKV GC life time ([#1835](https://github.com/pingcap/tidb-operator/pull/1835), [@LinuxGit](https://github.com/LinuxGit)) +- Support no secret for S3/Ceph when the OIDC authentication is used ([#1817](https://github.com/pingcap/tidb-operator/pull/1817), [@tirsen](https://github.com/tirsen)) +- 1. Change the setting from the previous `admission.hookEnabled.pods` to the `admission.validation.pods` + 2. Change the setting from the previous `admission.hookEnabled.statefulSets` to the `admission.validation.statefulSets` + 3. Change the setting from the previous `admission.hookEnabled.validating` to the `admission.validation.pingcapResources` + 4. Change the setting from the previous `admission.hookEnabled.defaulting` to the `admission.mutation.pingcapResources` + 5. Change the setting from the previous `admission.failurePolicy.defaulting` to the `admission.failurePolicy.mutation` + 6. 
Change the setting from the previous `admission.failurePolicy.*` to the `admission.failurePolicy.validation` ([#1832](https://github.com/pingcap/tidb-operator/pull/1832), [@Yisaer](https://github.com/Yisaer)) +- Enable TidbCluster defaulting mutation by default, which is recommended when the admission webhook is used ([#1816](https://github.com/pingcap/tidb-operator/pull/1816), [@Yisaer](https://github.com/Yisaer)) +- Fix a bug that TiKV fails to start while creating the cluster using CR with cluster TLS enabled ([#1808](https://github.com/pingcap/tidb-operator/pull/1808), [@weekface](https://github.com/weekface)) +- Support using a prefix in remote storage during backup/restore ([#1790](https://github.com/pingcap/tidb-operator/pull/1790), [@DanielZhangQD](https://github.com/DanielZhangQD)) + + +# TiDB Operator v1.1.0-beta.2 Release Notes + +This is a pre-release of `v1.1.0`, which focuses on the usability, extensibility and security of TiDB Operator. While we encourage usage in non-critical environments, it is **NOT** recommended to use this version in critical environments. + +## Changes since v1.1.0-beta.1 + +## Action Required + +- `--default-storage-class-name` and `--default-backup-storage-class-name` are abandoned, and the storage class now defaults to the Kubernetes default storage class. If you have set a default storage class different from the Kubernetes default storage class, please set it explicitly in your TiDB cluster Helm or YAML files. ([#1581](https://github.com/pingcap/tidb-operator/pull/1581), [@cofyc](https://github.com/cofyc)) + + +## Other Notable Changes + +- Allow users to configure affinity and tolerations for `Backup` and `Restore`. ([#1737](https://github.com/pingcap/tidb-operator/pull/1737), [@Smana](https://github.com/Smana)) +- Allow AdvancedStatefulSet and Admission Webhook to work together. ([#1640](https://github.com/pingcap/tidb-operator/pull/1640), [@Yisaer](https://github.com/Yisaer)) +- Add a basic deployment example of managing a TiDB cluster with custom resources only. ([#1573](https://github.com/pingcap/tidb-operator/pull/1573), [@aylei](https://github.com/aylei)) +- Support the TidbCluster auto-scaling feature based on average CPU utilization load. ([#1731](https://github.com/pingcap/tidb-operator/pull/1731), [@Yisaer](https://github.com/Yisaer)) +- Support user-defined TiDB server/client certificates ([#1714](https://github.com/pingcap/tidb-operator/pull/1714), [@weekface](https://github.com/weekface)) +- Add an option for the tidb-backup chart to allow reusing an existing PVC or not for restore ([#1708](https://github.com/pingcap/tidb-operator/pull/1708), [@mightyguava](https://github.com/mightyguava)) +- Add the `resources`, `imagePullPolicy` and `nodeSelector` fields for the tidb-backup chart ([#1705](https://github.com/pingcap/tidb-operator/pull/1705), [@mightyguava](https://github.com/mightyguava)) +- Add more SANs (Subject Alternative Names) to the TiDB server certificate ([#1702](https://github.com/pingcap/tidb-operator/pull/1702), [@weekface](https://github.com/weekface)) +- Support automatically migrating existing Kubernetes StatefulSets to Advanced StatefulSets when the AdvancedStatefulSet feature is enabled ([#1580](https://github.com/pingcap/tidb-operator/pull/1580), [@cofyc](https://github.com/cofyc)) +- Fix the bug in the admission webhook that causes an error when deleting PD pods, and allow the pod deleting request for PD and TiKV when the PVC is not found. 
([#1568](https://github.com/pingcap/tidb-operator/pull/1568), [@Yisaer](https://github.com/Yisaer)) +- Limit the restart rate for PD and TiKV - only one instance would be restarted each time ([#1532](https://github.com/pingcap/tidb-operator/pull/1532), [@Yisaer](https://github.com/Yisaer)) +- Add default ClusterRef namespace for TidbMonitor as the same as it is deployed and fix the bug that TidbMonitor's Pod can't be created when Spec.PrometheusSpec.logLevel is missing. ([#1500](https://github.com/pingcap/tidb-operator/pull/1500), [@Yisaer](https://github.com/Yisaer)) +- Refine logs for `TidbMonitor` and `TidbInitializer` controller ([#1493](https://github.com/pingcap/tidb-operator/pull/1493), [@aylei](https://github.com/aylei)) +- Avoid unnecessary updates to `Service` and `Deployment` of discovery ([#1499](https://github.com/pingcap/tidb-operator/pull/1499), [@aylei](https://github.com/aylei)) +- Remove some update events that are not very useful ([#1486](https://github.com/pingcap/tidb-operator/pull/1486), [@weekface](https://github.com/weekface)) + + # TiDB Operator v1.1.0-beta.1 Release Notes This is a pre-release of `v1.1.0`, which focuses on the usability, extensibility and security of TiDB Operator. While we encourage usage in non-critical environments, it is **NOT** recommended to use this version in critical environments. ## Changes since v1.0.0 -### Action required +### Action Required - ACTION REQUIRED: Add the `timezone` support for [all charts](https://github.com/pingcap/tidb-operator/tree/master/charts) ([#1122](https://github.com/pingcap/tidb-operator/pull/1122), [@weekface](https://github.com/weekface)). @@ -17,7 +128,7 @@ This is a pre-release of `v1.1.0`, which focuses on the usability, extensibility All images' time zone maintained by `tidb-operator` is `UTC`. If you use your own images, you need to make sure that the time zone inside your images is `UTC`. -### Other notable changes +### Other Notable Changes - Support backup to S3 with [Backup & Restore (BR)](https://github.com/pingcap/br) ([#1280](https://github.com/pingcap/tidb-operator/pull/1280), [@DanielZhangQD](https://github.com/DanielZhangQD)) - Add basic defaulting and validating for `TidbCluster` ([#1429](https://github.com/pingcap/tidb-operator/pull/1429), [@aylei](https://github.com/aylei)) diff --git a/Makefile b/Makefile index 2369a5a08e..9ac4af82f0 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,6 @@ # Set DEBUGGER=1 to build debug symbols LDFLAGS = $(if $(DEBUGGER),,-s -w) $(shell ./hack/version.sh) -# SET DOCKER_REGISTRY to change the docker registry -DOCKER_REGISTRY := $(if $(DOCKER_REGISTRY),$(DOCKER_REGISTRY),localhost:5000) - GOVER_MAJOR := $(shell go version | sed -E -e "s/.*go([0-9]+)[.]([0-9]+).*/\1/") GOVER_MINOR := $(shell go version | sed -E -e "s/.*go([0-9]+)[.]([0-9]+).*/\2/") GO113 := $(shell [ $(GOVER_MAJOR) -gt 1 ] || [ $(GOVER_MAJOR) -eq 1 ] && [ $(GOVER_MINOR) -ge 13 ]; echo $$?) @@ -19,6 +16,8 @@ GOENV := GO15VENDOREXPERIMENT="1" CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(GOARCH) GO := $(GOENV) go GO_BUILD := $(GO) build -trimpath +DOCKER_REGISTRY ?= localhost:5000 +DOCKER_REPO ?= ${DOCKER_REGISTRY}/pingcap IMAGE_TAG ?= latest PACKAGE_LIST := go list ./... | grep -vE "client/(clientset|informers|listers)" PACKAGE_DIRECTORIES := $(PACKAGE_LIST) | sed 's|github.com/pingcap/tidb-operator/||' @@ -29,8 +28,8 @@ TEST_COVER_PACKAGES:=go list ./pkg/... 
| grep -vE "pkg/client" | grep -vE "pkg/t default: build docker-push: docker backup-docker - docker push "${DOCKER_REGISTRY}/pingcap/tidb-operator:${IMAGE_TAG}" - docker push "${DOCKER_REGISTRY}/pingcap/tidb-backup-manager:${IMAGE_TAG}" + docker push "${DOCKER_REPO}/tidb-operator:${IMAGE_TAG}" + docker push "${DOCKER_REPO}/tidb-backup-manager:${IMAGE_TAG}" ifeq ($(NO_BUILD),y) docker: @@ -38,7 +37,8 @@ docker: else docker: build endif - docker build --tag "${DOCKER_REGISTRY}/pingcap/tidb-operator:${IMAGE_TAG}" images/tidb-operator + docker build --tag "${DOCKER_REPO}/tidb-operator:${IMAGE_TAG}" images/tidb-operator + docker build --tag "${DOCKER_REPO}/tidb-backup-manager:${IMAGE_TAG}" images/tidb-backup-manager build: controller-manager scheduler discovery admission-webhook apiserver backup-manager @@ -58,7 +58,7 @@ apiserver: $(GO_BUILD) -ldflags '$(LDFLAGS)' -o images/tidb-operator/bin/tidb-apiserver cmd/apiserver/main.go backup-manager: - $(GO_BUILD) -ldflags '$(LDFLAGS)' -o images/backup-manager/bin/tidb-backup-manager cmd/backup-manager/main.go + $(GO_BUILD) -ldflags '$(LDFLAGS)' -o images/tidb-backup-manager/bin/tidb-backup-manager cmd/backup-manager/main.go ifeq ($(NO_BUILD),y) backup-docker: @@ -66,10 +66,10 @@ backup-docker: else backup-docker: backup-manager endif - docker build --tag "${DOCKER_REGISTRY}/pingcap/tidb-backup-manager:${IMAGE_TAG}" images/backup-manager + docker build --tag "${DOCKER_REPO}/tidb-backup-manager:${IMAGE_TAG}" images/tidb-backup-manager e2e-docker-push: e2e-docker - docker push "${DOCKER_REGISTRY}/pingcap/tidb-operator-e2e:${IMAGE_TAG}" + docker push "${DOCKER_REPO}/tidb-operator-e2e:${IMAGE_TAG}" ifeq ($(NO_BUILD),y) e2e-docker: @@ -85,7 +85,7 @@ endif cp -r charts/tidb-cluster tests/images/e2e cp -r charts/tidb-backup tests/images/e2e cp -r manifests tests/images/e2e - docker build -t "${DOCKER_REGISTRY}/pingcap/tidb-operator-e2e:${IMAGE_TAG}" tests/images/e2e + docker build -t "${DOCKER_REPO}/tidb-operator-e2e:${IMAGE_TAG}" tests/images/e2e e2e-build: $(GO_BUILD) -ldflags '$(LDFLAGS)' -o tests/images/e2e/bin/ginkgo github.com/onsi/ginkgo/ginkgo @@ -97,15 +97,18 @@ e2e-build: e2e: ./hack/e2e.sh +e2e-examples: + ./hack/e2e-examples.sh + stability-test-build: $(GO_BUILD) -ldflags '$(LDFLAGS)' -o tests/images/stability-test/bin/blockwriter ./tests/cmd/blockwriter $(GO_BUILD) -ldflags '$(LDFLAGS)' -o tests/images/stability-test/bin/stability-test ./tests/cmd/stability stability-test-docker: stability-test-build - docker build -t "${DOCKER_REGISTRY}/pingcap/tidb-operator-stability-test:${IMAGE_TAG}" tests/images/stability-test + docker build -t "${DOCKER_REPO}/tidb-operator-stability-test:${IMAGE_TAG}" tests/images/stability-test stability-test-push: stability-test-docker - docker push "${DOCKER_REGISTRY}/pingcap/tidb-operator-stability-test:${IMAGE_TAG}" + docker push "${DOCKER_REPO}/tidb-operator-stability-test:${IMAGE_TAG}" fault-trigger: $(GO_BUILD) -ldflags '$(LDFLAGS)' -o tests/images/fault-trigger/bin/fault-trigger tests/cmd/fault-trigger/*.go @@ -202,16 +205,16 @@ cli: $(GO_BUILD) -ldflags '$(LDFLAGS)' -o tkctl cmd/tkctl/main.go debug-docker-push: debug-build-docker - docker push "${DOCKER_REGISTRY}/pingcap/debug-launcher:latest" - docker push "${DOCKER_REGISTRY}/pingcap/tidb-control:latest" - docker push "${DOCKER_REGISTRY}/pingcap/tidb-debug:latest" + docker push "${DOCKER_REPO}/debug-launcher:latest" + docker push "${DOCKER_REPO}/tidb-control:latest" + docker push "${DOCKER_REPO}/tidb-debug:latest" debug-build-docker: debug-build - docker build 
-t "${DOCKER_REGISTRY}/pingcap/debug-launcher:latest" misc/images/debug-launcher - docker build -t "${DOCKER_REGISTRY}/pingcap/tidb-control:latest" misc/images/tidb-control - docker build -t "${DOCKER_REGISTRY}/pingcap/tidb-debug:latest" misc/images/tidb-debug + docker build -t "${DOCKER_REPO}/debug-launcher:latest" misc/images/debug-launcher + docker build -t "${DOCKER_REPO}/tidb-control:latest" misc/images/tidb-control + docker build -t "${DOCKER_REPO}/tidb-debug:latest" misc/images/tidb-debug debug-build: $(GO_BUILD) -ldflags '$(LDFLAGS)' -o misc/images/debug-launcher/bin/debug-launcher misc/cmd/debug-launcher/main.go -.PHONY: check check-setup check-all build e2e-build debug-build cli e2e +.PHONY: check check-setup check-all build e2e-build debug-build cli e2e test docker e2e-docker debug-build-docker diff --git a/README.md b/README.md index 2a8fa19297..be0ebc8e0f 100644 --- a/README.md +++ b/README.md @@ -53,11 +53,12 @@ Read the [Roadmap](./ROADMAP.md). Read the [Quick Start Guide](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/tidb-operator-overview/), which includes all the guides for managing TiDB clusters in Kubernetes. - ## Documentation -- [English](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/tidb-operator-overview/) -- [简体中文](https://pingcap.com/docs-cn/v3.0/tidb-in-kubernetes/tidb-operator-overview/) +All the TiDB Operator documentation is maintained in the [docs-tidb-operator repository](https://github.com/pingcap/docs-tidb-operator). You can also see the documentation at PingCAP website: + +- [English](https://pingcap.com/docs/tidb-in-kubernetes/stable/tidb-operator-overview/) +- [简体中文](https://pingcap.com/docs-cn/tidb-in-kubernetes/stable/tidb-operator-overview/) ## Contributing diff --git a/charts/tidb-backup/templates/backup-job.yaml b/charts/tidb-backup/templates/backup-job.yaml index 24b7346252..a6c8eb953d 100644 --- a/charts/tidb-backup/templates/backup-job.yaml +++ b/charts/tidb-backup/templates/backup-job.yaml @@ -33,6 +33,8 @@ spec: {{- if .Values.serviceAccount }} serviceAccount: {{ .Values.serviceAccount }} {{- end }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} containers: - name: backup image: {{ .Values.image.backup }} @@ -88,6 +90,14 @@ spec: name: {{ .Values.secretName }} key: password restartPolicy: OnFailure + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} volumes: - name: data persistentVolumeClaim: diff --git a/charts/tidb-backup/templates/backup-pvc.yaml b/charts/tidb-backup/templates/backup-pvc.yaml index 290868f8af..5f1bfbc051 100644 --- a/charts/tidb-backup/templates/backup-pvc.yaml +++ b/charts/tidb-backup/templates/backup-pvc.yaml @@ -1,8 +1,12 @@ -{{- if (or (eq .Values.mode "backup") (eq .Values.mode "scheduled-restore")) }} +{{- if (or (eq .Values.mode "backup") (eq .Values.mode "scheduled-restore") (and (eq .Values.mode "restore") (not .Values.restoreUsingExistingVolume))) }} kind: PersistentVolumeClaim apiVersion: v1 metadata: + {{- if eq .Values.mode "restore" }} + name: restore-{{ tpl .Values.name . }} + {{- else }} name: {{ tpl .Values.name . }} + {{- end }} labels: app.kubernetes.io/name: {{ template "chart.name" . 
}} app.kubernetes.io/managed-by: tidb-operator diff --git a/charts/tidb-backup/templates/restore-job.yaml b/charts/tidb-backup/templates/restore-job.yaml index d5cd17a6ab..a4347eaac4 100644 --- a/charts/tidb-backup/templates/restore-job.yaml +++ b/charts/tidb-backup/templates/restore-job.yaml @@ -25,9 +25,16 @@ spec: {{- end }} spec: restartPolicy: OnFailure + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} containers: - name: tidb-restore-job image: {{ .Values.image.backup }} + imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.resources }} + resources: +{{ toYaml .Values.resources | indent 10 }} + {{- end }} command: - /bin/sh - -c @@ -74,10 +81,20 @@ spec: secretKeyRef: name: {{ .Values.secretName }} key: password + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} volumes: - name: data persistentVolumeClaim: - {{- if .Values.scheduledBackupName }} + {{- if not .Values.restoreUsingExistingVolume }} + claimName: restore-{{ .Values.name }} + {{- else if .Values.scheduledBackupName }} claimName: {{ .Values.name }}-scheduled-backup {{- else }} claimName: {{ .Values.name }} diff --git a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl index f33b0d1b14..b321c6a52d 100644 --- a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl +++ b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl @@ -67,22 +67,46 @@ $creds EOF cd "${backup_base_dir}" +{{- if .Values.gcp.prefix }} +tar -cf - "${backup_name}" | pigz -p 16 \ + | rclone --config /tmp/rclone.conf rcat gcp:${bucket}/{{ .Values.gcp.prefix }}/${backup_name}/${backup_name}.tgz +{{- else }} tar -cf - "${backup_name}" | pigz -p 16 \ | rclone --config /tmp/rclone.conf rcat gcp:${bucket}/${backup_name}/${backup_name}.tgz {{- end }} +{{- end }} {{- if .Values.ceph }} uploader \ --cloud=ceph \ + {{- if .Values.ceph.prefix }} + --bucket={{ .Values.ceph.bucket }}/{{ .Values.ceph.prefix }} \ + {{- else }} --bucket={{ .Values.ceph.bucket }} \ + {{- end }} --endpoint={{ .Values.ceph.endpoint }} \ --backup-dir=${dirname} {{- end }} {{- if .Values.s3 }} -uploader \ - --cloud=aws \ - --region={{ .Values.s3.region }} \ - --bucket={{ .Values.s3.bucket }} \ - --backup-dir=${dirname} +# Once we know there are no more credentials that will be logged we can run with -x +set -x +bucket={{ .Values.s3.bucket }} + +cat < /tmp/rclone.conf +[s3] +type = s3 +provider = AWS +env_auth = true +region = {{ .Values.s3.region }} +EOF + +cd "${backup_base_dir}" +{{- if .Values.s3.prefix }} +tar -cf - "${backup_name}" | pigz -p 16 \ + | rclone --config /tmp/rclone.conf rcat s3:${bucket}/{{ .Values.s3.prefix }}/${backup_name}/${backup_name}.tgz +{{- else }} +tar -cf - "${backup_name}" | pigz -p 16 \ + | rclone --config /tmp/rclone.conf rcat s3:${bucket}/${backup_name}/${backup_name}.tgz +{{- end }} {{- end }} diff --git a/charts/tidb-backup/templates/scripts/_start_restore.sh.tpl b/charts/tidb-backup/templates/scripts/_start_restore.sh.tpl index 1489630a81..c15ffa146f 100644 --- a/charts/tidb-backup/templates/scripts/_start_restore.sh.tpl +++ b/charts/tidb-backup/templates/scripts/_start_restore.sh.tpl @@ -7,7 +7,11 @@ host=`echo {{ .Values.clusterName }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr {{- if .Values.gcp }} downloader \ --cloud=gcp \ + {{- if .Values.gcp.prefix }} + --bucket={{ 
.Values.gcp.bucket }}/{{ .Values.gcp.prefix }} \ + {{- else }} --bucket={{ .Values.gcp.bucket }} \ + {{- end }} --srcDir=${BACKUP_NAME} \ --destDir=/data {{- end }} @@ -15,7 +19,11 @@ downloader \ {{- if .Values.ceph }} downloader \ --cloud=ceph \ + {{- if .Values.ceph.prefix }} + --bucket={{ .Values.ceph.bucket }}/{{ .Values.ceph.prefix }} \ + {{- else }} --bucket={{ .Values.ceph.bucket }} \ + {{- end }} --endpoint={{ .Values.ceph.endpoint }} \ --srcDir=${BACKUP_NAME} \ --destDir=/data @@ -25,7 +33,11 @@ downloader \ downloader \ --cloud=aws \ --region={{ .Values.s3.region }} \ + {{- if .Values.s3.prefix }} + --bucket={{ .Values.s3.bucket }}/{{ .Values.s3.prefix }} \ + {{- else }} --bucket={{ .Values.s3.bucket }} \ + {{- end }} --srcDir=${BACKUP_NAME} \ --destDir=/data {{- end }} diff --git a/charts/tidb-backup/values.yaml b/charts/tidb-backup/values.yaml index 1d6085d166..14a1044a37 100644 --- a/charts/tidb-backup/values.yaml +++ b/charts/tidb-backup/values.yaml @@ -27,7 +27,11 @@ name: fullbackup-{{ date "200601021504" .Release.Time }} image: pullPolicy: IfNotPresent # https://github.com/pingcap/tidb-cloud-backup - backup: pingcap/tidb-cloud-backup:20191217 + backup: pingcap/tidb-cloud-backup:20200229 + +## nodeSelector ensure pods only assigning to nodes which have each of the indicated key-value pairs as labels +## ref:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +nodeSelector: {} # Add additional labels for backup/restore job's pod # ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ @@ -36,6 +40,15 @@ extraLabels: {} # Add annotations for backup/restore job's pod annotations: {} +## affinity defines pd scheduling rules,it's default settings is empty. +## please read the affinity document before set your scheduling rule: +## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +affinity: {} + +## Tolerations are applied to pods, and allow pods to schedule onto nodes with matching taints. +## refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration +tolerations: [] + # secretName is the name of the secret which stores user and password used for backup/restore # Note: you must give the user enough privilege to do the backup and restore # you can create the secret by: @@ -80,12 +93,18 @@ restoreOptions: "-t 16" # When a GC happens, the current time minus this value is the safe point. tikvGCLifeTime: 720h +# By default, restores are performed by binding to an existing volume containing backup data. +# To restore from gcp, ceph, or s3, set this to false to create a new volume to load the backup into +# This setting only affects the "restore" mode. +restoreUsingExistingVolume: true + # By default, the backup/restore uses PV to store/load backup data # You can choose to store/load backup data to/from gcp, ceph or s3 bucket by enabling the following corresponding section: # backup to or restore from gcp bucket, the backup path is in the form of - gcp: {} # bucket: "" + # prefix: "" # secretName is not necessary on GKE if you use the workload identity feature # secretName is the name of the secret which stores the gcp service account credentials json file # The service account must have read/write permission to the above bucket. 
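As a quick illustration of the new `prefix` option above (all names below are made-up examples, not defaults), the backup script uploads the tarball under the prefix and the restore script reads it back from the same layout:

```shell
# Sketch: resulting object path when gcp.prefix is set (hypothetical values)
bucket="my-tidb-backups"               # .Values.gcp.bucket
prefix="prod/cluster1"                 # .Values.gcp.prefix
backup_name="fullbackup-202003011504"  # generated by the backup job

# Mirrors the rclone rcat destination in _start_backup.sh.tpl; _start_restore.sh.tpl
# downloads from the same layout via --bucket=${bucket}/${prefix}.
echo "gcp:${bucket}/${prefix}/${backup_name}/${backup_name}.tgz"
```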
@@ -99,6 +118,7 @@ gcp: {} ceph: {} # endpoint: "" # bucket: "" + # prefix: "" # secretName is the name of the secret which stores ceph object store access key and secret key # You can create the secret by: # kubectl create secret generic ceph-backup-secret --namespace= --from-literal=access_key= --from-literal=secret_key= @@ -108,6 +128,7 @@ ceph: {} s3: {} # region: "" # bucket: "" + # prefix: "" # secretName is the name of the secret which stores s3 object store access key and secret key # This is not necessary on AWS. Instead you should be able to get the credentials from the EKS service IAM role. # You can create the secret by: diff --git a/charts/tidb-cluster/templates/_helpers.tpl b/charts/tidb-cluster/templates/_helpers.tpl index 75732a0456..766d43789b 100644 --- a/charts/tidb-cluster/templates/_helpers.tpl +++ b/charts/tidb-cluster/templates/_helpers.tpl @@ -28,7 +28,7 @@ We truncate at 63 chars because some Kubernetes name fields are limited to this {{- end -}} {{- define "cluster.scheme" -}} -{{ if .Values.enableTLSCluster }}https{{ else }}http{{ end }} +{{ if and .Values.tlsCluster .Values.tlsCluster.enabled }}https{{ else }}http{{ end }} {{- end -}} {{/* @@ -41,11 +41,11 @@ config-file: |- {{- if .Values.pd.config }} {{ .Values.pd.config | indent 2 }} {{- end -}} - {{- if .Values.enableTLSCluster }} + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} [security] - cacert-path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - cert-path = "/var/lib/pd-tls/cert" - key-path = "/var/lib/pd-tls/key" + cacert-path = "/var/lib/pd-tls/ca.crt" + cert-path = "/var/lib/pd-tls/tls.crt" + key-path = "/var/lib/pd-tls/tls.key" {{- end -}} {{- end -}} @@ -64,11 +64,11 @@ config-file: |- {{- if .Values.tikv.config }} {{ .Values.tikv.config | indent 2 }} {{- end -}} - {{- if .Values.enableTLSCluster }} + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} [security] - ca-path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - cert-path = "/var/lib/tikv-tls/cert" - key-path = "/var/lib/tikv-tls/key" + ca-path = "/var/lib/tikv-tls/ca.crt" + cert-path = "/var/lib/tikv-tls/tls.crt" + key-path = "/var/lib/tikv-tls/tls.key" {{- end -}} {{- end -}} @@ -91,18 +91,18 @@ config-file: |- {{- if .Values.tidb.config }} {{ .Values.tidb.config | indent 2 }} {{- end -}} - {{- if or .Values.enableTLSCluster .Values.enableTLSClient }} + {{- if or (and .Values.tlsCluster .Values.tlsCluster.enabled) (and .Values.tidb.tlsClient .Values.tidb.tlsClient.enabled) }} [security] {{- end -}} - {{- if .Values.enableTLSCluster }} - cluster-ssl-ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - cluster-ssl-cert = "/var/lib/tidb-tls/cert" - cluster-ssl-key = "/var/lib/tidb-tls/key" + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + cluster-ssl-ca = "/var/lib/tidb-tls/ca.crt" + cluster-ssl-cert = "/var/lib/tidb-tls/tls.crt" + cluster-ssl-key = "/var/lib/tidb-tls/tls.key" {{- end -}} - {{- if .Values.tidb.enableTLSClient }} - ssl-ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - ssl-cert = "/var/lib/tidb-server-tls/cert" - ssl-key = "/var/lib/tidb-server-tls/key" + {{- if and .Values.tidb.tlsClient .Values.tidb.tlsClient.enabled }} + ssl-ca = "/var/lib/tidb-server-tls/ca.crt" + ssl-cert = "/var/lib/tidb-server-tls/tls.crt" + ssl-key = "/var/lib/tidb-server-tls/tls.key" {{- end -}} {{- end -}} @@ -114,10 +114,20 @@ config-file: |- {{/* Encapsulate pump configmap data for consistent digest calculation */}} +{{- define "pump.tlsSecretName" -}} +{{ 
.Values.clusterName }}-pump +{{- end -}} + {{- define "pump-configmap.data" -}} pump-config: |- {{- if .Values.binlog.pump.config }} {{ .Values.binlog.pump.config | indent 2 }} + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + [security] + ssl-ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + ssl-cert = "/var/lib/pump-tls/tls.crt" + ssl-key = "/var/lib/pump-tls/tls.key" + {{- end -}} {{- else -}} {{ tuple "config/_pump-config.tpl" . | include "helm-toolkit.utils.template" | indent 2 }} {{- end -}} diff --git a/charts/tidb-cluster/templates/config/_prometheus-config.tpl b/charts/tidb-cluster/templates/config/_prometheus-config.tpl index f73573fa74..12bdc79bee 100644 --- a/charts/tidb-cluster/templates/config/_prometheus-config.tpl +++ b/charts/tidb-cluster/templates/config/_prometheus-config.tpl @@ -19,13 +19,17 @@ scrape_configs: names: - {{ .Release.Namespace }} {{- end }} + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + scheme: https + tls_config: + insecure_skip_verify: false + ca_file: /var/lib/cluster-client-tls/ca.crt + cert_file: /var/lib/cluster-client-tls/tls.crt + key_file: /var/lib/cluster-client-tls/tls.key + {{- else }} + scheme: http tls_config: insecure_skip_verify: true - {{- if .Values.enableTLSCluster }} - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - cert_file: /var/lib/pd-client-tls/cert - key_file: /var/lib/pd-client-tls/key - scheme: https {{- end }} relabel_configs: - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] @@ -41,11 +45,12 @@ scrape_configs: action: replace target_label: __metrics_path__ regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_label_app_kubernetes_io_instance, + __meta_kubernetes_pod_annotation_prometheus_io_port] + regex: (.+);(.+);(.+) target_label: __address__ + replacement: $1.$2-pd-peer:$3 + action: replace - source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace @@ -71,13 +76,17 @@ scrape_configs: names: - {{ .Release.Namespace }} {{- end }} + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + scheme: https + tls_config: + insecure_skip_verify: false + ca_file: /var/lib/cluster-client-tls/ca.crt + cert_file: /var/lib/cluster-client-tls/tls.crt + key_file: /var/lib/cluster-client-tls/tls.key + {{- else }} + scheme: http tls_config: insecure_skip_verify: true - {{- if .Values.enableTLSCluster }} - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - cert_file: /var/lib/pd-client-tls/cert - key_file: /var/lib/pd-client-tls/key - scheme: https {{- end }} relabel_configs: - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] @@ -93,11 +102,12 @@ scrape_configs: action: replace target_label: __metrics_path__ regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_label_app_kubernetes_io_instance, + __meta_kubernetes_pod_annotation_prometheus_io_port] + regex: (.+);(.+);(.+) target_label: __address__ + replacement: $1.$2-tidb-peer:$3 + action: replace - source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace @@ -123,16 +133,23 @@ scrape_configs: names: - {{ .Release.Namespace }} {{- 
end }} + scheme: http tls_config: insecure_skip_verify: true -# TiKV doesn't support scheme https for now. -# And we should fix it after TiKV fix this issue: https://github.com/tikv/tikv/issues/5340 -# {{- if .Values.enableTLSCluster }} -# ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt -# cert_file: /var/lib/pd-client-tls/cert -# key_file: /var/lib/pd-client-tls/key -# scheme: https -# {{- end }} + # TiKV doesn't support scheme https for now. + # And we should fix it after TiKV fix this issue: https://github.com/tikv/tikv/issues/5340 + # {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + # scheme: https + # tls_config: + # insecure_skip_verify: false + # ca_file: /var/lib/cluster-client-tls/ca.crt + # cert_file: /var/lib/cluster-client-tls/tls.crt + # key_file: /var/lib/cluster-client-tls/tls.key + # {{- else }} + # scheme: http + # tls_config: + # insecure_skip_verify: true + # {{- end }} relabel_configs: - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] action: keep @@ -147,11 +164,12 @@ scrape_configs: action: replace target_label: __metrics_path__ regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 + - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_label_app_kubernetes_io_instance, + __meta_kubernetes_pod_annotation_prometheus_io_port] + regex: (.+);(.+);(.+) target_label: __address__ + replacement: $1.$2-tikv-peer:$3 + action: replace - source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace diff --git a/charts/tidb-cluster/templates/config/_pump-config.tpl b/charts/tidb-cluster/templates/config/_pump-config.tpl index d0b41eddf0..07795a899e 100644 --- a/charts/tidb-cluster/templates/config/_pump-config.tpl +++ b/charts/tidb-cluster/templates/config/_pump-config.tpl @@ -19,14 +19,6 @@ heartbeat-interval = {{ .Values.binlog.pump.heartbeatInterval | default 2 }} # a comma separated list of PD endpoints pd-urls = "{{ template "cluster.scheme" . }}://{{ template "cluster.name" . }}-pd:2379" -#[security] -# Path of file that contains list of trusted SSL CAs for connection with cluster components. -# ssl-ca = "/path/to/ca.pem" -# Path of file that contains X509 certificate in PEM format for connection with cluster components. -# ssl-cert = "/path/to/drainer.pem" -# Path of file that contains X509 key in PEM format for connection with cluster components. -# ssl-key = "/path/to/drainer-key.pem" -# [storage] # Set to `true` (default) for best reliability, which prevents data loss when there is a power failure. sync-log = {{ .Values.binlog.pump.syncLog | default true }} @@ -43,3 +35,13 @@ sync-log = {{ .Values.binlog.pump.syncLog | default true }} # write-buffer = 67108864 # write-L0-pause-trigger = 24 # write-L0-slowdown-trigger = 17 +{{ if and .Values.tlsCluster .Values.tlsCluster.enabled }} +[security] +# Path of file that contains list of trusted SSL CAs for connection with cluster components. +ssl-ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +# Path of file that contains X509 certificate in PEM format for connection with cluster components. +ssl-cert = "/var/lib/pump-tls/tls.crt" +# Path of file that contains X509 key in PEM format for connection with cluster components. 
+ssl-key = "/var/lib/pump-tls/tls.key" +{{- end -}} + diff --git a/charts/tidb-cluster/templates/discovery-deployment.yaml b/charts/tidb-cluster/templates/discovery-deployment.yaml index 38e21e41b0..df6bd7c507 100644 --- a/charts/tidb-cluster/templates/discovery-deployment.yaml +++ b/charts/tidb-cluster/templates/discovery-deployment.yaml @@ -29,6 +29,14 @@ spec: {{- if .Values.rbac.create }} serviceAccount: {{ template "cluster.name" . }}-discovery {{- end }} + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} {{- end }} containers: - name: discovery diff --git a/charts/tidb-cluster/templates/monitor-deployment.yaml b/charts/tidb-cluster/templates/monitor-deployment.yaml index 3b7c82c7f4..b297a1553c 100644 --- a/charts/tidb-cluster/templates/monitor-deployment.yaml +++ b/charts/tidb-cluster/templates/monitor-deployment.yaml @@ -134,9 +134,9 @@ spec: - name: prometheus-rules mountPath: /prometheus-rules readOnly: false - {{- if .Values.enableTLSCluster }} - - name: tls-pd-client - mountPath: /var/lib/pd-client-tls + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + - name: cluster-client-tls + mountPath: /var/lib/cluster-client-tls readOnly: true {{- end }} {{- if .Values.monitor.grafana.create }} @@ -241,11 +241,11 @@ spec: name: prometheus-rules - emptyDir: {} name: grafana-dashboard - {{- if .Values.enableTLSCluster }} - - name: tls-pd-client + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + - name: cluster-client-tls secret: defaultMode: 420 - secretName: {{ .Release.Name }}-pd-client + secretName: {{ .Release.Name }}-cluster-client-secret {{- end }} {{- if .Values.monitor.tolerations }} tolerations: diff --git a/charts/tidb-cluster/templates/monitor-rbac.yaml b/charts/tidb-cluster/templates/monitor-rbac.yaml index 1bb7aca7b2..628100b7c1 100644 --- a/charts/tidb-cluster/templates/monitor-rbac.yaml +++ b/charts/tidb-cluster/templates/monitor-rbac.yaml @@ -23,6 +23,16 @@ rules: resources: - pods verbs: ["get", "list", "watch"] + {{- if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} +- apiGroups: + - security.openshift.io + resourceNames: + - anyuid + resources: + - securitycontextconstraints + verbs: + - use + {{- end }} {{- if .Values.rbac.crossNamespace }} - nonResourceURLs: ["/metrics"] verbs: ["get"] diff --git a/charts/tidb-cluster/templates/pump-statefulset.yaml b/charts/tidb-cluster/templates/pump-statefulset.yaml index 90a61b62c9..f73fa3c06d 100644 --- a/charts/tidb-cluster/templates/pump-statefulset.yaml +++ b/charts/tidb-cluster/templates/pump-statefulset.yaml @@ -55,6 +55,11 @@ spec: mountPath: /data - name: config mountPath: /etc/pump + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + - name: pump-tls + mountPath: /var/lib/pump-tls + readOnly: true + {{- end }} resources: {{ toYaml .Values.binlog.pump.resources | indent 10 }} {{- if and (ne .Values.timezone "UTC") (ne .Values.timezone "") }} @@ -73,6 +78,11 @@ spec: items: - key: pump-config path: pump.toml + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + - name: pump-tls + secret: + secretName: {{ include "pump.tlsSecretName" . 
}} + {{- end }} volumeClaimTemplates: - metadata: name: data diff --git a/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml b/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml index f7860f3ea7..d99b88ace8 100644 --- a/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml +++ b/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml @@ -74,7 +74,7 @@ spec: - name: GOOGLE_APPLICATION_CREDENTIALS value: /gcp/credentials.json {{- end }} - {{- if or .Values.scheduledBackup.ceph .Values.scheduledBackup.s3 }} + {{- if or .Values.scheduledBackup.ceph.secretName .Values.scheduledBackup.s3.secretName }} - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: @@ -97,6 +97,14 @@ spec: name: {{ .Values.scheduledBackup.secretName }} key: password restartPolicy: {{ .Values.scheduledBackup.restartPolicy | default "OnFailure" }} + {{- if .Values.scheduledBackup.affinity }} + affinity: +{{ toYaml .Values.scheduledBackup.affinity | indent 12 }} + {{- end }} + {{- if .Values.scheduledBackup.tolerations }} + tolerations: +{{ toYaml .Values.scheduledBackup.tolerations | indent 12 }} + {{- end }} volumes: - name: data persistentVolumeClaim: diff --git a/charts/tidb-cluster/templates/scripts/_initialize_tidb_users.py.tpl b/charts/tidb-cluster/templates/scripts/_initialize_tidb_users.py.tpl index 290376a493..05051b3c5f 100755 --- a/charts/tidb-cluster/templates/scripts/_initialize_tidb_users.py.tpl +++ b/charts/tidb-cluster/templates/scripts/_initialize_tidb_users.py.tpl @@ -26,3 +26,4 @@ if permit_host != '%%': conn.cursor().execute("update mysql.user set Host=%s where User='root';", (permit_host,)) conn.cursor().execute("flush privileges;") conn.commit() +conn.close() diff --git a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl index 3e42ef7091..9901756475 100755 --- a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl +++ b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl @@ -3,7 +3,8 @@ set -euo pipefail host=$(getent hosts {{ template "cluster.name" . }}-tidb | head | awk '{print $1}') backupName=scheduled-backup-`date "+%Y%m%d-%H%M%S"` -backupPath=/data/${backupName} +backupBase=/data +backupPath=${backupBase}/${backupName} echo "making dir ${backupPath}" mkdir -p ${backupPath} @@ -37,10 +38,29 @@ echo "Reset TiKV GC life time to ${gc_life_time}" /usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} ${password_str} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" {{- if .Values.scheduledBackup.gcp }} -uploader \ - --cloud=gcp \ - --bucket={{ .Values.scheduledBackup.gcp.bucket }} \ - --backup-dir=${backupPath} +# Once we know there are no more credentials that will be logged we can run with -x +set -x +bucket={{ .Values.scheduledBackup.gcp.bucket }} +creds=${GOOGLE_APPLICATION_CREDENTIALS:-""} +if ! 
[[ -z $creds ]] ; then +creds="service_account_file = ${creds}" +fi + +cat < /tmp/rclone.conf +[gcp] +type = google cloud storage +bucket_policy_only = true +$creds +EOF + +cd "${backupBase}" +{{- if .Values.scheduledBackup.gcp.prefix }} +tar -cf - "${backupName}" | pigz -p 16 \ + | rclone --config /tmp/rclone.conf rcat gcp:${bucket}/{{ .Values.scheduledBackup.gcp.prefix }}/${backupName}/${backupName}.tgz +{{- else }} +tar -cf - "${backupName}" | pigz -p 16 \ + | rclone --config /tmp/rclone.conf rcat gcp:${bucket}/${backupName}/${backupName}.tgz +{{- end }} {{- end }} {{- if .Values.scheduledBackup.ceph }} @@ -52,11 +72,26 @@ uploader \ {{- end }} {{- if .Values.scheduledBackup.s3 }} -uploader \ - --cloud=aws \ - --region={{ .Values.scheduledBackup.s3.region }} \ - --bucket={{ .Values.scheduledBackup.s3.bucket }} \ - --backup-dir=${backupPath} +# Once we know there are no more credentials that will be logged we can run with -x +set -x +bucket={{ .Values.scheduledBackup.s3.bucket }} + +cat < /tmp/rclone.conf +[s3] +type = s3 +provider = AWS +env_auth = true +region = {{ .Values.scheduledBackup.s3.region }} +EOF + +cd "${backupBase}" +{{- if .Values.scheduledBackup.s3.prefix }} +tar -cf - "${backupName}" | pigz -p 16 \ + | rclone --config /tmp/rclone.conf rcat s3:${bucket}/{{ .Values.scheduledBackup.s3.prefix }}/${backupName}/${backupName}.tgz +{{- else }} +tar -cf - "${backupName}" | pigz -p 16 \ + | rclone --config /tmp/rclone.conf rcat s3:${bucket}/${backupName}/${backupName}.tgz +{{- end }} {{- end }} {{- if and (.Values.scheduledBackup.cleanupAfterUpload) (or (.Values.scheduledBackup.gcp) (or .Values.scheduledBackup.ceph .Values.scheduledBackup.s3)) }} diff --git a/charts/tidb-cluster/templates/scripts/_start_tidb.sh.tpl b/charts/tidb-cluster/templates/scripts/_start_tidb.sh.tpl old mode 100644 new mode 100755 index ee79a7594e..0ad0027130 --- a/charts/tidb-cluster/templates/scripts/_start_tidb.sh.tpl +++ b/charts/tidb-cluster/templates/scripts/_start_tidb.sh.tpl @@ -26,7 +26,10 @@ then tail -f /dev/null fi +# Use HOSTNAME if POD_NAME is unset for backward compatibility. +POD_NAME=${POD_NAME:-$HOSTNAME} ARGS="--store=tikv \ +--advertise-address=${POD_NAME}.${HEADLESS_SERVICE_NAME}.${NAMESPACE}.svc \ --host=0.0.0.0 \ --path=${CLUSTER_NAME}-pd:2379 \ --config=/etc/tidb/tidb.toml diff --git a/charts/tidb-cluster/templates/scripts/_start_tikv.sh.tpl b/charts/tidb-cluster/templates/scripts/_start_tikv.sh.tpl index d4bb6c590f..806a242be4 100644 --- a/charts/tidb-cluster/templates/scripts/_start_tikv.sh.tpl +++ b/charts/tidb-cluster/templates/scripts/_start_tikv.sh.tpl @@ -39,6 +39,8 @@ ARGS="--pd={{ template "cluster.scheme" . }}://${CLUSTER_NAME}-pd:2379 \ --config=/etc/tikv/tikv.toml " +{{ .Values.tikv.postArgScript }} + echo "starting tikv-server ..." 
echo "/tikv-server ${ARGS}" exec /tikv-server ${ARGS} diff --git a/charts/tidb-cluster/templates/tidb-cluster.yaml b/charts/tidb-cluster/templates/tidb-cluster.yaml index 6c010a668d..1567b6d9df 100644 --- a/charts/tidb-cluster/templates/tidb-cluster.yaml +++ b/charts/tidb-cluster/templates/tidb-cluster.yaml @@ -21,7 +21,10 @@ spec: pvReclaimPolicy: {{ .Values.pvReclaimPolicy }} enablePVReclaim: {{ .Values.enablePVReclaim }} timezone: {{ .Values.timezone | default "UTC" }} - enableTLSCluster: {{ .Values.enableTLSCluster | default false }} +{{- if .Values.tlsCluster }} + tlsCluster: +{{ toYaml .Values.tlsCluster | indent 4 }} +{{- end }} services: {{ toYaml .Values.services | indent 4 }} schedulerName: {{ .Values.schedulerName | default "default-scheduler" }} @@ -92,7 +95,10 @@ spec: {{- end }} maxFailoverCount: {{ .Values.tikv.maxFailoverCount | default 3 }} tidb: - enableTLSClient: {{ .Values.tidb.enableTLSClient | default false }} + {{- if .Values.tidb.tlsClient }} + tlsClient: +{{ toYaml .Values.tidb.tlsClient | indent 6 }} + {{- end }} replicas: {{ .Values.tidb.replicas }} image: {{ .Values.tidb.image }} imagePullPolicy: {{ .Values.tidb.imagePullPolicy | default "IfNotPresent" }} diff --git a/charts/tidb-cluster/templates/tidb-initializer-job.yaml b/charts/tidb-cluster/templates/tidb-initializer-job.yaml index f5dab4c2e6..e792dc8037 100644 --- a/charts/tidb-cluster/templates/tidb-initializer-job.yaml +++ b/charts/tidb-cluster/templates/tidb-initializer-job.yaml @@ -44,6 +44,8 @@ spec: fi done echo "info: successfully connected to $host:$port, able to initialize TiDB now" + resources: +{{ toYaml .Values.tidb.initializer.resources | indent 10 }} containers: - name: mysql-client image: {{ .Values.mysqlClient.image }} diff --git a/charts/tidb-cluster/values.yaml b/charts/tidb-cluster/values.yaml index 80dd0bd2c4..ed5d245ff1 100644 --- a/charts/tidb-cluster/values.yaml +++ b/charts/tidb-cluster/values.yaml @@ -38,7 +38,7 @@ services: type: ClusterIP discovery: - image: pingcap/tidb-operator:v1.1.0-beta.1 + image: pingcap/tidb-operator:v1.1.0-rc.2 imagePullPolicy: IfNotPresent resources: limits: @@ -48,6 +48,15 @@ discovery: cpu: 80m memory: 50Mi + ## affinity defines discovery scheduling rules,it's default settings is empty. + ## please read the affinity document before set your scheduling rule: + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + + ## Tolerations are applied to pods, and allow pods to schedule onto nodes with matching taints. + ## refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration + tolerations: [] + # Whether enable ConfigMap Rollout management. # When enabling, change of ConfigMap will trigger a graceful rolling-update of the component. # This feature is only available in tidb-operator v1.0 or higher. @@ -55,10 +64,23 @@ discovery: # if the ConfigMap was not changed. enableConfigMapRollout: true -# Whether enable TLS connections between server nodes. -# When enabled, PD/TiDB/TiKV will use TLS encrypted connections to transfer data between each node, -# certificates will be generated automatically (if not already present). -enableTLSCluster: false +# Whether enable the TLS connection between TiDB server components +tlsCluster: + # The steps to enable this feature: + # 1. Generate TiDB server components certificates and a client-side certifiacete for them. 
+ # There are multiple ways to generate these certificates: + # - user-provided certificates: https://pingcap.com/docs/stable/how-to/secure/generate-self-signed-certificates/ + # - use the K8s built-in certificate signing system signed certificates: https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster/ + # - or use cert-manager signed certificates: https://cert-manager.io/ + # 2. Create one secret object for one component which contains the certificates created above. + # The name of this Secret must be: --cluster-secret. + # For PD: kubectl create secret generic -pd-cluster-secret --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + # For TiKV: kubectl create secret generic -tikv-cluster-secret --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + # For TiDB: kubectl create secret generic -tidb-cluster-secret --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + # For Client: kubectl create secret generic -cluster-client-secret --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + # Same for other components. + # 3. Then create the TiDB cluster with `tlsCluster.enabled` set to `true`. + enabled: false pd: # Please refer to https://github.com/pingcap/pd/blob/master/conf/config.toml for the default @@ -75,9 +97,12 @@ pd: # pd Service # we can only specify clusterIP and loadBalancerIP now - service: - clusterIP: "None" - + service: {} + # type: "< default use global service type >" + # loadBalancerIP: "" + # clusterIP: "" + # annotations: {} "" + # portName: "client" replicas: 3 image: pingcap/pd:v3.0.8 # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer. @@ -284,6 +309,13 @@ tikv: # After waiting for 5 minutes, TiDB Operator creates a new TiKV node if this TiKV node is still down. # maxFailoverCount is used to configure the maximum number of TiKV nodes that TiDB Operator can create when failover occurs. maxFailoverCount: 3 + # postArgscript is the script executed after the normal tikv instance start args is built, + # it is recommended to modify the args constructor logic if you have any special needs. + postArgScript: | + if [ ! -z "${STORE_LABELS:-}" ]; then + LABELS=" --labels ${STORE_LABELS} " + ARGS="${ARGS}${LABELS}" + fi tidb: # Please refer to https://github.com/pingcap/tidb/blob/master/config/config.toml.example for the default @@ -396,9 +428,11 @@ tidb: service: type: NodePort exposeStatus: true + # portName: "mysql-client" # annotations: # cloud.google.com/load-balancer-type: Internal separateSlowLog: true + slowLogTailer: image: busybox:1.26.2 resources: @@ -411,12 +445,12 @@ tidb: initializer: resources: {} - # limits: - # cpu: 100m - # memory: 100Mi - # requests: - # cpu: 100m - # memory: 100Mi + # limits: + # cpu: 100m + # memory: 100Mi + # requests: + # cpu: 100m + # memory: 100Mi # tidb plugin configuration plugin: @@ -428,10 +462,22 @@ tidb: list: ["whitelist-1"] # Whether enable TLS connection between TiDB server and MySQL client. - # When enabled, TiDB will accept TLS encrypted connections from MySQL client, certificates will be generated - # automatically. - # Note: TLS connection is not forced on the server side, plain connections are also accepted after enableing. - enableTLSClient: false + # https://pingcap.com/docs/stable/how-to/secure/enable-tls-clients/ + tlsClient: + # The steps to enable this feature: + # 1. 
Generate a TiDB server-side certificate and a client-side certifiacete for the TiDB cluster. + # There are multiple ways to generate certificates: + # - user-provided certificates: https://pingcap.com/docs/stable/how-to/secure/enable-tls-clients/ + # - use the K8s built-in certificate signing system signed certificates: https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster/ + # - or use cert-manager signed certificates: https://cert-manager.io/ + # 2. Create a K8s Secret object which contains the TiDB server-side certificate created above. + # The name of this Secret must be: -tidb-server-secret. + # kubectl create secret generic -tidb-server-secret --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + # 3. Create a K8s Secret object which contains the TiDB client-side certificate created above which will be used by TiDB Operator. + # The name of this Secret must be: -tidb-client-secret. + # kubectl create secret generic -tidb-client-secret --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + # 4. Then create the TiDB cluster with `tlsClient.enabled` set to `true`. + enabled: false # mysqlClient is used to set password for TiDB # it must has Python MySQL client installed @@ -596,6 +642,7 @@ binlog: # pump configurations (change to the tags of your pump version), # just follow the format in the file and configure in the 'config' section # as below if you want to customize any configuration. + # [security] section will be generated automatically if tlsCluster.enabled is set to true so users do not need to configure it. # config: | # gc = 7 # heartbeat-interval = 2 @@ -683,7 +730,7 @@ binlog: scheduledBackup: create: false # https://github.com/pingcap/tidb-cloud-backup - mydumperImage: pingcap/tidb-cloud-backup:20191217 + mydumperImage: pingcap/tidb-cloud-backup:20200229 mydumperImagePullPolicy: IfNotPresent # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer. # different classes might map to quality-of-service levels, or to backup policies, @@ -724,6 +771,7 @@ scheduledBackup: # backup to gcp gcp: {} # bucket: "" + # prefix: "" # secretName is the name of the secret which stores the gcp service account credentials json file # The service account must have read/write permission to the above bucket. # Read the following document to create the service account and download the credentials file as credentials.json: @@ -744,6 +792,7 @@ scheduledBackup: s3: {} # region: "" # bucket: "" + # prefix: "" # secretName is the name of the secret which stores s3 object store access key and secret key # You can create the secret by: # kubectl create secret generic s3-backup-secret --from-literal=access_key= --from-literal=secret_key= @@ -757,6 +806,15 @@ scheduledBackup: # cpu: 4000m # memory: 4Gi + ## affinity defines pd scheduling rules,it's default settings is empty. + ## please read the affinity document before set your scheduling rule: + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + + ## Tolerations are applied to pods, and allow pods to schedule onto nodes with matching taints. 
+ ## refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration + tolerations: [] + importer: create: false image: pingcap/tidb-lightning:v3.0.8 diff --git a/charts/tidb-drainer/templates/_helpers.tpl b/charts/tidb-drainer/templates/_helpers.tpl index 082f3615cc..fe2c408e71 100644 --- a/charts/tidb-drainer/templates/_helpers.tpl +++ b/charts/tidb-drainer/templates/_helpers.tpl @@ -1,6 +1,14 @@ {{- define "drainer.name" -}} +{{- if .Values.drainerName -}} +{{ .Values.drainerName }} +{{- else -}} {{ .Values.clusterName }}-{{ .Release.Name }}-drainer {{- end -}} +{{- end -}} + +{{- define "drainer.tlsSecretName" -}} +{{ .Values.clusterName }}-drainer-cluster-secret +{{- end -}} {{/* Encapsulate config data for consistent digest calculation @@ -10,12 +18,25 @@ config-file: |- {{- if .Values.config }} {{ .Values.config | indent 2 }} {{- end -}} + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + [security] + ssl-ca = "/var/lib/drainer-tls/ca.crt" + ssl-cert = "/var/lib/drainer-tls/tls.crt" + ssl-key = "/var/lib/drainer-tls/tls.key" + {{- if .Values.tlsCluster.certAllowedCN }} + cert-allowed-cn = {{ .Values.tlsCluster.certAllowedCN | toJson }} + {{- end -}} + {{- end -}} {{- end -}} {{- define "drainer-configmap.name" -}} {{ include "drainer.name" . }}-{{ include "drainer-configmap.data" . | sha256sum | trunc 8 }} {{- end -}} +{{- define "cluster.scheme" -}} +{{ if and .Values.tlsCluster .Values.tlsCluster.enabled }}https{{ else }}http{{ end }} +{{- end -}} + {{- define "helm-toolkit.utils.template" -}} {{- $name := index . 0 -}} {{- $context := index . 1 -}} diff --git a/charts/tidb-drainer/templates/drainer-statefulset.yaml b/charts/tidb-drainer/templates/drainer-statefulset.yaml index 8d162763f5..e0bb02776a 100644 --- a/charts/tidb-drainer/templates/drainer-statefulset.yaml +++ b/charts/tidb-drainer/templates/drainer-statefulset.yaml @@ -46,6 +46,11 @@ spec: mountPath: /data - name: config mountPath: /etc/drainer + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + - name: drainer-tls + mountPath: /var/lib/drainer-tls + readOnly: true + {{- end }} {{- if and (ne .Values.timezone "UTC") (ne .Values.timezone "") }} env: - name: TZ @@ -60,6 +65,11 @@ spec: items: - key: config-file path: drainer.toml + {{- if and .Values.tlsCluster .Values.tlsCluster.enabled }} + - name: drainer-tls + secret: + secretName: {{ include "drainer.tlsSecretName" . }} + {{- end }} {{- with .Values.nodeSelector }} nodeSelector: {{ toYaml . | indent 8 }} diff --git a/charts/tidb-drainer/templates/scripts/_start_drainer.sh.tpl b/charts/tidb-drainer/templates/scripts/_start_drainer.sh.tpl index ada3d3128f..8f258ce6f3 100644 --- a/charts/tidb-drainer/templates/scripts/_start_drainer.sh.tpl +++ b/charts/tidb-drainer/templates/scripts/_start_drainer.sh.tpl @@ -26,7 +26,7 @@ done /drainer \ -L={{ .Values.logLevel | default "info" }} \ --pd-urls=http://{{ .Values.clusterName }}-pd:2379 \ +-pd-urls={{ include "cluster.scheme" . }}://{{ .Values.clusterName }}-pd:2379 \ -addr=`echo ${HOSTNAME}`.{{ include "drainer.name" . 
}}:8249 \ -config=/etc/drainer/drainer.toml \ -disable-detect={{ .Values.disableDetect | default false }} \ diff --git a/charts/tidb-drainer/values.yaml b/charts/tidb-drainer/values.yaml index 66ac70655c..6d951eab8b 100644 --- a/charts/tidb-drainer/values.yaml +++ b/charts/tidb-drainer/values.yaml @@ -5,6 +5,11 @@ # timezone is the default system timzone timezone: UTC +# Change the name of the statefulset and pod +# The default is clusterName-ReleaseName-drainer +# Do not change the name of an existing running drainer: this is unsupported. +# drainerName: + # clusterName is the TiDB cluster name that should backup from or restore to. clusterName: demo clusterVersion: v3.0.8 @@ -24,7 +29,26 @@ disableDetect: false # if drainer donesn't have checkpoint, use initial commitTS to initial checkpoint initialCommitTs: 0 +# Whether enable the TLS connection between TiDB server components +tlsCluster: + # The steps to enable this feature: + # 1. Generate Drainer certificate. + # There are multiple ways to generate these certificates: + # - user-provided certificates: https://pingcap.com/docs/stable/how-to/secure/generate-self-signed-certificates/ + # - use the K8s built-in certificate signing system signed certificates: https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster/ + # - or use cert-manager signed certificates: https://cert-manager.io/ + # 2. Create one secret object for Drainer which contains the certificates created above. + # The name of this Secret must be: -drainer-cluster-secret. + # For Drainer: kubectl create secret generic -drainer-cluster-secret --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + # 3. Then create the Drainer cluster with `tlsCluster.enabled` set to `true`. + enabled: false + + # certAllowedCN is the Common Name that allowed + certAllowedCN: [] + # - TiDB + # Refer to https://github.com/pingcap/tidb-binlog/blob/master/cmd/drainer/drainer.toml +# [security] section will be generated automatically if tlsCluster.enabled is set to true so users do not need to configure it. config: | detect-interval = 10 compressor = "" diff --git a/charts/tidb-lightning/templates/job.yaml b/charts/tidb-lightning/templates/job.yaml index 4a4b46433d..2f5e9a8ba7 100644 --- a/charts/tidb-lightning/templates/job.yaml +++ b/charts/tidb-lightning/templates/job.yaml @@ -28,6 +28,9 @@ spec: {{ toYaml .Values.annotations | indent 8 }} {{- end }} spec: + {{- if .Values.serviceAccount }} + serviceAccountName: {{ .Values.serviceAccount }} + {{- end }} {{ if and .Values.dataSource.local.hostPath .Values.dataSource.local.nodeName -}} nodeName: {{ .Values.dataSource.local.nodeName }} {{ else if not .Values.dataSource.adhoc.pvcName -}} @@ -134,6 +137,10 @@ spec: {{- if .Values.affinity }} affinity: {{ toYaml .Values.affinity | indent 6 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} {{- end }} {{- if .Values.tolerations }} tolerations: diff --git a/charts/tidb-lightning/templates/rclone-conf.yaml b/charts/tidb-lightning/templates/rclone-conf.yaml index 77cf277ed6..90fc7b8672 100644 --- a/charts/tidb-lightning/templates/rclone-conf.yaml +++ b/charts/tidb-lightning/templates/rclone-conf.yaml @@ -1,10 +1,9 @@ +{{- if .Values.dataSource.remote.rcloneConfig }} apiVersion: v1 kind: ConfigMap metadata: name: rclone-{{ include "tidb-lightning.name" . 
}} -type: Opaque data: config-file: |- - {{- if .Values.dataSource.remote.rcloneConfig }} {{ .Values.dataSource.remote.rcloneConfig | indent 4 }} - {{- end -}} +{{- end }} diff --git a/charts/tidb-lightning/templates/scripts/_start_data_retriever.sh.tpl b/charts/tidb-lightning/templates/scripts/_start_data_retriever.sh.tpl index 96e16a1bc9..4529f7ce09 100644 --- a/charts/tidb-lightning/templates/scripts/_start_data_retriever.sh.tpl +++ b/charts/tidb-lightning/templates/scripts/_start_data_retriever.sh.tpl @@ -1,4 +1,8 @@ set -euo pipefail +{{ if .Values.dataSource.remote.directory }} +# rclone sync skip identical files automatically +rclone --config /etc/rclone/rclone.conf sync -P {{ .Values.dataSource.remote.directory}} /data +{{- else -}} filename=$(basename {{ .Values.dataSource.remote.path }}) if find /data -name metadata | egrep '.*'; then echo "data already exist" @@ -7,3 +11,4 @@ else rclone --config /etc/rclone/rclone.conf copy -P {{ .Values.dataSource.remote.path }} /data cd /data && tar xzvf ${filename} fi +{{- end -}} diff --git a/charts/tidb-lightning/templates/scripts/_start_lightning.sh.tpl b/charts/tidb-lightning/templates/scripts/_start_lightning.sh.tpl index ba8d2708cc..32ec7be54b 100644 --- a/charts/tidb-lightning/templates/scripts/_start_lightning.sh.tpl +++ b/charts/tidb-lightning/templates/scripts/_start_lightning.sh.tpl @@ -2,6 +2,16 @@ data_dir={{ .Values.dataSource.local.hostPath }} {{- else if .Values.dataSource.adhoc.pvcName -}} data_dir=/var/lib/tidb-lightning/{{ .Values.dataSource.adhoc.backupName | default .Values.dataSource.adhoc.pvcName }} +{{- else if .Values.dataSource.remote.directory -}} +data_dir=/var/lib/tidb-lightning +if [ -z "$(ls -A ${data_dir})" ]; then + if [ ! -z ${FAIL_FAST} ]; then + exit 1 + else + echo "No files in data dir, please exec into my container to diagnose" + tail -f /dev/null + fi +fi {{- else -}} data_dir=$(dirname $(find /var/lib/tidb-lightning -name metadata 2>/dev/null) 2>/dev/null) if [ -z $data_dir ]; then diff --git a/charts/tidb-lightning/values.yaml b/charts/tidb-lightning/values.yaml index d294728992..fc95e7b367 100644 --- a/charts/tidb-lightning/values.yaml +++ b/charts/tidb-lightning/values.yaml @@ -25,11 +25,13 @@ dataSource: # pvcName: tidb-cluster-scheduled-backup # backupName: scheduled-backup-20190822-041004 remote: - rcloneImage: tynor88/rclone + rcloneImage: pingcap/tidb-cloud-backup:20200229 storageClassName: local-storage storage: 100Gi secretName: cloud-storage-secret path: s3:bench-data-us/sysbench/sbtest_16_1e7.tar.gz + # Directory support downloading all files in a remote directory, shadow dataSoure.remote.path if present + # directory: s3:bench-data-us # If rcloneConfig is configured, then `secretName` will be ignored, # `rcloneConfig` should only be used for the cases where no sensitive # information need to be configured, e.g. 
the configuration as below, @@ -72,6 +74,9 @@ affinity: {} backend: importer # importer | tidb +# Specify a Service Account for lightning +# serviceAccount: + config: | [lightning] level = "info" diff --git a/charts/tidb-operator/templates/admission/admission-webhook-deployment.yaml b/charts/tidb-operator/templates/admission/admission-webhook-deployment.yaml index 228cdb590b..1efd651728 100644 --- a/charts/tidb-operator/templates/admission/admission-webhook-deployment.yaml +++ b/charts/tidb-operator/templates/admission/admission-webhook-deployment.yaml @@ -30,9 +30,11 @@ spec: imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }} command: - /usr/local/bin/tidb-admission-webhook + # use > 1024 port, then we can run it as non-root user + - --secure-port=6443 {{- if eq .Values.admissionWebhook.apiservice.insecureSkipTLSVerify false }} - - --tls-cert-file=/var/serving-cert/cert.pem - - --tls-private-key-file=/var/serving-cert/key.pem + - --tls-cert-file=/var/serving-cert/tls.crt + - --tls-private-key-file=/var/serving-cert/tls.key {{- end }} {{- if .Values.features }} - --features={{ join "," .Values.features }} @@ -41,7 +43,7 @@ spec: failureThreshold: 5 httpGet: path: /healthz - port: 443 + port: 6443 scheme: HTTPS initialDelaySeconds: 5 timeoutSeconds: 5 @@ -49,20 +51,32 @@ spec: failureThreshold: 5 httpGet: path: /healthz - port: 443 + port: 6443 scheme: HTTPS initialDelaySeconds: 5 timeoutSeconds: 5 - {{- if eq .Values.admissionWebhook.apiservice.insecureSkipTLSVerify false }} + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace volumeMounts: + {{- if eq .Values.admissionWebhook.apiservice.insecureSkipTLSVerify false }} - mountPath: /var/serving-cert name: serving-cert + {{- else }} + - mountPath: /apiserver.local.config + name: apiserver-local-config {{- end }} - {{- if eq .Values.admissionWebhook.apiservice.insecureSkipTLSVerify false }} volumes: + {{- if eq .Values.admissionWebhook.apiservice.insecureSkipTLSVerify false }} - name: serving-cert secret: defaultMode: 420 - secretName: tidb-admission-webhook-certs + secretName: {{ .Values.admissionWebhook.apiservice.tlsSecret }} + {{- else }} + # rootfs maybe read-only, we need to an empty dir volume to store self-signed certifiates, etc. 
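+      # (Assumption for context: the upstream generic-admission-server, like other Kubernetes
+      # aggregated apiservers, typically writes its self-signed serving certificates under
+      # apiserver.local.config/certificates, which is why this emptyDir is mounted at
+      # /apiserver.local.config.)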
+ - name: apiserver-local-config + emptyDir: {} {{- end }} {{- end }} diff --git a/charts/tidb-operator/templates/admission/admission-webhook-rbac.yaml b/charts/tidb-operator/templates/admission/admission-webhook-rbac.yaml index 748be3a295..4d78b8e603 100644 --- a/charts/tidb-operator/templates/admission/admission-webhook-rbac.yaml +++ b/charts/tidb-operator/templates/admission/admission-webhook-rbac.yaml @@ -27,6 +27,9 @@ rules: - apiGroups: [""] resources: ["pods"] verbs: ["get", "list", "watch", "update"] + - apiGroups: [""] + resources: ["secrets","configmaps"] + verbs: ["get", "list"] - apiGroups: [""] resources: ["events"] verbs: ["create","patch","update"] diff --git a/charts/tidb-operator/templates/admission/admission-webhook-registration.yaml b/charts/tidb-operator/templates/admission/admission-webhook-registration.yaml index 54def4b222..f39dc9262d 100644 --- a/charts/tidb-operator/templates/admission/admission-webhook-registration.yaml +++ b/charts/tidb-operator/templates/admission/admission-webhook-registration.yaml @@ -13,7 +13,7 @@ spec: {{- if .Values.admissionWebhook.apiservice.insecureSkipTLSVerify }} insecureSkipTLSVerify: true {{- else }} - caBundle: {{ .Values.admissionWebhook.apiservice.cert | b64enc }} + caBundle: {{ .Values.admissionWebhook.apiservice.caBundle }} {{- end }} group: admission.tidb.pingcap.com groupPriorityMinimum: 1000 @@ -23,11 +23,11 @@ spec: namespace: {{ .Release.Namespace }} version: v1alpha1 --- -{{- if .Values.admissionWebhook.hooksEnabled.pods }} +{{- if .Values.admissionWebhook.validation.pods }} apiVersion: admissionregistration.k8s.io/v1beta1 kind: ValidatingWebhookConfiguration metadata: - name: validation-delete-tidb-admission-webhook-cfg + name: validation-tidb-pod-webhook-cfg labels: app.kubernetes.io/name: {{ template "chart.name" . }} app.kubernetes.io/managed-by: {{ .Release.Service }} @@ -35,34 +35,36 @@ metadata: app.kubernetes.io/component: admission-webhook helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} webhooks: - - name: delete.podadmission.tidb.pingcap.com + - name: podadmission.tidb.pingcap.com {{- if semverCompare ">=1.15-0" .Capabilities.KubeVersion.GitVersion }} objectSelector: matchLabels: "app.kubernetes.io/managed-by": "tidb-operator" "app.kubernetes.io/name": "tidb-cluster" {{- end }} - failurePolicy: {{ .Values.admissionWebhook.failurePolicy.deletePod | default "Fail" }} + failurePolicy: {{ .Values.admissionWebhook.failurePolicy.validation | default "Fail" }} clientConfig: service: name: kubernetes namespace: default path: "/apis/admission.tidb.pingcap.com/v1alpha1/admissionreviews" {{- if .Values.admissionWebhook.cabundle }} - caBundle: {{ .Values.admissionWebhook.cabundle | b64enc }} + caBundle: {{ .Values.admissionWebhook.cabundle }} {{- else }} caBundle: null {{- end }} rules: - - operations: ["DELETE"] + - operations: ["DELETE","CREATE"] apiGroups: [""] apiVersions: ["v1"] resources: ["pods"] +{{- end }} --- +{{- if .Values.admissionWebhook.validation.statefulSets }} apiVersion: admissionregistration.k8s.io/v1beta1 kind: ValidatingWebhookConfiguration metadata: - name: validation-create-tidb-admission-webhook-cfg + name: validation-tidb-statefulset-webhook-cfg labels: app.kubernetes.io/name: {{ template "chart.name" . 
}} app.kubernetes.io/managed-by: {{ .Release.Service }} @@ -70,36 +72,40 @@ metadata: app.kubernetes.io/component: admission-webhook helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} webhooks: - - name: create.podadmission.tidb.pingcap.com + - name: stsadmission.tidb.pingcap.com {{- if semverCompare ">=1.15-0" .Capabilities.KubeVersion.GitVersion }} objectSelector: matchLabels: "app.kubernetes.io/managed-by": "tidb-operator" "app.kubernetes.io/name": "tidb-cluster" {{- end }} - failurePolicy: {{ .Values.admissionWebhook.failurePolicy.createPod | default "Ignore" }} + failurePolicy: {{ .Values.admissionWebhook.failurePolicy.validation | default "Ignore" }} clientConfig: service: name: kubernetes namespace: default path: "/apis/admission.tidb.pingcap.com/v1alpha1/admissionreviews" {{- if .Values.admissionWebhook.cabundle }} - caBundle: {{ .Values.admissionWebhook.cabundle | b64enc }} + caBundle: {{ .Values.admissionWebhook.cabundle }} {{- else }} caBundle: null {{- end }} rules: - - operations: ["CREATE"] - apiGroups: [""] - apiVersions: ["v1"] - resources: ["pods"] + - operations: [ "UPDATE" ] + apiGroups: [ "apps", "" ] + apiVersions: ["v1beta1", "v1"] + resources: ["statefulsets"] + - operations: [ "UPDATE" ] + apiGroups: [ "apps.pingcap.com"] + apiVersions: ["v1alpha1", "v1"] + resources: ["statefulsets"] {{- end }} --- -{{- if .Values.admissionWebhook.hooksEnabled.statefulSets }} +{{- if .Values.admissionWebhook.validation.pingcapResources }} apiVersion: admissionregistration.k8s.io/v1beta1 kind: ValidatingWebhookConfiguration metadata: - name: validation-update-tidb-admission-webhook-cfg + name: pingcap-tidb-resources-validating labels: app.kubernetes.io/name: {{ template "chart.name" . }} app.kubernetes.io/managed-by: {{ .Release.Service }} @@ -107,34 +113,30 @@ metadata: app.kubernetes.io/component: admission-webhook helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} webhooks: - - name: update.stsadmission.tidb.pingcap.com - failurePolicy: {{ .Values.admissionWebhook.failurePolicy.updateStatefulSet | default "Ignore" }} + - name: validating.admission.tidb.pingcap.com + failurePolicy: {{ .Values.admissionWebhook.failurePolicy.validation | default "Ignore" }} clientConfig: service: name: kubernetes namespace: default path: "/apis/admission.tidb.pingcap.com/v1alpha1/admissionreviews" {{- if .Values.admissionWebhook.cabundle }} - caBundle: {{ .Values.admissionWebhook.cabundle | b64enc }} + caBundle: {{ .Values.admissionWebhook.cabundle }} {{- else }} caBundle: null {{- end }} rules: - - operations: [ "UPDATE" ] - apiGroups: [ "apps", "" ] - apiVersions: ["v1beta1", "v1"] - resources: ["statefulsets"] - - operations: [ "UPDATE" ] - apiGroups: [ "apps.pingcap.com"] - apiVersions: ["v1alpha1", "v1"] - resources: ["statefulsets"] + - operations: [ "UPDATE", "CREATE" ] + apiGroups: [ "pingcap.com"] + apiVersions: ["v1alpha1"] + resources: ["tidbclusters"] {{- end }} --- -{{- if .Values.admissionWebhook.hooksEnabled.validating }} +{{- if .Values.admissionWebhook.mutation.pingcapResources }} apiVersion: admissionregistration.k8s.io/v1beta1 -kind: ValidatingWebhookConfiguration +kind: MutatingWebhookConfiguration metadata: - name: pingcap-resources-validating + name: pingcap-tidb-resources-defaulitng labels: app.kubernetes.io/name: {{ template "chart.name" . 
}} app.kubernetes.io/managed-by: {{ .Release.Service }} @@ -142,15 +144,15 @@ metadata: app.kubernetes.io/component: admission-webhook helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} webhooks: - - name: validating.admission.tidb.pingcap.com - failurePolicy: {{ .Values.admissionWebhook.failurePolicy.validating | default "Ignore" }} + - name: defaulting.admission.tidb.pingcap.com + failurePolicy: {{ .Values.admissionWebhook.failurePolicy.mutation | default "Ignore" }} clientConfig: service: name: kubernetes namespace: default - path: "/apis/admission.tidb.pingcap.com/v1alpha1/admissionreviews" + path: "/apis/admission.tidb.pingcap.com/v1alpha1/mutatingreviews" {{- if .Values.admissionWebhook.cabundle }} - caBundle: {{ .Values.admissionWebhook.cabundle | b64enc }} + caBundle: {{ .Values.admissionWebhook.cabundle }} {{- else }} caBundle: null {{- end }} @@ -161,11 +163,11 @@ webhooks: resources: ["tidbclusters"] {{- end }} --- -{{- if .Values.admissionWebhook.hooksEnabled.defaulting }} +{{- if .Values.admissionWebhook.mutation.pods }} apiVersion: admissionregistration.k8s.io/v1beta1 kind: MutatingWebhookConfiguration metadata: - name: pingcap-resources-defaulitng + name: mutation-tidb-pod-webhook-cfg labels: app.kubernetes.io/name: {{ template "chart.name" . }} app.kubernetes.io/managed-by: {{ .Release.Service }} @@ -173,22 +175,28 @@ metadata: app.kubernetes.io/component: admission-webhook helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} webhooks: - - name: defaulting.dmission.tidb.pingcap.com - failurePolicy: {{ .Values.admissionWebhook.failurePolicy.defaulting | default "Ignore" }} + - name: podadmission.tidb.pingcap.com + {{- if semverCompare ">=1.15-0" .Capabilities.KubeVersion.GitVersion }} + objectSelector: + matchLabels: + "app.kubernetes.io/managed-by": "tidb-operator" + "app.kubernetes.io/name": "tidb-cluster" + {{- end }} + failurePolicy: {{ .Values.admissionWebhook.failurePolicy.mutation | default "Ignore" }} clientConfig: service: name: kubernetes namespace: default path: "/apis/admission.tidb.pingcap.com/v1alpha1/mutatingreviews" {{- if .Values.admissionWebhook.cabundle }} - caBundle: {{ .Values.admissionWebhook.cabundle | b64enc }} + caBundle: {{ .Values.admissionWebhook.cabundle }} {{- else }} caBundle: null {{- end }} rules: - - operations: [ "UPDATE", "CREATE" ] - apiGroups: [ "pingcap.com"] - apiVersions: ["v1alpha1"] - resources: ["tidbclusters"] + - operations: ["CREATE"] + apiGroups: [""] + apiVersions: ["v1"] + resources: ["pods"] {{- end }} {{- end }} diff --git a/charts/tidb-operator/templates/admission/admission-webhook-secret.yaml b/charts/tidb-operator/templates/admission/admission-webhook-secret.yaml deleted file mode 100644 index 5723299e68..0000000000 --- a/charts/tidb-operator/templates/admission/admission-webhook-secret.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if and ( .Values.admissionWebhook.create ) ( eq .Values.admissionWebhook.apiservice.insecureSkipTLSVerify false ) }} -apiVersion: v1 -kind: Secret -metadata: - name: tidb-admission-webhook-certs - labels: - app.kubernetes.io/name: {{ template "chart.name" . 
}} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/component: admission-cert - helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} -data: - cert.pem: {{ .Values.admissionWebhook.apiservice.cert | b64enc }} - key.pem: {{ .Values.admissionWebhook.apiservice.key | b64enc }} -{{- end }} diff --git a/charts/tidb-operator/templates/admission/admission-webhook-service.yaml b/charts/tidb-operator/templates/admission/admission-webhook-service.yaml index c4fa485745..fa64e37f29 100644 --- a/charts/tidb-operator/templates/admission/admission-webhook-service.yaml +++ b/charts/tidb-operator/templates/admission/admission-webhook-service.yaml @@ -13,7 +13,7 @@ spec: ports: - name: https-webhook # optional port: 443 - targetPort: 443 + targetPort: 6443 selector: app.kubernetes.io/name: {{ template "chart.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} diff --git a/charts/tidb-operator/templates/admission/pre-delete-job.yaml b/charts/tidb-operator/templates/admission/pre-delete-job.yaml index 26a9fe3a46..205c407d2c 100644 --- a/charts/tidb-operator/templates/admission/pre-delete-job.yaml +++ b/charts/tidb-operator/templates/admission/pre-delete-job.yaml @@ -1,4 +1,4 @@ -{{- if and ( .Values.admissionWebhook.create ) ( .Values.admissionWebhook.hooksEnabled.pods ) }} +{{- if and ( .Values.admissionWebhook.create ) ( .Values.admissionWebhook.validation.pods ) }} apiVersion: v1 kind: ServiceAccount metadata: @@ -96,5 +96,5 @@ spec: - "-c" - | set -e - kubectl delete validatingWebhookConfigurations.admissionregistration.k8s.io validation-delete-tidb-admission-webhook-cfg || true + kubectl delete validatingWebhookConfigurations.admissionregistration.k8s.io validation-tidb-pod-webhook-cfg || true {{- end }} diff --git a/charts/tidb-operator/templates/controller-manager-deployment.yaml b/charts/tidb-operator/templates/controller-manager-deployment.yaml index 7de4a59a79..56a86edfe2 100644 --- a/charts/tidb-operator/templates/controller-manager-deployment.yaml +++ b/charts/tidb-operator/templates/controller-manager-deployment.yaml @@ -38,9 +38,15 @@ spec: {{- end }} - -tidb-discovery-image={{ .Values.operatorImage }} - -cluster-scoped={{ .Values.clusterScoped }} - - -auto-failover={{ .Values.controllerManager.autoFailover | default true }} + {{- if eq .Values.controllerManager.autoFailover true }} + - -auto-failover=true + {{- end }} + {{- if eq .Values.controllerManager.autoFailover false }} + - -auto-failover=false + {{- end }} - -pd-failover-period={{ .Values.controllerManager.pdFailoverPeriod | default "5m" }} - -tikv-failover-period={{ .Values.controllerManager.tikvFailoverPeriod | default "5m" }} + - -tiflash-failover-period={{ .Values.controllerManager.tiflashFailoverPeriod | default "5m" }} - -tidb-failover-period={{ .Values.controllerManager.tidbFailoverPeriod | default "5m" }} - -v={{ .Values.controllerManager.logLevel }} {{- if .Values.testMode }} @@ -49,7 +55,7 @@ spec: {{- if .Values.features }} - -features={{ join "," .Values.features }} {{- end }} - {{- if and ( .Values.admissionWebhook.create ) ( .Values.admissionWebhook.hooksEnabled.pods ) }} + {{- if and ( .Values.admissionWebhook.create ) ( .Values.admissionWebhook.validation.pods ) }} - -pod-webhook-enabled=true {{- end }} env: diff --git a/charts/tidb-operator/templates/controller-manager-rbac.yaml b/charts/tidb-operator/templates/controller-manager-rbac.yaml index 7e4930ef9c..079e897a0f 100644 --- 
a/charts/tidb-operator/templates/controller-manager-rbac.yaml +++ b/charts/tidb-operator/templates/controller-manager-rbac.yaml @@ -1,3 +1,6 @@ +{{/* +Delete permission is required in OpenShift because we can't own resources we created if we can't delete them. +*/}} {{- if .Values.rbac.create }} kind: ServiceAccount apiVersion: v1 @@ -29,16 +32,16 @@ rules: verbs: ["*"] - apiGroups: [""] resources: ["endpoints","configmaps"] - verbs: ["create", "get", "list", "watch", "update"] + verbs: ["create", "get", "list", "watch", "update","delete"] - apiGroups: [""] resources: ["serviceaccounts"] - verbs: ["create","get","update"] + verbs: ["create","get","update","delete"] - apiGroups: ["batch"] resources: ["jobs"] verbs: ["get", "list", "watch", "create", "update", "delete"] - apiGroups: [""] resources: ["secrets"] - verbs: ["create", "get", "list", "watch"] + verbs: ["create", "update", "get", "list", "watch","delete"] - apiGroups: [""] resources: ["persistentvolumeclaims"] verbs: ["get", "list", "watch", "create", "update", "delete"] @@ -71,22 +74,16 @@ rules: - apiGroups: [""] resources: ["persistentvolumes"] verbs: ["get", "list", "watch", "patch","update"] -- apiGroups: ["certificates.k8s.io"] - resources: ["certificatesigningrequests"] - verbs: ["create", "get", "list", "watch", "delete"] -- apiGroups: ["certificates.k8s.io"] - resources: ["certificatesigningrequests/approval", "certificatesigningrequests/status"] - verbs: ["update"] {{/* Allow controller manager to escalate its privileges to other subjects, the subjects may never have privilege over the controller. Ref: https://kubernetes.io/docs/reference/access-authn-authz/rbac/#privilege-escalation-prevention-and-bootstrapping */}} - apiGroups: ["rbac.authorization.k8s.io"] resources: [clusterroles,roles] - verbs: ["escalate","create","get","update"] + verbs: ["escalate","create","get","update", "delete"] - apiGroups: ["rbac.authorization.k8s.io"] resources: ["rolebindings","clusterrolebindings"] - verbs: ["create","get","update"] + verbs: ["create","get","update", "delete"] --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1beta1 @@ -126,16 +123,16 @@ rules: verbs: ["*"] - apiGroups: [""] resources: ["endpoints","configmaps"] - verbs: ["create", "get", "list", "watch", "update"] + verbs: ["create", "get", "list", "watch", "update", "delete"] - apiGroups: [""] resources: ["serviceaccounts"] - verbs: ["create","get","update"] + verbs: ["create","get","update","delete"] - apiGroups: ["batch"] resources: ["jobs"] verbs: ["get", "list", "watch", "create", "update", "delete"] - apiGroups: [""] resources: ["secrets"] - verbs: ["create", "get", "list", "watch"] + verbs: ["create", "update", "get", "list", "watch", "delete"] - apiGroups: [""] resources: ["persistentvolumeclaims"] verbs: ["get", "list", "watch", "create", "update", "delete"] @@ -153,10 +150,10 @@ rules: verbs: ["*"] - apiGroups: ["rbac.authorization.k8s.io"] resources: ["roles"] - verbs: ["escalate","create","get","update"] + verbs: ["escalate","create","get","update", "delete"] - apiGroups: ["rbac.authorization.k8s.io"] resources: ["rolebindings"] - verbs: ["create","get","update"] + verbs: ["create","get","update", "delete"] {{- if .Values.features | has "AdvancedStatefulSet=true" }} - apiGroups: - apps.pingcap.com diff --git a/charts/tidb-operator/values.yaml b/charts/tidb-operator/values.yaml index b98fa674af..33c53aac78 100644 --- a/charts/tidb-operator/values.yaml +++ b/charts/tidb-operator/values.yaml @@ -12,11 +12,11 @@ rbac: timezone: UTC # 
operatorImage is TiDB Operator image -operatorImage: pingcap/tidb-operator:v1.1.0-beta.1 +operatorImage: pingcap/tidb-operator:v1.1.0-rc.2 imagePullPolicy: IfNotPresent # tidbBackupManagerImage is tidb backup manager image -# tidbBackupManagerImage: pingcap/tidb-backup-manager:latest +tidbBackupManagerImage: pingcap/tidb-backup-manager:v1.1.0-rc.2 # # Enable or disable tidb-operator features: @@ -58,6 +58,8 @@ controllerManager: tikvFailoverPeriod: 5m # tidb failover period default(5m) tidbFailoverPeriod: 5m + # tiflash failover period default(5m) + tiflashFailoverPeriod: 5m ## affinity defines pod scheduling rules,affinity default settings is empty. ## please read the affinity document before set your scheduling rule: ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity @@ -182,10 +184,11 @@ admissionWebhook: rbac: create: true ## jobImage is to indicate the image used in `pre-delete-job.yaml` - ## if admissionWebhook.create and admissionWebhook.hooksEnabled.pods are both enabled, + ## if admissionWebhook.create and admissionWebhook.validation.pods are both enabled, ## The pre-delete-job would delete the validationWebhookConfiguration using this image jobImage: "bitnami/kubectl:latest" - hooksEnabled: + ## validation webhook would check the given request for the specific resource and operation + validation: ## statefulsets hook would check requests for updating tidbcluster's statefulsets ## If enabled it, the statefulsets of tidbcluseter would update in partition by tidbcluster's annotation statefulSets: false @@ -193,36 +196,41 @@ admissionWebhook: ## if enabled it, the pods of tidbcluster would safely created or deleted by webhook instead of controller pods: true ## validating hook validates the correctness of the resources under pingcap.com group - validating: false + pingcapResources: false + ## mutation webhook would mutate the given request for the specific resource and operation + mutation: + ## pods mutation hook would mutate the pod. Currently It is used for TiKV Auto-Scaling. + ## refer to https://github.com/pingcap/tidb-operator/issues/1651 + pods: true ## defaulting hook set default values for the the resources under pingcap.com group - defaulting: false + pingcapResources: true ## failurePolicy are applied to ValidatingWebhookConfiguration which affect tidb-admission-webhook ## refer to https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy failurePolicy: - ## deletePod Webhook would check the deleting request of tidbcluster pod and the failurePolicy is recommended as Fail - deletePod: Fail - ## createPod Webhook would check the creating request of tidbcluster pod and the failurePolicy is recommended as Ignore - createPod: Ignore - ## updateStatefulSet Webhook would check the updating request of tidbcluster statefulset and the failurePolicy is recommended as Ignore - updateStatefulSet: Ignore - ## validation hook validates the correctness of the resources under pingcap.com group - validating: Ignore - ## defaulting hook set default values for the the resources under pingcap.com group - defaulting: Ignore + ## the validation webhook would check the request of the given resources. + ## If the kubernetes api-server version >= 1.15.0, we recommend the failurePolicy as Fail, otherwise, as Ignore. + validation: Ignore + ## the mutation webhook would mutate the request of the given resources. 
+ ## If the kubernetes api-server version >= 1.15.0, we recommend the failurePolicy as Fail, otherwise, as Ignore. + mutation: Ignore ## tidb-admission-webhook deployed as kubernetes apiservice server ## refer to https://github.com/openshift/generic-admission-server apiservice: ## apiservice config ## refer to https://kubernetes.io/docs/tasks/access-kubernetes-api/configure-aggregation-layer/#contacting-the-extension-apiserver insecureSkipTLSVerify: true - ## The key and cert for `tidb-admission-webook.` Service. + ## The Secret includes the TLS ca, cert and key for the `tidb-admission-webook..svc` Service. ## If insecureSkipTLSVerify is true, this would be ignored. - cert: "" - key: "" + ## You can create the tls secret by: + ## kubectl create secret generic --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + tlsSecret: "" + ## The caBundle for the webhook apiservice, you could get it by the secret you created previously: + ## kubectl get secret --namespace= -o=jsonpath='{.data.ca\.crt}' + caBundle: "" ## certProvider indicate the key and cert for the webhook configuration to communicate with `kubernetes.default` service. ## If your kube-apiserver's version >= 1.13.0, you can leave cabundle empty and the kube-apiserver ## would trust the roots on the apiserver. ## refer to https://github.com/kubernetes/api/blob/master/admissionregistration/v1/types.go#L529 ## or you can get the cabundle by: - ## kubectl get configmap -n kube-system extension-apiserver-authentication -o=jsonpath='{.data.client-ca-file}' + ## kubectl get configmap -n kube-system extension-apiserver-authentication -o=jsonpath='{.data.client-ca-file}' | base64 | tr -d '\n' cabundle: "" diff --git a/charts/tikv-importer/.helmignore b/charts/tikv-importer/.helmignore new file mode 100644 index 0000000000..f0c1319444 --- /dev/null +++ b/charts/tikv-importer/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/tikv-importer/Chart.yaml b/charts/tikv-importer/Chart.yaml new file mode 100644 index 0000000000..0c24c3ce3e --- /dev/null +++ b/charts/tikv-importer/Chart.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +description: A Helm chart for TiKV Importer +name: tikv-importer +version: dev +home: https://github.com/pingcap/tidb-operator +sources: + - https://github.com/pingcap/tidb-operator +keywords: + - newsql + - htap + - database + - mysql + - raft diff --git a/charts/tikv-importer/templates/_helpers.tpl b/charts/tikv-importer/templates/_helpers.tpl new file mode 100644 index 0000000000..2372f181e8 --- /dev/null +++ b/charts/tikv-importer/templates/_helpers.tpl @@ -0,0 +1,21 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "chart.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Encapsulate tikv-importer configmap data for consistent digest calculation +*/}} +{{- define "importer-configmap.data" -}} +config-file: |- + {{- if .Values.config }} +{{ .Values.config | indent 2 }} + {{- end -}} +{{- end -}} + +{{- define "importer-configmap.data-digest" -}} +{{ include "importer-configmap.data" . 
| sha256sum | trunc 8 }} +{{- end -}} diff --git a/charts/tikv-importer/templates/tikv-importer-configmap.yaml b/charts/tikv-importer/templates/tikv-importer-configmap.yaml new file mode 100644 index 0000000000..1bbcabafb0 --- /dev/null +++ b/charts/tikv-importer/templates/tikv-importer-configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.clusterName }}-importer-{{ template "importer-configmap.data-digest" . }} + labels: + app.kubernetes.io/name: {{ template "chart.name" . }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: importer + helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} +data: +{{ include "importer-configmap.data" . | indent 2 }} diff --git a/charts/tikv-importer/templates/tikv-importer-service.yaml b/charts/tikv-importer/templates/tikv-importer-service.yaml new file mode 100644 index 0000000000..07ded6f97a --- /dev/null +++ b/charts/tikv-importer/templates/tikv-importer-service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ .Values.clusterName }}-importer + labels: + app.kubernetes.io/name: {{ template "chart.name" . }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: importer + helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} +spec: + clusterIP: None + ports: + - name: importer + port: 8287 + selector: + app.kubernetes.io/name: {{ template "chart.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: importer diff --git a/charts/tikv-importer/templates/tikv-importer-statefulset.yaml b/charts/tikv-importer/templates/tikv-importer-statefulset.yaml new file mode 100644 index 0000000000..18a6653ec3 --- /dev/null +++ b/charts/tikv-importer/templates/tikv-importer-statefulset.yaml @@ -0,0 +1,89 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ .Values.clusterName }}-importer + labels: + app.kubernetes.io/name: {{ template "chart.name" . }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: importer + helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} +spec: + selector: + matchLabels: + app.kubernetes.io/name: {{ template "chart.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: importer + serviceName: {{ .Values.clusterName }}-importer + replicas: 1 + template: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: "/metrics" + prometheus.io/port: "9091" + labels: + app.kubernetes.io/name: {{ template "chart.name" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: importer + spec: + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 6 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 6 }} + {{- end }} + containers: + - name: importer + image: {{ .Values.image }} + imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent"}} + command: + - /tikv-importer + # tikv-importer does not support domain name: https://github.com/tikv/importer/issues/16 + # - --addr=${MY_POD_NAME}.tikv-importer:8287 + - --addr=$(MY_POD_IP):8287 + - --config=/etc/tikv-importer/tikv-importer.toml + - --import-dir=/var/lib/tikv-importer + env: + - name: MY_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: MY_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: TZ + value: {{ .Values.timezone | default "UTC" }} + volumeMounts: + - name: data + mountPath: /var/lib/tikv-importer + - name: config + mountPath: /etc/tikv-importer + {{- if .Values.resources }} + resources: +{{ toYaml .Values.resources | indent 10 }} + {{- end }} + - name: pushgateway + image: {{ .Values.pushgatewayImage }} + imagePullPolicy: {{ .Values.pushgatewayImagePullPolicy | default "IfNotPresent" }} + volumes: + - name: config + configMap: + name: {{ .Values.clusterName }}-importer-{{ template "importer-configmap.data-digest" . }} + items: + - key: config-file + path: tikv-importer.toml + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: [ "ReadWriteOnce" ] + storageClassName: {{ .Values.storageClassName }} + resources: + requests: + storage: {{ .Values.storage }} diff --git a/charts/tikv-importer/values.yaml b/charts/tikv-importer/values.yaml new file mode 100644 index 0000000000..b0ac35eb5e --- /dev/null +++ b/charts/tikv-importer/values.yaml @@ -0,0 +1,32 @@ +# Default values for tikv-importer. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# timezone is the default system timzone for TiDB +timezone: UTC + +# clusterName is the TiDB cluster name, if not specified, the chart release name will be used +clusterName: demo + +image: pingcap/tidb-lightning:v3.0.8 +imagePullPolicy: IfNotPresent +storageClassName: local-storage +storage: 20Gi +resources: + {} + # limits: + # cpu: 16000m + # memory: 8Gi + # requests: + # cpu: 16000m + # memory: 8Gi +affinity: {} +tolerations: [] +pushgatewayImage: prom/pushgateway:v0.3.1 +pushgatewayImagePullPolicy: IfNotPresent +config: | + log-level = "info" + [metric] + job = "tikv-importer" + interval = "15s" + address = "localhost:9091" diff --git a/ci/aws-clean-eks.sh b/ci/aws-clean-eks.sh new file mode 100755 index 0000000000..fc9c94eb81 --- /dev/null +++ b/ci/aws-clean-eks.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# aws-k8s-tester cannot clean all resources created when some error happened. +# This script is used to clean resources created by aws-k8s-tester in our CI. 
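+# A typical invocation passes one or more EKS test cluster names as arguments
+# (hypothetical cluster name shown; AWS credentials must already be configured,
+# and AWS_REGION is honored if set):
+#   AWS_REGION=us-west-2 ./ci/aws-clean-eks.sh jenkins-tidb-operator-e2e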
+# +# DO NOT USE THIS SCRIPT FOR OTHER USES! +# + +function get_stacks() { + aws cloudformation list-stacks --stack-status-filter CREATE_COMPLETE DELETE_FAILED --query 'StackSummaries[*].StackName' --output text +} + +function delete_security_group() { + local sgId="$1" + echo "info: deleting security group '$sgId'" + for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$sgId" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do + echo "info: clear leaked network interfaces '$eni'" + aws ec2 delete-network-interface --network-interface-id "$eni" + done + aws ec2 delete-security-group --group-id "$sgId" + if [ $? -eq 0 ]; then + echo "info: succesfully deleted security group '$sgId'" + else + echo "error: failed to deleted security group '$sgId'" + fi +} + +function delete_vpc() { + local vpcId="$1" + echo "info: deleting vpc '$vpcId'" + for sgId in $(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=$vpcId" --query "SecurityGroups[?GroupName != 'default'].GroupId" --output text); do + delete_security_group "$sgId" + done + aws ec2 delete-vpc --vpc-id "$vpcId" + if [ $? -eq 0 ]; then + echo "info: succesfully deleted vpc '$vpcId'" + else + echo "error: failed to deleted vpc '$vpcId'" + fi +} + +function fix_eks_mng_deletion_issues() { + local cluster="$1" + local mng="$2" + while IFS=$'\n' read -r line; do + read -r code resourceIds <<< $line + if [ "$code" == "Ec2SecurityGroupDeletionFailure" ]; then + IFS=',' read -ra sgIds <<< "$resourceIds" + for sgId in ${sgIds[@]}; do + delete_security_group "$sgId" + done + fi + done <<< $(aws eks describe-nodegroup --cluster-name "$cluster" --nodegroup-name "$mng" --query 'nodegroup.health.issues' --output json | jq -r '.[].resourceIds |= join(",") | .[] | "\(.code)\t\(.resourceIds)"') +} + +function clean_eks() { + local CLUSTER="$1" + echo "info: searching mng stack" + local regex='^'$CLUSTER'-mng-[0-9]+$' + local mngStack= + for stackName in $(get_stacks); do + if [[ ! "$stackName" =~ $regex ]]; then + continue + fi + mngStack=$stackName + break + done + if [ -n "$mngStack" ]; then + echo "info: mng stack found '$mngStack'" + else + echo "info: mng stack not found" + fi + + echo "info: deleting mng/cluster/cluster-role/mng-role/vpc stacks" + local stacks=( + $mngStack + $CLUSTER-cluster + $CLUSTER-role-cluster + $CLUSTER-role-mng + $CLUSTER-vpc + ) + for stack in ${stacks[@]}; do + echo "info: deleting stack $stack" + aws cloudformation delete-stack --stack-name $stack + aws cloudformation wait stack-delete-complete --stack-name $stack + if [ $? 
-ne 0 ]; then + echo "error: failed to delete stack '$stack'" + if [ "$stack" == "$mngStack" ]; then + echo "info: try to fix mng stack '$stack'" + for mngName in $(aws eks list-nodegroups --cluster-name "$CLUSTER" --query 'nodegroups[*]' --output text); do + fix_eks_mng_deletion_issues "$CLUSTER" $mngName + done + elif [ "$stack" == "$CLUSTER-vpc" ]; then + echo "info: try to fix vpc stack '$stack'" + while IFS=$'\n' read -r sgId; do + delete_security_group "$sgId" + done <<< $(aws cloudformation describe-stacks --stack-name "$stack" --query 'Stacks[*].Outputs[*]' --output json | jq -r '.[] | .[] | select(.OutputKey == "ControlPlaneSecurityGroupID") | .OutputValue') + while IFS=$'\n' read -r vpcId; do + delete_vpc "$vpcId" + done <<< $(aws cloudformation describe-stacks --stack-name "$stack" --query 'Stacks[*].Outputs[*]' --output json | jq -r '.[] | .[] | select(.OutputKey == "VPCID") | .OutputValue') + else + echo "fatal: unable to delete stack $stack" + exit 1 + fi + echo "info: try to delete the stack '$stack' again" + aws cloudformation delete-stack --stack-name $stack + aws cloudformation wait stack-delete-complete --stack-name $stack + if [ $? -ne 0 ]; then + echo "fatal: unable to delete stack $stack" + exit 1 + fi + fi + done +} + +# https://github.com/aws/aws-cli#other-configurable-variables +if [ -n "${AWS_REGION}" ]; then + export AWS_DEFAULT_REGION=${AWS_REGION:-} +fi + +aws sts get-caller-identity +if [ $? -ne 0 ]; then + echo "error: failed to get caller identity" + exit 1 +fi + +for CLUSTER in $@; do + echo "info: start to clean eks test cluster '$CLUSTER'" + clean_eks "$CLUSTER" + if [ $? -eq 0 ]; then + echo "info: succesfully cleaned the eks test cluster '$CLUSTER'" + else + echo "fatal: failed to clean the eks test cluster '$CLUSTER'" + exit 1 + fi +done diff --git a/ci/deploy_tidb_operator_staging.groovy b/ci/deploy_tidb_operator_staging.groovy index cfb5a86e22..17b8b8b3cd 100644 --- a/ci/deploy_tidb_operator_staging.groovy +++ b/ci/deploy_tidb_operator_staging.groovy @@ -23,12 +23,18 @@ scheduler: replicas: 2 admissionWebhook: create: true - hooksEnabled: + replicas: 2 + validation: statefulSets: true pods: true - # TODO: enable validating and defaulting after we ease the constrain - validating: false - defaulting: false + pingcapResources: false + mutation: + pingcapResources: true + failurePolicy: + validation: Fail + mutation: Fail +features: + - AutoScaling=true ''' def call(BUILD_BRANCH) { diff --git a/ci/e2e_eks.groovy b/ci/e2e_eks.groovy new file mode 100644 index 0000000000..1966b95d54 --- /dev/null +++ b/ci/e2e_eks.groovy @@ -0,0 +1,159 @@ +// +// Jenkins pipeline for EKS e2e job. +// +// This script is written in declarative syntax. Refer to +// https://jenkins.io/doc/book/pipeline/syntax/ for more details. +// +// Note that parameters of the job is configured in this script. 
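+// The DEFAULT_* values below can be overridden per Jenkins job via environment
+// variables set before this script runs, for example (hypothetical values):
+//   DEFAULT_CLUSTER=my-eks-e2e, DEFAULT_AWS_REGION=us-east-2, DEFAULT_GINKGO_NODES=4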
+// + +import groovy.transform.Field + +@Field +def podYAML = ''' +apiVersion: v1 +kind: Pod +spec: + containers: + - name: main + image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master + command: + - runner.sh + - sleep + - 1d + # we need privileged mode in order to do docker in docker + securityContext: + privileged: true + env: + - name: DOCKER_IN_DOCKER_ENABLED + value: "true" + resources: + requests: + memory: "4000Mi" + cpu: 2000m + volumeMounts: + # dind expects /var/lib/docker to be volume + - name: docker-root + mountPath: /var/lib/docker + volumes: + - name: docker-root + emptyDir: {} +''' + +// Able to override default values in Jenkins job via environment variables. +if (!env.DEFAULT_GIT_REF) { + env.DEFAULT_GIT_REF = "master" +} + +if (!env.DEFAULT_GINKGO_NODES) { + env.DEFAULT_GINKGO_NODES = "8" +} + +if (!env.DEFAULT_E2E_ARGS) { + env.DEFAULT_E2E_ARGS = "--ginkgo.skip='\\[Serial\\]|\\[Stability\\]' --ginkgo.focus='\\[tidb-operator\\]'" +} + +if (!env.DEFAULT_CLUSTER) { + env.DEFAULT_CLUSTER = "jenkins-tidb-operator-e2e" +} + +if (!env.DEFAULT_AWS_REGION) { + env.DEFAULT_AWS_REGION = "us-west-2" +} + +pipeline { + agent { + kubernetes { + yaml podYAML + defaultContainer "main" + customWorkspace "/home/jenkins/agent/workspace/go/src/github.com/pingcap/tidb-operator" + } + } + + options { + timeout(time: 3, unit: 'HOURS') + } + + parameters { + string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url') + string(name: 'GIT_REF', defaultValue: env.DEFAULT_GIT_REF, description: 'git ref spec to checkout, e.g. master, release-1.1') + string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 1889') + string(name: 'GINKGO_NODES', defaultValue: env.DEFAULT_GINKGO_NODES, description: 'the number of ginkgo nodes') + string(name: 'E2E_ARGS', defaultValue: env.DEFAULT_E2E_ARGS, description: "e2e args, e.g. --ginkgo.focus='\\[Stability\\]'") + string(name: 'CLUSTER', defaultValue: env.DEFAULT_CLUSTER, description: 'the name of the cluster') + string(name: 'AWS_REGION', defaultValue: env.DEFAULT_AWS_REGION, description: 'the AWS region') + } + + environment { + GIT_REF = '' + ARTIFACTS = "${env.WORKSPACE}/artifacts" + } + + stages { + stage("Prepare") { + steps { + // The declarative model for Jenkins Pipelines has a restricted + // subset of syntax that it allows in the stage blocks. We use + // script step to bypass the restriction. 
+ // https://jenkins.io/doc/book/pipeline/syntax/#script + script { + GIT_REF = params.GIT_REF + if (params.PR_ID != "") { + GIT_REF = "refs/remotes/origin/pr/${params.PR_ID}/head" + } + } + echo "env.NODE_NAME: ${env.NODE_NAME}" + echo "env.WORKSPACE: ${env.WORKSPACE}" + echo "GIT_REF: ${GIT_REF}" + echo "ARTIFACTS: ${ARTIFACTS}" + } + } + + stage("Checkout") { + steps { + checkout scm: [ + $class: 'GitSCM', + branches: [[name: GIT_REF]], + userRemoteConfigs: [[ + credentialsId: 'github-sre-bot-ssh', + refspec: '+refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/*', + url: "${params.GIT_URL}", + ]] + ] + } + } + + stage("Run") { + steps { + withCredentials([ + string(credentialsId: 'TIDB_OPERATOR_AWS_ACCESS_KEY_ID', variable: 'AWS_ACCESS_KEY_ID'), + string(credentialsId: 'TIDB_OPERATOR_AWS_SECRET_ACCESS_KEY', variable: 'AWS_SECRET_ACCESS_KEY'), + ]) { + sh """ + #!/bin/bash + export PROVIDER=eks + export CLUSTER=${params.CLUSTER} + export AWS_REGION=${params.AWS_REGION} + export GINKGO_NODES=${params.GINKGO_NODES} + export REPORT_DIR=${ARTIFACTS} + echo "info: try to clean the cluster created previously" + ./ci/aws-clean-eks.sh \$CLUSTER + echo "info: begin to run e2e" + ./hack/e2e.sh -- ${params.E2E_ARGS} + """ + } + } + } + } + + post { + always { + dir(ARTIFACTS) { + archiveArtifacts artifacts: "**", allowEmptyArchive: true + junit testResults: "*.xml", allowEmptyResults: true + } + } + } +} + +// vim: et sw=4 ts=4 diff --git a/ci/e2e_gke.groovy b/ci/e2e_gke.groovy new file mode 100644 index 0000000000..e85f5809ce --- /dev/null +++ b/ci/e2e_gke.groovy @@ -0,0 +1,166 @@ +// +// Jenkins pipeline for GKE e2e job. +// +// This script is written in declarative syntax. Refer to +// https://jenkins.io/doc/book/pipeline/syntax/ for more details. +// +// Note that parameters of the job is configured in this script. +// + +import groovy.transform.Field + +@Field +def podYAML = ''' +apiVersion: v1 +kind: Pod +spec: + containers: + - name: main + image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master + command: + - runner.sh + - sleep + - 1d + # we need privileged mode in order to do docker in docker + securityContext: + privileged: true + env: + - name: DOCKER_IN_DOCKER_ENABLED + value: "true" + resources: + requests: + memory: "4000Mi" + cpu: 2000m + volumeMounts: + # dind expects /var/lib/docker to be volume + - name: docker-root + mountPath: /var/lib/docker + volumes: + - name: docker-root + emptyDir: {} +''' + +// Able to override default values in Jenkins job via environment variables. +if (!env.DEFAULT_GIT_REF) { + env.DEFAULT_GIT_REF = "master" +} + +if (!env.DEFAULT_GINKGO_NODES) { + env.DEFAULT_GINKGO_NODES = "8" +} + +if (!env.DEFAULT_E2E_ARGS) { + env.DEFAULT_E2E_ARGS = "--ginkgo.skip='\\[Serial\\]|\\[Stability\\]' --ginkgo.focus='\\[tidb-operator\\]'" +} + +if (!env.DEFAULT_CLUSTER) { + env.DEFAULT_CLUSTER = "jenkins-tidb-operator-e2e" +} + +if (!env.DEFAULT_GCP_PROJECT) { + env.DEFAULT_GCP_PROJECT = "" +} + +if (!env.DEFAULT_GCP_ZONE) { + env.DEFAULT_GCP_ZONE = "us-central1-b" +} + +pipeline { + agent { + kubernetes { + yaml podYAML + defaultContainer "main" + customWorkspace "/home/jenkins/agent/workspace/go/src/github.com/pingcap/tidb-operator" + } + } + + options { + timeout(time: 3, unit: 'HOURS') + } + + parameters { + string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url') + string(name: 'GIT_REF', defaultValue: env.DEFAULT_GIT_REF, description: 'git ref spec to checkout, e.g. 
master, release-1.1') + string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 1889') + string(name: 'GINKGO_NODES', defaultValue: env.DEFAULT_GINKGO_NODES, description: 'the number of ginkgo nodes') + string(name: 'E2E_ARGS', defaultValue: env.DEFAULT_E2E_ARGS, description: "e2e args, e.g. --ginkgo.focus='\\[Stability\\]'") + string(name: 'CLUSTER', defaultValue: env.DEFAULT_CLUSTER, description: 'the name of the cluster') + string(name: 'GCP_PROJECT', defaultValue: env.DEFAULT_GCP_PROJECT, description: 'the GCP project ID') + string(name: 'GCP_ZONE', defaultValue: env.DEFAULT_GCP_ZONE, description: 'the GCP zone') + } + + environment { + GIT_REF = '' + ARTIFACTS = "${env.WORKSPACE}/artifacts" + } + + stages { + stage("Prepare") { + steps { + // The declarative model for Jenkins Pipelines has a restricted + // subset of syntax that it allows in the stage blocks. We use + // script step to bypass the restriction. + // https://jenkins.io/doc/book/pipeline/syntax/#script + script { + GIT_REF = params.GIT_REF + if (params.PR_ID != "") { + GIT_REF = "refs/remotes/origin/pr/${params.PR_ID}/head" + } + } + echo "env.NODE_NAME: ${env.NODE_NAME}" + echo "env.WORKSPACE: ${env.WORKSPACE}" + echo "GIT_REF: ${GIT_REF}" + echo "ARTIFACTS: ${ARTIFACTS}" + } + } + + stage("Checkout") { + steps { + checkout scm: [ + $class: 'GitSCM', + branches: [[name: GIT_REF]], + userRemoteConfigs: [[ + credentialsId: 'github-sre-bot-ssh', + refspec: '+refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/*', + url: "${params.GIT_URL}", + ]] + ] + } + } + + stage("Run") { + steps { + withCredentials([ + file(credentialsId: 'TIDB_OPERATOR_GCP_CREDENTIALS', variable: 'GCP_CREDENTIALS'), + file(credentialsId: 'TIDB_OPERATOR_GCP_SSH_PRIVATE_KEY', variable: 'GCP_SSH_PRIVATE_KEY'), + file(credentialsId: 'TIDB_OPERATOR_GCP_SSH_PUBLIC_KEY', variable: 'GCP_SSH_PUBLIC_KEY'), + ]) { + sh """ + #!/bin/bash + export PROVIDER=gke + export CLUSTER=${params.CLUSTER} + export GCP_ZONE=${params.GCP_ZONE} + export GCP_PROJECT=${params.GCP_PROJECT} + export GINKGO_NODES=${params.GINKGO_NODES} + export REPORT_DIR=${ARTIFACTS} + echo "info: try to clean the cluster created previously" + SKIP_BUILD=y SKIP_IMAGE_BUILD=y SKIP_UP=y SKIP_TEST=y ./hack/e2e.sh + echo "info: begin to run e2e" + ./hack/e2e.sh -- ${params.E2E_ARGS} + """ + } + } + } + } + + post { + always { + dir(ARTIFACTS) { + archiveArtifacts artifacts: "**", allowEmptyArchive: true + junit testResults: "*.xml", allowEmptyResults: true + } + } + } +} + +// vim: et sw=4 ts=4 diff --git a/ci/e2e_kind.groovy b/ci/e2e_kind.groovy new file mode 100644 index 0000000000..245b75a3b8 --- /dev/null +++ b/ci/e2e_kind.groovy @@ -0,0 +1,220 @@ +// +// Jenkins pipeline for Kind e2e job. +// +// This script is written in declarative syntax. Refer to +// https://jenkins.io/doc/book/pipeline/syntax/ for more details. +// +// Note that parameters of the job is configured in this script. +// + +import groovy.transform.Field + +@Field +def podYAML = ''' +apiVersion: v1 +kind: Pod +metadata: + labels: + app: tidb-operator-e2e +spec: + containers: + - name: main + image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master + command: + - runner.sh + # Clean containers on TERM signal in root process to avoid cgroup leaking. 
+ # https://github.com/pingcap/tidb-operator/issues/1603#issuecomment-582402196 + - exec + - bash + - -c + - | + function clean() { + echo "info: clean all containers to avoid cgroup leaking" + docker kill $(docker ps -q) || true + docker system prune -af || true + } + trap clean TERM + sleep 1d & wait + # we need privileged mode in order to do docker in docker + securityContext: + privileged: true + env: + - name: DOCKER_IN_DOCKER_ENABLED + value: "true" + resources: + requests: + memory: "8000Mi" + cpu: 8000m + ephemeral-storage: "50Gi" + limits: + memory: "8000Mi" + cpu: 8000m + ephemeral-storage: "50Gi" + # kind needs /lib/modules and cgroups from the host + volumeMounts: + - mountPath: /lib/modules + name: modules + readOnly: true + - mountPath: /sys/fs/cgroup + name: cgroup + # dind expects /var/lib/docker to be volume + - name: docker-root + mountPath: /var/lib/docker + # legacy docker path for cr.io/k8s-testimages/kubekins-e2e + - name: docker-graph + mountPath: /docker-graph + volumes: + - name: modules + hostPath: + path: /lib/modules + type: Directory + - name: cgroup + hostPath: + path: /sys/fs/cgroup + type: Directory + - name: docker-root + emptyDir: {} + - name: docker-graph + emptyDir: {} + tolerations: + - effect: NoSchedule + key: tidb-operator + operator: Exists + affinity: + # running on nodes for tidb-operator only + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: ci.pingcap.com + operator: In + values: + - tidb-operator + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - tidb-operator-e2e + topologyKey: kubernetes.io/hostname +''' + +// Able to override default values in Jenkins job via environment variables. +if (!env.DEFAULT_GIT_REF) { + env.DEFAULT_GIT_REF = "master" +} + +if (!env.DEFAULT_GINKGO_NODES) { + env.DEFAULT_GINKGO_NODES = "8" +} + +if (!env.DEFAULT_E2E_ARGS) { + env.DEFAULT_E2E_ARGS = "" +} + +if (!env.DEFAULT_DOCKER_IO_MIRROR) { + env.DEFAULT_DOCKER_IO_MIRROR = "" +} + +if (!env.DEFAULT_QUAY_IO_MIRROR) { + env.DEFAULT_QUAY_IO_MIRROR = "" +} + +if (!env.DEFAULT_GCR_IO_MIRROR) { + env.DEFAULT_GCR_IO_MIRROR = "" +} + +pipeline { + agent { + kubernetes { + yaml podYAML + defaultContainer "main" + customWorkspace "/home/jenkins/agent/workspace/go/src/github.com/pingcap/tidb-operator" + } + } + + options { + timeout(time: 3, unit: 'HOURS') + } + + parameters { + string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url') + string(name: 'GIT_REF', defaultValue: env.DEFAULT_GIT_REF, description: 'git ref spec to checkout, e.g. master, release-1.1') + string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 1889') + string(name: 'GINKGO_NODES', defaultValue: env.DEFAULT_GINKGO_NODES, description: 'the number of ginkgo nodes') + string(name: 'E2E_ARGS', defaultValue: env.DEFAULT_E2E_ARGS, description: "e2e args, e.g. 
--ginkgo.focus='\\[Stability\\]'") + string(name: 'DOCKER_IO_MIRROR', defaultValue: env.DEFAULT_DOCKER_IO_MIRROR, description: "docker mirror for docker.io") + string(name: 'QUAY_IO_MIRROR', defaultValue: env.DEFAULT_QUAY_IO_MIRROR, description: "mirror for quay.io") + string(name: 'GCR_IO_MIRROR', defaultValue: env.DEFAULT_GCR_IO_MIRROR, description: "mirror for gcr.io") + } + + environment { + GIT_REF = '' + ARTIFACTS = "${env.WORKSPACE}/artifacts" + } + + stages { + stage("Prepare") { + steps { + // The declarative model for Jenkins Pipelines has a restricted + // subset of syntax that it allows in the stage blocks. We use + // script step to bypass the restriction. + // https://jenkins.io/doc/book/pipeline/syntax/#script + script { + GIT_REF = params.GIT_REF + if (params.PR_ID != "") { + GIT_REF = "refs/remotes/origin/pr/${params.PR_ID}/head" + } + } + echo "env.NODE_NAME: ${env.NODE_NAME}" + echo "env.WORKSPACE: ${env.WORKSPACE}" + echo "GIT_REF: ${GIT_REF}" + echo "ARTIFACTS: ${ARTIFACTS}" + } + } + + stage("Checkout") { + steps { + checkout scm: [ + $class: 'GitSCM', + branches: [[name: GIT_REF]], + userRemoteConfigs: [[ + credentialsId: 'github-sre-bot-ssh', + refspec: '+refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/*', + url: "${params.GIT_URL}", + ]] + ] + } + } + + stage("Run") { + steps { + sh """ + #!/bin/bash + export GINKGO_NODES=${params.GINKGO_NODES} + export REPORT_DIR=${ARTIFACTS} + export DOCKER_IO_MIRROR=${params.DOCKER_IO_MIRROR} + export QUAY_IO_MIRROR=${params.QUAY_IO_MIRROR} + export GCR_IO_MIRROR=${params.GCR_IO_MIRROR} + echo "info: begin to run e2e" + ./hack/e2e.sh -- ${params.E2E_ARGS} + """ + } + } + } + + post { + always { + dir(ARTIFACTS) { + archiveArtifacts artifacts: "**", allowEmptyArchive: true + junit testResults: "*.xml", allowEmptyResults: true + } + } + } +} + +// vim: et sw=4 ts=4 diff --git a/ci/pingcap_tidb_operator_build_kind.groovy b/ci/pingcap_tidb_operator_build_kind.groovy index ccc176c26f..6ff759fa60 100644 --- a/ci/pingcap_tidb_operator_build_kind.groovy +++ b/ci/pingcap_tidb_operator_build_kind.groovy @@ -14,7 +14,7 @@ metadata: spec: containers: - name: main - image: gcr.io/k8s-testimages/kubekins-e2e:v20191108-9467d02-master + image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master command: - runner.sh # Clean containers on TERM signal in root process to avoid cgroup leaking. 
@@ -191,16 +191,17 @@ def call(BUILD_BRANCH, CREDENTIALS_ID, CODECOV_CREDENTIALS_ID) { } } - stage("Check") { - ansiColor('xterm') { - sh """ - export GOPATH=${WORKSPACE}/go - export PATH=${WORKSPACE}/go/bin:\$PATH - make check-setup - make check - """ - } - } + // moved to Github Actions + // stage("Check") { + // ansiColor('xterm') { + // sh """ + // export GOPATH=${WORKSPACE}/go + // export PATH=${WORKSPACE}/go/bin:\$PATH + // make check-setup + // make check + // """ + // } + // } stage("Build and Test") { ansiColor('xterm') { @@ -209,10 +210,10 @@ def call(BUILD_BRANCH, CREDENTIALS_ID, CODECOV_CREDENTIALS_ID) { make e2e-build if [ ${BUILD_BRANCH} == "master" ] then - make test GO_COVER=y + make test GOFLAGS='-race' GO_COVER=y curl -s https://codecov.io/bash | bash -s - -t ${CODECOV_TOKEN} || echo 'Codecov did not collect coverage reports' else - make test + make test GOFLAGS='-race' fi """ } @@ -237,40 +238,40 @@ def call(BUILD_BRANCH, CREDENTIALS_ID, CODECOV_CREDENTIALS_ID) { def MIRRORS = "DOCKER_IO_MIRROR=http://172.16.4.143:5000 QUAY_IO_MIRROR=http://172.16.4.143:5001" def builds = [:] builds["E2E v1.12.10"] = { - build("${MIRRORS} IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=8 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_ ./hack/e2e.sh -- --preload-images --ginkgo.skip='\\[Serial\\]'", artifacts) + build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_ ./hack/e2e.sh -- --preload-images --operator-killer", artifacts) } builds["E2E v1.12.10 AdvancedStatefulSet"] = { - build("${MIRRORS} IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=8 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_advanced_statefulset ./hack/e2e.sh -- --preload-images --ginkgo.skip='\\[Serial\\]' --operator-features AdvancedStatefulSet=true", artifacts) + build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12-advanced-statefulset IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_advanced_statefulset ./hack/e2e.sh -- --preload-images --operator-features AdvancedStatefulSet=true", artifacts) } - builds["E2E v1.17.0"] = { - build("${MIRRORS} IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=8 KUBE_VERSION=v1.17.0 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.17.0_ ./hack/e2e.sh -- -preload-images --ginkgo.skip='\\[Serial\\]'", artifacts) + builds["E2E v1.18.0"] = { + build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.18 IMAGE_TAG=${GITHASH} SKIP_BUILD=y GINKGO_NODES=6 KUBE_VERSION=v1.18.0 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.18.0_ ./hack/e2e.sh -- -preload-images --operator-killer", artifacts) } builds["E2E v1.12.10 Serial"] = { - build("${MIRRORS} IMAGE_TAG=${GITHASH} SKIP_BUILD=y KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_serial_ ./hack/e2e.sh -- --preload-images --ginkgo.focus='\\[Serial\\]' --install-operator=false", artifacts) + build("${MIRRORS} RUNNER_SUITE_NAME=e2e-v1.12-serial IMAGE_TAG=${GITHASH} SKIP_BUILD=y KUBE_VERSION=v1.12.10 REPORT_DIR=\$(pwd)/artifacts REPORT_PREFIX=v1.12.10_serial_ ./hack/e2e.sh -- --preload-images --ginkgo.focus='\\[Serial\\]' --install-operator=false", artifacts) } builds.failFast = false parallel builds - // we requires ~/bin/config.cfg, filemgr-linux64 utilities on k8s-kind node - // TODO make it possible to run on any node - node('k8s-kind') { - dir("${PROJECT_DIR}"){ - deleteDir() - unstash 'tidb-operator' - if ( 
!(BUILD_BRANCH ==~ /[a-z0-9]{40}/) ) { - stage('upload tidb-operator, backup-manager binary and charts'){ - //upload binary and charts - sh """ - cp ~/bin/config.cfg ./ - tar -zcvf tidb-operator.tar.gz images/tidb-operator images/backup-manager charts - filemgr-linux64 --action mput --bucket pingcap-dev --nobar --key builds/pingcap/operator/${GITHASH}/centos7/tidb-operator.tar.gz --file tidb-operator.tar.gz - """ - //update refs - writeFile file: 'sha1', text: "${GITHASH}" - sh """ - filemgr-linux64 --action mput --bucket pingcap-dev --nobar --key refs/pingcap/operator/${BUILD_BRANCH}/centos7/sha1 --file sha1 - rm -f sha1 tidb-operator.tar.gz config.cfg - """ + if ( !(BUILD_BRANCH ==~ /[a-z0-9]{40}/) ) { + node('build_go1130_memvolume') { + container("golang") { + def WORKSPACE = pwd() + dir("${PROJECT_DIR}") { + unstash 'tidb-operator' + stage('upload tidb-operator binaries and charts'){ + withCredentials([ + string(credentialsId: 'UCLOUD_PUBLIC_KEY', variable: 'UCLOUD_PUBLIC_KEY'), + string(credentialsId: 'UCLOUD_PRIVATE_KEY', variable: 'UCLOUD_PRIVATE_KEY'), + ]) { + sh """ + export UCLOUD_UFILE_PROXY_HOST=mainland-hk.ufileos.com + export UCLOUD_UFILE_BUCKET=pingcap-dev + export BUILD_BRANCH=${BUILD_BRANCH} + export GITHASH=${GITHASH} + ./ci/upload-binaries-charts.sh + """ + } + } } } } diff --git a/ci/release_tidb_operator_binary_and_image.groovy b/ci/release_tidb_operator_binary_and_image.groovy index 8b8b117b72..0850b68fa0 100644 --- a/ci/release_tidb_operator_binary_and_image.groovy +++ b/ci/release_tidb_operator_binary_and_image.groovy @@ -30,8 +30,8 @@ def call(BUILD_BRANCH, RELEASE_TAG, CREDENTIALS_ID, CHART_ITEMS) { stage('Push tidb-backup-manager Docker Image'){ withDockerServer([uri: "${env.DOCKER_HOST}"]) { - docker.build("uhub.service.ucloud.cn/pingcap/backup-manager:${RELEASE_TAG}", "images/backup-manager").push() - docker.build("pingcap/backup-manager:${RELEASE_TAG}", "images/backup-manager").push() + docker.build("uhub.service.ucloud.cn/pingcap/tidb-backup-manager:${RELEASE_TAG}", "images/tidb-backup-manager").push() + docker.build("pingcap/tidb-backup-manager:${RELEASE_TAG}", "images/tidb-backup-manager").push() } } @@ -94,8 +94,8 @@ def call(BUILD_BRANCH, RELEASE_TAG, CREDENTIALS_ID, CHART_ITEMS) { slackmsg = "${slackmsg}" + "\n" + "tidb-operator Docker Image: `pingcap/tidb-operator:${RELEASE_TAG}`" + "\n" + "tidb-operator Docker Image: `uhub.ucloud.cn/pingcap/tidb-operator:${RELEASE_TAG}`" + "\n" + - "backup-manager Docker Image: `pingcap/backup-manager:${RELEASE_TAG}`" + "\n" + - "backup-manager Docker Image: `uhub.ucloud.cn/pingcap/backup-manager:${RELEASE_TAG}`" + "tidb-backup-manager Docker Image: `pingcap/tidb-backup-manager:${RELEASE_TAG}`" + "\n" + + "tidb-backup-manager Docker Image: `uhub.ucloud.cn/pingcap/tidb-backup-manager:${RELEASE_TAG}`" for(String chartItem : CHART_ITEMS.split(' ')){ diff --git a/ci/run-in-vm.sh b/ci/run-in-vm.sh new file mode 100755 index 0000000000..1dfac69a13 --- /dev/null +++ b/ci/run-in-vm.sh @@ -0,0 +1,162 @@ +#!/bin/bash + +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# +# This is a helper script to start a VM and run command in it. +# +# TODO create an isolated network + +set -o errexit +set -o nounset +set -o pipefail + +ROOT=$(unset CDPATH && cd $(dirname "${BASH_SOURCE[0]}")/.. && pwd) +cd $ROOT + +source "${ROOT}/hack/lib.sh" + +GCP_CREDENTIALS=${GCP_CREDENTIALS:-} +GCP_PROJECT=${GCP_PROJECT:-} +GCP_ZONE=${GCP_ZONE:-} +GCP_SSH_PRIVATE_KEY=${GCP_SSH_PRIVATE_KEY:-} +GCP_SSH_PUBLIC_KEY=${GCP_SSH_PUBLIC_KEY:-} +NAME=${NAME:-tidb-operator-e2e} +SSH_USER=${SSH_USER:-vagrant} +GIT_URL=${GIT_URL:-https://github.com/pingcap/tidb-operator} +GIT_REF=${GIT_REF:-origin/master} +SYNC_FILES=${SYNC_FILES:-} + +echo "GCP_CREDENTIALS: $GCP_CREDENTIALS" +echo "GCP_PROJECT: $GCP_PROJECT" +echo "GCP_ZONE: $GCP_ZONE" +echo "GCP_SSH_PRIVATE_KEY: $GCP_SSH_PRIVATE_KEY" +echo "GCP_SSH_PUBLIC_KEY: $GCP_SSH_PUBLIC_KEY" +echo "NAME: $NAME" +echo "GIT_URL: $GIT_URL" +echo "GIT_REF: $GIT_REF" +echo "SYNC_FILES: $SYNC_FILES" + +# Pre-created nested virtualization enabled image with following commands: +# +# gcloud compute disks create disk1 --image-project centos-cloud --image-family centos-8 --zone us-central1-b +# gcloud compute images create centos-8-nested-vm \ +# --source-disk disk1 --source-disk-zone us-central1-b \ +# --licenses "https://compute.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx" +# gcloud compute disks delete disk1 +# +# Refer to +# https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances +# for more details. +IMAGE=centos-8-nested-vm + +echo "info: configure gcloud" +if [ -z "$GCP_PROJECT" ]; then + echo "error: GCP_PROJECT is required" + exit 1 +fi +if [ -z "$GCP_CREDENTIALS" ]; then + echo "error: GCP_CREDENTIALS is required" + exit 1 +fi +if [ -z "$GCP_ZONE" ]; then + echo "error: GCP_ZONE is required" + exit 1 +fi +gcloud auth activate-service-account --key-file "$GCP_CREDENTIALS" +gcloud config set core/project $GCP_PROJECT +gcloud config set compute/zone $GCP_ZONE + +echo "info: preparing ssh keypairs for GCP" +if [ ! -d ~/.ssh ]; then + mkdir ~/.ssh +fi +if [ ! -e ~/.ssh/google_compute_engine -a -n "$GCP_SSH_PRIVATE_KEY" ]; then + echo "Copying $GCP_SSH_PRIVATE_KEY to ~/.ssh/google_compute_engine" >&2 + cp $GCP_SSH_PRIVATE_KEY ~/.ssh/google_compute_engine + chmod 0600 ~/.ssh/google_compute_engine +fi +if [ ! -e ~/.ssh/google_compute_engine.pub -a -n "$GCP_SSH_PUBLIC_KEY" ]; then + echo "Copying $GCP_SSH_PUBLIC_KEY to ~/.ssh/google_compute_engine.pub" >&2 + cp $GCP_SSH_PUBLIC_KEY ~/.ssh/google_compute_engine.pub + chmod 0600 ~/.ssh/google_compute_engine.pub +fi + +function gcloud_resource_exists() { + local args=($(tr -s '_' ' ' <<<"$1")) + unset args[$[${#args[@]}-1]] + local name="$2" + x=$(${args[@]} list --filter="name='$name'" --format='table[no-heading](name)' | wc -l) + [ "$x" -ge 1 ] +} + +function gcloud_compute_instances_exists() { + gcloud_resource_exists ${FUNCNAME[0]} $@ +} + +function e2e::down() { + echo "info: tearing down" + if ! 
gcloud_compute_instances_exists $NAME; then + echo "info: instance '$NAME' does not exist, skipped" + return 0 + fi + echo "info: deleting instance '$NAME'" + gcloud compute instances delete $NAME -q +} + +function e2e::up() { + echo "info: setting up" + echo "info: creating instance '$NAME'" + gcloud compute instances create $NAME \ + --machine-type n1-standard-8 \ + --min-cpu-platform "Intel Haswell" \ + --image $IMAGE \ + --boot-disk-size 30GB \ + --local-ssd interface=scsi +} + +function e2e::test() { + echo "info: testing" + echo "info: waiting for the VM to be ready" + hack::wait_for_success 60 3 "gcloud compute ssh $SSH_USER@$NAME --command 'uname -a'" + echo "info: syncing files $SYNC_FILES" + while IFS=$',' read -r line; do + IFS=':' read -r src dst <<< "$line" + if [ -z "$dst" ]; then + dst="$src" + fi + gcloud compute scp $src $SSH_USER@$NAME:$dst + done <<< "$SYNC_FILES" + local tmpfile=$(mktemp) + trap "rm -f $tmpfile" RETURN + cat <<EOF > $tmpfile +sudo yum install -y git +cd \$HOME +sudo rm -rf tidb-operator +git init tidb-operator +cd tidb-operator +git fetch --depth 1 --tags --progress ${GIT_URL} +refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/* +refs/heads/*:refs/* +GIT_COMMIT=\$(git rev-parse ${GIT_REF}^{commit}) +git checkout -f \${GIT_COMMIT} +$@ +EOF + cat $tmpfile + gcloud compute scp $tmpfile $SSH_USER@$NAME:/tmp/e2e.sh + gcloud compute ssh $SSH_USER@$NAME --command "bash /tmp/e2e.sh" +} + +e2e::down +trap 'e2e::down' EXIT +e2e::up +e2e::test "$@" diff --git a/ci/upload-binaries-charts.sh b/ci/upload-binaries-charts.sh new file mode 100755 index 0000000000..e9c429365f --- /dev/null +++ b/ci/upload-binaries-charts.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash + +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit +set -o nounset +set -o pipefail + +ROOT=$(unset CDPATH && cd $(dirname "${BASH_SOURCE[0]}")/..
&& pwd) +cd $ROOT + +source "${ROOT}/hack/lib.sh" + +UCLOUD_PUBLIC_KEY=${UCLOUD_PUBLIC_KEY:-} +UCLOUD_PRIVATE_KEY=${UCLOUD_PRIVATE_KEY:-} +UCLOUD_UFILE_PROXY_HOST=${UCLOUD_UFILE_PROXY_HOST:-} +UCLOUD_UFILE_API_HOST=${UCLOUD_UFILE_API_HOST:-api.spark.ucloud.cn} +UCLOUD_UFILE_BUCKET=${UCLOUD_UFILE_BUCKET:-} +GITHASH=${GITHASH:-} +BUILD_BRANCH=${BUILD_BRANCH:-} + +FILEMGR_URL="http://tools.ufile.ucloud.com.cn/filemgr-linux64.tar.gz" + +if [ -z "$UCLOUD_PUBLIC_KEY" -o -z "$UCLOUD_PRIVATE_KEY" -o -z "$UCLOUD_UFILE_PROXY_HOST" ]; then + echo "error: UCLOUD_PUBLIC_KEY/UCLOUD_PRIVATE_KEY/UCLOUD_UFILE_PROXY_HOST are required" + exit 1 +fi + +if [ -z "$UCLOUD_UFILE_BUCKET" ]; then + echo "error: UCLOUD_UFILE_BUCKET is required" + exit 1 +fi + +if [ -z "$GITHASH" -o -z "$BUILD_BRANCH" ]; then + echo "error: GITHASH/BUILD_BRANCH are required" + exit 1 +fi + +function upload() { + local dir=$(mktemp -d) + trap "test -d $dir && rm -rf $dir" RETURN + + echo "info: create a temporary directory: $dir" + + cat <<EOF > $dir/config.cfg +{ + "public_key" : "${UCLOUD_PUBLIC_KEY}", + "private_key" : "${UCLOUD_PRIVATE_KEY}", + "proxy_host" : "${UCLOUD_UFILE_PROXY_HOST}", + "api_host" : "${UCLOUD_UFILE_API_HOST}" +} +EOF + + echo "info: downloading filemgr from $FILEMGR_URL" + curl --retry 10 -L -s "$FILEMGR_URL" | tar --strip-components 2 -C $dir -xzvf - ./linux64/filemgr-linux64 + + echo "info: uploading charts and binaries" + tar -zcvf $dir/tidb-operator.tar.gz images/tidb-operator images/tidb-backup-manager charts + $dir/filemgr-linux64 --config $dir/config.cfg --action mput --bucket ${UCLOUD_UFILE_BUCKET} --nobar --key builds/pingcap/operator/${GITHASH}/centos7/tidb-operator.tar.gz --file $dir/tidb-operator.tar.gz + + echo "info: update ref of branch '$BUILD_BRANCH'" + echo -n $GITHASH > $dir/sha1 + $dir/filemgr-linux64 --config $dir/config.cfg --action mput --bucket ${UCLOUD_UFILE_BUCKET} --nobar --key refs/pingcap/operator/${BUILD_BRANCH}/centos7/sha1 --file $dir/sha1 +} + +# retry a few times until it succeeds, this can avoid temporary network flakes +hack::wait_for_success 120 5 "upload" diff --git a/ci/vm.groovy b/ci/vm.groovy new file mode 100644 index 0000000000..64acda4de4 --- /dev/null +++ b/ci/vm.groovy @@ -0,0 +1,144 @@ +// +// Jenkins pipeline for VM jobs. +// +// This script is written in declarative syntax. Refer to +// https://jenkins.io/doc/book/pipeline/syntax/ for more details. +// +// Note that parameters of the job is configured in this script. +// + +import groovy.transform.Field + +@Field +def podYAML = ''' +apiVersion: v1 +kind: Pod +spec: + containers: + - name: main + image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master + command: + - runner.sh + - sleep + - 1d + # we need privileged mode in order to do docker in docker + securityContext: + privileged: true + env: + - name: DOCKER_IN_DOCKER_ENABLED + value: "true" + resources: + requests: + memory: "4000Mi" + cpu: 2000m + volumeMounts: + # dind expects /var/lib/docker to be volume + - name: docker-root + mountPath: /var/lib/docker + volumes: + - name: docker-root + emptyDir: {} +''' + +// Able to override default values in Jenkins job via environment variables.
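+// For example, setting DEFAULT_GIT_REF on the Jenkins job changes the default value of the GIT_REF parameter below without editing this script.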
+ +if (!env.DEFAULT_GIT_URL) { + env.DEFAULT_GIT_URL = "https://github.com/pingcap/tidb-operator" +} + +if (!env.DEFAULT_GIT_REF) { + env.DEFAULT_GIT_REF = "master" +} + +if (!env.DEFAULT_GCP_PROJECT) { + env.DEFAULT_GCP_PROJECT = "" +} + +if (!env.DEFAULT_GCP_ZONE) { + env.DEFAULT_GCP_ZONE = "us-central1-b" +} + +if (!env.DEFAULT_NAME) { + env.DEFAULT_NAME = "tidb-operator-e2e" +} + +pipeline { + agent { + kubernetes { + yaml podYAML + defaultContainer "main" + customWorkspace "/home/jenkins/agent/workspace/go/src/github.com/pingcap/tidb-operator" + } + } + + options { + timeout(time: 3, unit: 'HOURS') + } + + parameters { + string(name: 'GIT_URL', defaultValue: env.DEFAULT_GIT_URL, description: 'git repo url') + string(name: 'GIT_REF', defaultValue: env.DEFAULT_GIT_REF, description: 'git ref spec to checkout, e.g. master, release-1.1') + string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 1889') + string(name: 'GCP_PROJECT', defaultValue: env.DEFAULT_GCP_PROJECT, description: 'the GCP project ID') + string(name: 'GCP_ZONE', defaultValue: env.DEFAULT_GCP_ZONE, description: 'the GCP zone') + string(name: 'NAME', defaultValue: env.DEFAULT_NAME, description: 'the name of VM instance') + } + + environment { + GIT_REF = '' + } + + stages { + stage("Prepare") { + steps { + // The declarative model for Jenkins Pipelines has a restricted + // subset of syntax that it allows in the stage blocks. We use + // script step to bypass the restriction. + // https://jenkins.io/doc/book/pipeline/syntax/#script + script { + GIT_REF = params.GIT_REF + if (params.PR_ID != "") { + GIT_REF = "refs/remotes/origin/pr/${params.PR_ID}/head" + } + } + echo "env.NODE_NAME: ${env.NODE_NAME}" + echo "env.WORKSPACE: ${env.WORKSPACE}" + echo "GIT_REF: ${GIT_REF}" + } + } + + stage("Checkout") { + steps { + checkout scm: [ + $class: 'GitSCM', + branches: [[name: GIT_REF]], + userRemoteConfigs: [[ + refspec: '+refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/*', + url: "${params.GIT_URL}", + ]] + ] + } + } + + stage("Run") { + steps { + withCredentials([ + file(credentialsId: 'TIDB_OPERATOR_GCP_CREDENTIALS', variable: 'GCP_CREDENTIALS'), + file(credentialsId: 'TIDB_OPERATOR_GCP_SSH_PRIVATE_KEY', variable: 'GCP_SSH_PRIVATE_KEY'), + file(credentialsId: 'TIDB_OPERATOR_GCP_SSH_PUBLIC_KEY', variable: 'GCP_SSH_PUBLIC_KEY'), + file(credentialsId: 'TIDB_OPERATOR_REDHAT_PULL_SECRET', variable: 'REDHAT_PULL_SECRET'), + ]) { + sh """ + #!/bin/bash + export GIT_REF=${GIT_REF} + export SYNC_FILES=\$REDHAT_PULL_SECRET:/tmp/pull-secret.txt + # TODO make the command configurable + ./ci/run-in-vm.sh PULL_SECRET_FILE=/tmp/pull-secret.txt ./hack/e2e-openshift.sh + """ + } + } + } + } +} + +// vim: et sw=4 ts=4 diff --git a/cmd/admission-webhook/main.go b/cmd/admission-webhook/main.go index 120b0f70a1..600f86c758 100644 --- a/cmd/admission-webhook/main.go +++ b/cmd/admission-webhook/main.go @@ -15,15 +15,17 @@ package main import ( "flag" + "fmt" "os" "time" "github.com/openshift/generic-admission-server/pkg/cmd" - "github.com/pingcap/tidb-operator/pkg/features" "github.com/pingcap/tidb-operator/pkg/version" "github.com/pingcap/tidb-operator/pkg/webhook" + "github.com/pingcap/tidb-operator/pkg/webhook/pod" "k8s.io/component-base/logs" + "k8s.io/klog" ) var ( @@ -51,9 +53,19 @@ func main() { } version.LogVersionInfo() + flag.CommandLine.VisitAll(func(flag *flag.Flag) { + klog.V(1).Infof("FLAG: --%s=%q", flag.Name, flag.Value) + }) + ah := &webhook.AdmissionHook{ 
ExtraServiceAccounts: extraServiceAccounts, EvictRegionLeaderTimeout: evictRegionLeaderTimeout, } + ns := os.Getenv("NAMESPACE") + if len(ns) < 1 { + klog.Fatal("ENV NAMESPACE should be set.") + } + pod.AstsControllerServiceAccounts = fmt.Sprintf("system:serviceaccount:%s:advanced-statefulset-controller", ns) + cmd.RunAdmissionServer(ah) } diff --git a/cmd/backup-manager/app/backup/backup.go b/cmd/backup-manager/app/backup/backup.go index 7d9f0729a5..8e3b3748cc 100644 --- a/cmd/backup-manager/app/backup/backup.go +++ b/cmd/backup-manager/app/backup/backup.go @@ -14,35 +14,46 @@ package backup import ( + "bufio" "context" "fmt" "io" + "io/ioutil" "os/exec" + "path" + "strings" "github.com/gogo/protobuf/proto" - glog "k8s.io/klog" - kvbackup "github.com/pingcap/kvproto/pkg/backup" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" - "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" + backupUtil "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + "github.com/pingcap/tidb-operator/pkg/util" + corev1 "k8s.io/api/core/v1" + "k8s.io/klog" ) // Options contains the input arguments to the backup command type Options struct { - Namespace string - BackupName string -} - -func (bo *Options) String() string { - return fmt.Sprintf("%s/%s", bo.Namespace, bo.BackupName) + backupUtil.GenericOptions } func (bo *Options) backupData(backup *v1alpha1.Backup) (string, error) { - args, path, err := constructOptions(backup) + clusterNamespace := backup.Spec.BR.ClusterNamespace + if backup.Spec.BR.ClusterNamespace == "" { + clusterNamespace = backup.Namespace + } + args, remotePath, err := constructOptions(backup) if err != nil { return "", err } + args = append(args, fmt.Sprintf("--pd=%s-pd.%s:2379", backup.Spec.BR.Cluster, clusterNamespace)) + if bo.TLSCluster { + args = append(args, fmt.Sprintf("--ca=%s", path.Join(util.ClusterClientTLSPath, corev1.ServiceAccountRootCAKey))) + args = append(args, fmt.Sprintf("--cert=%s", path.Join(util.ClusterClientTLSPath, corev1.TLSCertKey))) + args = append(args, fmt.Sprintf("--key=%s", path.Join(util.ClusterClientTLSPath, corev1.TLSPrivateKeyKey))) + } + var btype string if backup.Spec.Type == "" { btype = string(v1alpha1.BackupTypeFull) @@ -54,19 +65,53 @@ func (bo *Options) backupData(backup *v1alpha1.Backup) (string, error) { btype, } fullArgs = append(fullArgs, args...) - glog.Infof("Running br command with args: %v", fullArgs) - output, err := exec.Command("br", fullArgs...).CombinedOutput() + klog.Infof("Running br command with args: %v", fullArgs) + bin := "br" + backupUtil.Suffix(bo.TiKVVersion) + cmd := exec.Command(bin, fullArgs...) 
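+ // Stream the br output instead of buffering it: stdout is logged line by line as it arrives, and lines containing "[ERROR]" are collected into the error message returned when the command fails.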
+ + stdOut, err := cmd.StdoutPipe() + if err != nil { + return remotePath, fmt.Errorf("cluster %s, create stdout pipe failed, err: %v", bo, err) + } + stdErr, err := cmd.StderrPipe() + if err != nil { + return remotePath, fmt.Errorf("cluster %s, create stderr pipe failed, err: %v", bo, err) + } + err = cmd.Start() if err != nil { - return path, fmt.Errorf("cluster %s, execute br command %v failed, output: %s, err: %v", bo, fullArgs, string(output), err) + return remotePath, fmt.Errorf("cluster %s, execute br command failed, args: %s, err: %v", bo, fullArgs, err) } - glog.Infof("Backup data for cluster %s successfully, output: %s", bo, string(output)) - return path, nil + var errMsg string + reader := bufio.NewReader(stdOut) + for { + line, err := reader.ReadString('\n') + if strings.Contains(line, "[ERROR]") { + errMsg += line + } + + klog.Infof(strings.Replace(line, "\n", "", -1)) + if err != nil || io.EOF == err { + break + } + } + tmpErr, _ := ioutil.ReadAll(stdErr) + if len(tmpErr) > 0 { + klog.Infof(string(tmpErr)) + errMsg += string(tmpErr) + } + err = cmd.Wait() + if err != nil { + return remotePath, fmt.Errorf("cluster %s, wait pipe message failed, errMsg %s, err: %v", bo, errMsg, err) + } + + klog.Infof("Backup data for cluster %s successfully", bo) + return remotePath, nil } // getCommitTs get backup position from `EndVersion` in BR backup meta func getCommitTs(backup *v1alpha1.Backup) (uint64, error) { var commitTs uint64 - s, err := util.NewRemoteStorage(backup) + s, err := backupUtil.NewRemoteStorage(backup) if err != nil { return commitTs, err } @@ -94,9 +139,9 @@ func getCommitTs(backup *v1alpha1.Backup) (uint64, error) { // constructOptions constructs options for BR and also return the remote path func constructOptions(backup *v1alpha1.Backup) ([]string, string, error) { - args, path, err := util.ConstructBRGlobalOptionsForBackup(backup) + args, remotePath, err := backupUtil.ConstructBRGlobalOptionsForBackup(backup) if err != nil { - return args, path, err + return args, remotePath, err } config := backup.Spec.BR if config.Concurrency != nil { @@ -111,13 +156,13 @@ func constructOptions(backup *v1alpha1.Backup) ([]string, string, error) { if config.Checksum != nil { args = append(args, fmt.Sprintf("--checksum=%t", *config.Checksum)) } - return args, path, nil + return args, remotePath, nil } // getBackupSize get the backup data size from remote func getBackupSize(backup *v1alpha1.Backup) (int64, error) { var size int64 - s, err := util.NewRemoteStorage(backup) + s, err := backupUtil.NewRemoteStorage(backup) if err != nil { return size, err } diff --git a/cmd/backup-manager/app/backup/manager.go b/cmd/backup-manager/app/backup/manager.go index 78f652126b..0e3f99ab2b 100644 --- a/cmd/backup-manager/app/backup/manager.go +++ b/cmd/backup-manager/app/backup/manager.go @@ -14,15 +14,20 @@ package backup import ( + "database/sql" "fmt" "time" + "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" + "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + bkconstants "github.com/pingcap/tidb-operator/pkg/backup/constants" listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - glog "k8s.io/klog" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/klog" ) // Manager mainly used to manage backup related work @@ -44,11 +49,29 @@ func NewManager( } } +func (bm 
*Manager) setOptions(backup *v1alpha1.Backup) { + bm.Options.Host = backup.Spec.From.Host + + if backup.Spec.From.Port != 0 { + bm.Options.Port = backup.Spec.From.Port + } else { + bm.Options.Port = bkconstants.DefaultTidbPort + } + + if backup.Spec.From.User != "" { + bm.Options.User = backup.Spec.From.User + } else { + bm.Options.User = bkconstants.DefaultTidbUser + } + + bm.Options.Password = util.GetOptionValueFromEnv(bkconstants.TidbPasswordKey, bkconstants.BackupManagerEnvVarPrefix) +} + // ProcessBackup used to process the backup logic func (bm *Manager) ProcessBackup() error { - backup, err := bm.backupLister.Backups(bm.Namespace).Get(bm.BackupName) + backup, err := bm.backupLister.Backups(bm.Namespace).Get(bm.ResourceName) if err != nil { - glog.Errorf("can't find cluster %s backup %s CRD object, err: %v", bm, bm.BackupName, err) + klog.Errorf("can't find cluster %s backup %s CRD object, err: %v", bm, bm.ResourceName, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -60,10 +83,40 @@ func (bm *Manager) ProcessBackup() error { if backup.Spec.BR == nil { return fmt.Errorf("no br config in %s", bm) } - return bm.performBackup(backup.DeepCopy()) + + bm.setOptions(backup) + + var db *sql.DB + var dsn string + err = wait.PollImmediate(constants.PollInterval, constants.CheckTimeout, func() (done bool, err error) { + dsn, err = bm.GetDSN(bm.TLSClient) + if err != nil { + klog.Errorf("can't get dsn of tidb cluster %s, err: %s", bm, err) + return false, err + } + db, err = util.OpenDB(dsn) + if err != nil { + klog.Warningf("can't connect to tidb cluster %s, err: %s", bm, err) + return false, nil + } + return true, nil + }) + + if err != nil { + klog.Errorf("cluster %s connect failed, err: %s", bm, err) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "ConnectTidbFailed", + Message: err.Error(), + }) + } + + defer db.Close() + return bm.performBackup(backup.DeepCopy(), db) } -func (bm *Manager) performBackup(backup *v1alpha1.Backup) error { +func (bm *Manager) performBackup(backup *v1alpha1.Backup, db *sql.DB) error { started := time.Now() err := bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ @@ -74,23 +127,101 @@ func (bm *Manager) performBackup(backup *v1alpha1.Backup) error { return err } - backupFullPath, err := bm.backupData(backup) + oldTikvGCTime, err := bm.GetTikvGCLifeTime(db) if err != nil { - glog.Errorf("backup cluster %s data failed, err: %s", bm, err) + klog.Errorf("cluster %s get %s failed, err: %s", bm, constants.TikvGCVariable, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, - Reason: "BackupDataToRemoteFailed", + Reason: "GetTikvGCLifeTimeFailed", Message: err.Error(), }) } - glog.Infof("backup cluster %s data to %s success", bm, backupFullPath) + klog.Infof("cluster %s %s is %s", bm, constants.TikvGCVariable, oldTikvGCTime) + + oldTikvGCTimeDuration, err := time.ParseDuration(oldTikvGCTime) + if err != nil { + klog.Errorf("cluster %s parse old %s failed, err: %s", bm, constants.TikvGCVariable, err) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "ParseOldTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + + var tikvGCTimeDuration time.Duration + var tikvGCLifeTime string + if backup.Spec.TikvGCLifeTime != nil { 
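+ // A custom tikv_gc_life_time is set in the Backup CR; parse it and use it instead of the default constants.TikvGCLifeTime.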
+ tikvGCLifeTime = *backup.Spec.TikvGCLifeTime + tikvGCTimeDuration, err = time.ParseDuration(tikvGCLifeTime) + if err != nil { + klog.Errorf("cluster %s parse configured %s failed, err: %s", bm, constants.TikvGCVariable, err) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "ParseConfiguredTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + } else { + tikvGCLifeTime = constants.TikvGCLifeTime + tikvGCTimeDuration, err = time.ParseDuration(tikvGCLifeTime) + if err != nil { + klog.Errorf("cluster %s parse default %s failed, err: %s", bm, constants.TikvGCVariable, err) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "ParseDefaultTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + } + + if oldTikvGCTimeDuration < tikvGCTimeDuration { + err = bm.SetTikvGCLifeTime(db, tikvGCLifeTime) + if err != nil { + klog.Errorf("cluster %s set tikv GC life time to %s failed, err: %s", bm, tikvGCLifeTime, err) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "SetTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + klog.Infof("set cluster %s %s to %s success", bm, constants.TikvGCVariable, tikvGCLifeTime) + } + + backupFullPath, backupErr := bm.backupData(backup) + if oldTikvGCTimeDuration < tikvGCTimeDuration { + err = bm.SetTikvGCLifeTime(db, oldTikvGCTime) + if err != nil { + klog.Errorf("cluster %s reset tikv GC life time to %s failed, err: %s", bm, oldTikvGCTime, err) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "ResetTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + klog.Infof("reset cluster %s %s to %s success", bm, constants.TikvGCVariable, oldTikvGCTime) + } + if backupErr != nil { + klog.Errorf("backup cluster %s data failed, err: %s", bm, backupErr) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "BackupDataToRemoteFailed", + Message: backupErr.Error(), + }) + } + klog.Infof("backup cluster %s data to %s success", bm, backupFullPath) // Note: The size get from remote may be incorrect because the blobs // are eventually consistent. 
size, err := getBackupSize(backup) if err != nil { - glog.Errorf("Get size for backup files in %s of cluster %s failed, err: %s", backupFullPath, bm, err) + klog.Errorf("Get size for backup files in %s of cluster %s failed, err: %s", backupFullPath, bm, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -98,11 +229,11 @@ func (bm *Manager) performBackup(backup *v1alpha1.Backup) error { Message: err.Error(), }) } - glog.Infof("Get size %d for backup files in %s of cluster %s success", size, backupFullPath, bm) + klog.Infof("Get size %d for backup files in %s of cluster %s success", size, backupFullPath, bm) commitTs, err := getCommitTs(backup) if err != nil { - glog.Errorf("get cluster %s commitTs failed, err: %s", bm, err) + klog.Errorf("get cluster %s commitTs failed, err: %s", bm, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -110,7 +241,7 @@ func (bm *Manager) performBackup(backup *v1alpha1.Backup) error { Message: err.Error(), }) } - glog.Infof("get cluster %s commitTs %d success", bm, commitTs) + klog.Infof("get cluster %s commitTs %d success", bm, commitTs) finish := time.Now() diff --git a/cmd/backup-manager/app/backup_manager.go b/cmd/backup-manager/app/backup_manager.go index 83b9c64c31..8800891446 100644 --- a/cmd/backup-manager/app/backup_manager.go +++ b/cmd/backup-manager/app/backup_manager.go @@ -26,7 +26,7 @@ func Run() error { logs.InitLogs() defer logs.FlushLogs() - // fix glog parse error + // fix klog parse error flag.CommandLine.Parse([]string{}) pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc) diff --git a/cmd/backup-manager/app/clean/clean.go b/cmd/backup-manager/app/clean/clean.go index 136866b0a9..c727319d58 100644 --- a/cmd/backup-manager/app/clean/clean.go +++ b/cmd/backup-manager/app/clean/clean.go @@ -19,7 +19,7 @@ import ( "io" "os/exec" - glog "k8s.io/klog" + "k8s.io/klog" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" @@ -53,12 +53,12 @@ func (bo *Options) cleanBRRemoteBackupData(backup *v1alpha1.Backup) error { if err != nil { return err } - glog.Infof("Prepare to delete %s for cluster %s", obj.Key, bo) + klog.Infof("Prepare to delete %s for cluster %s", obj.Key, bo) err = s.Delete(context.Background(), obj.Key) if err != nil { return err } - glog.Infof("Delete %s for cluster %s successfully", obj.Key, bo) + klog.Infof("Delete %s for cluster %s successfully", obj.Key, bo) } return nil } @@ -70,6 +70,6 @@ func (bo *Options) cleanRemoteBackupData(bucket string) error { return fmt.Errorf("cluster %s, execute rclone deletefile command failed, output: %s, err: %v", bo, string(output), err) } - glog.Infof("cluster %s backup %s was deleted successfully", bo, bucket) + klog.Infof("cluster %s backup %s was deleted successfully", bo, bucket) return nil } diff --git a/cmd/backup-manager/app/clean/manager.go b/cmd/backup-manager/app/clean/manager.go index 01380b5eec..e2e4061567 100644 --- a/cmd/backup-manager/app/clean/manager.go +++ b/cmd/backup-manager/app/clean/manager.go @@ -17,7 +17,7 @@ import ( "fmt" corev1 "k8s.io/api/core/v1" - glog "k8s.io/klog" + "k8s.io/klog" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1" @@ -55,7 +55,7 @@ func (bm *Manager) ProcessCleanBackup() error { func (bm *Manager) 
performCleanBackup(backup *v1alpha1.Backup) error { if backup.Status.BackupPath == "" { - glog.Errorf("cluster %s backup path is empty", bm) + klog.Errorf("cluster %s backup path is empty", bm) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -72,7 +72,7 @@ func (bm *Manager) performCleanBackup(backup *v1alpha1.Backup) error { } if err != nil { - glog.Errorf("clean cluster %s backup %s failed, err: %s", bm, backup.Status.BackupPath, err) + klog.Errorf("clean cluster %s backup %s failed, err: %s", bm, backup.Status.BackupPath, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -81,7 +81,7 @@ func (bm *Manager) performCleanBackup(backup *v1alpha1.Backup) error { }) } - glog.Infof("clean cluster %s backup %s success", bm, backup.Status.BackupPath) + klog.Infof("clean cluster %s backup %s success", bm, backup.Status.BackupPath) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupClean, Status: corev1.ConditionTrue, diff --git a/cmd/backup-manager/app/cmd/backup.go b/cmd/backup-manager/app/cmd/backup.go index d0e2fce165..5c7bc86b21 100644 --- a/cmd/backup-manager/app/cmd/backup.go +++ b/cmd/backup-manager/app/cmd/backup.go @@ -23,7 +23,7 @@ import ( "github.com/pingcap/tidb-operator/pkg/controller" "github.com/spf13/cobra" "k8s.io/client-go/tools/cache" - glog "k8s.io/klog" + "k8s.io/klog" cmdutil "k8s.io/kubectl/pkg/cmd/util" ) @@ -41,7 +41,10 @@ func NewBackupCommand() *cobra.Command { } cmd.Flags().StringVar(&bo.Namespace, "namespace", "", "Backup CR's namespace") - cmd.Flags().StringVar(&bo.BackupName, "backupName", "", "Backup CRD object name") + cmd.Flags().StringVar(&bo.ResourceName, "backupName", "", "Backup CRD object name") + cmd.Flags().StringVar(&bo.TiKVVersion, "tikvVersion", util.DefaultVersion, "TiKV version") + cmd.Flags().BoolVar(&bo.TLSClient, "client-tls", false, "Whether client tls is enabled") + cmd.Flags().BoolVar(&bo.TLSCluster, "cluster-tls", false, "Whether cluster tls is enabled") return cmd } @@ -63,7 +66,7 @@ func runBackup(backupOpts backup.Options, kubecfg string) error { // waiting for the shared informer's store has synced. cache.WaitForCacheSync(ctx.Done(), backupInformer.Informer().HasSynced) - glog.Infof("start to process backup %s", backupOpts.String()) + klog.Infof("start to process backup %s", backupOpts.String()) bm := backup.NewManager(backupInformer.Lister(), statusUpdater, backupOpts) return bm.ProcessBackup() } diff --git a/cmd/backup-manager/app/cmd/clean.go b/cmd/backup-manager/app/cmd/clean.go index 35095223de..efff8cc92e 100644 --- a/cmd/backup-manager/app/cmd/clean.go +++ b/cmd/backup-manager/app/cmd/clean.go @@ -23,7 +23,7 @@ import ( "github.com/pingcap/tidb-operator/pkg/controller" "github.com/spf13/cobra" "k8s.io/client-go/tools/cache" - glog "k8s.io/klog" + "k8s.io/klog" cmdutil "k8s.io/kubectl/pkg/cmd/util" ) @@ -64,7 +64,7 @@ func runClean(backupOpts clean.Options, kubecfg string) error { // waiting for the shared informer's store has synced. 
cache.WaitForCacheSync(ctx.Done(), backupInformer.Informer().HasSynced) - glog.Infof("start to clean backup %s", backupOpts.String()) + klog.Infof("start to clean backup %s", backupOpts.String()) bm := clean.NewManager(backupInformer.Lister(), statusUpdater, backupOpts) return bm.ProcessCleanBackup() } diff --git a/cmd/backup-manager/app/cmd/export.go b/cmd/backup-manager/app/cmd/export.go index 437506c8d1..f4c19f2d63 100644 --- a/cmd/backup-manager/app/cmd/export.go +++ b/cmd/backup-manager/app/cmd/export.go @@ -21,18 +21,17 @@ import ( "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/export" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" - bkconstants "github.com/pingcap/tidb-operator/pkg/backup/constants" informers "github.com/pingcap/tidb-operator/pkg/client/informers/externalversions" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/spf13/cobra" "k8s.io/client-go/tools/cache" - glog "k8s.io/klog" + "k8s.io/klog" cmdutil "k8s.io/kubectl/pkg/cmd/util" ) // NewExportCommand implements the backup command func NewExportCommand() *cobra.Command { - bo := export.BackupOpts{} + bo := export.Options{} cmd := &cobra.Command{ Use: "export", @@ -44,18 +43,13 @@ func NewExportCommand() *cobra.Command { } cmd.Flags().StringVar(&bo.Namespace, "namespace", "", "Backup CR's namespace") - cmd.Flags().StringVar(&bo.Host, "host", "", "Tidb cluster access address") - cmd.Flags().Int32Var(&bo.Port, "port", bkconstants.DefaultTidbPort, "Port number to use for connecting tidb cluster") + cmd.Flags().StringVar(&bo.ResourceName, "backupName", "", "Backup CRD object name") cmd.Flags().StringVar(&bo.Bucket, "bucket", "", "Bucket in which to store the backup data") - cmd.Flags().StringVar(&bo.Password, bkconstants.TidbPasswordKey, "", "Password to use when connecting to tidb cluster") - cmd.Flags().StringVar(&bo.User, "user", "", "User for login tidb cluster") cmd.Flags().StringVar(&bo.StorageType, "storageType", "", "Backend storage type") - cmd.Flags().StringVar(&bo.BackupName, "backupName", "", "Backup CRD object name") - util.SetFlagsFromEnv(cmd.Flags(), bkconstants.BackupManagerEnvVarPrefix) return cmd } -func runExport(backupOpts export.BackupOpts, kubecfg string) error { +func runExport(backupOpts export.Options, kubecfg string) error { kubeCli, cli, err := util.NewKubeAndCRCli(kubecfg) cmdutil.CheckErr(err) options := []informers.SharedInformerOption{ @@ -73,7 +67,7 @@ func runExport(backupOpts export.BackupOpts, kubecfg string) error { // waiting for the shared informer's store has synced. 
cache.WaitForCacheSync(ctx.Done(), backupInformer.Informer().HasSynced) - glog.Infof("start to process backup %s", backupOpts.String()) + klog.Infof("start to process backup %s", backupOpts.String()) bm := export.NewBackupManager(backupInformer.Lister(), statusUpdater, backupOpts) return bm.ProcessBackup() } diff --git a/cmd/backup-manager/app/cmd/import.go b/cmd/backup-manager/app/cmd/import.go index 92aef28d18..89a796128a 100644 --- a/cmd/backup-manager/app/cmd/import.go +++ b/cmd/backup-manager/app/cmd/import.go @@ -21,18 +21,17 @@ import ( "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/import" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" - bkconstants "github.com/pingcap/tidb-operator/pkg/backup/constants" informers "github.com/pingcap/tidb-operator/pkg/client/informers/externalversions" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/spf13/cobra" "k8s.io/client-go/tools/cache" - glog "k8s.io/klog" + "k8s.io/klog" cmdutil "k8s.io/kubectl/pkg/cmd/util" ) // NewImportCommand implements the restore command func NewImportCommand() *cobra.Command { - ro := _import.RestoreOpts{} + ro := _import.Options{} cmd := &cobra.Command{ Use: "import", @@ -44,17 +43,12 @@ func NewImportCommand() *cobra.Command { } cmd.Flags().StringVar(&ro.Namespace, "namespace", "", "Restore CR's namespace") - cmd.Flags().StringVar(&ro.Host, "host", "", "Tidb cluster access address") - cmd.Flags().Int32Var(&ro.Port, "port", bkconstants.DefaultTidbPort, "Port number to use for connecting tidb cluster") - cmd.Flags().StringVar(&ro.Password, bkconstants.TidbPasswordKey, "", "Password to use when connecting to tidb cluster") - cmd.Flags().StringVar(&ro.User, "user", "", "User for login tidb cluster") - cmd.Flags().StringVar(&ro.RestoreName, "restoreName", "", "Restore CRD object name") + cmd.Flags().StringVar(&ro.ResourceName, "restoreName", "", "Restore CRD object name") cmd.Flags().StringVar(&ro.BackupPath, "backupPath", "", "The location of the backup") - util.SetFlagsFromEnv(cmd.Flags(), bkconstants.BackupManagerEnvVarPrefix) return cmd } -func runImport(restoreOpts _import.RestoreOpts, kubecfg string) error { +func runImport(restoreOpts _import.Options, kubecfg string) error { kubeCli, cli, err := util.NewKubeAndCRCli(kubecfg) cmdutil.CheckErr(err) options := []informers.SharedInformerOption{ @@ -72,7 +66,7 @@ func runImport(restoreOpts _import.RestoreOpts, kubecfg string) error { // waiting for the shared informer's store has synced. 
cache.WaitForCacheSync(ctx.Done(), restoreInformer.Informer().HasSynced) - glog.Infof("start to process restore %s", restoreOpts.String()) + klog.Infof("start to process restore %s", restoreOpts.String()) rm := _import.NewRestoreManager(restoreInformer.Lister(), statusUpdater, restoreOpts) return rm.ProcessRestore() } diff --git a/cmd/backup-manager/app/cmd/restore.go b/cmd/backup-manager/app/cmd/restore.go index 50152afee2..fa383c2e5c 100644 --- a/cmd/backup-manager/app/cmd/restore.go +++ b/cmd/backup-manager/app/cmd/restore.go @@ -21,12 +21,11 @@ import ( "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/restore" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" - bkconstants "github.com/pingcap/tidb-operator/pkg/backup/constants" informers "github.com/pingcap/tidb-operator/pkg/client/informers/externalversions" "github.com/pingcap/tidb-operator/pkg/controller" "github.com/spf13/cobra" "k8s.io/client-go/tools/cache" - glog "k8s.io/klog" + "k8s.io/klog" cmdutil "k8s.io/kubectl/pkg/cmd/util" ) @@ -44,8 +43,10 @@ func NewRestoreCommand() *cobra.Command { } cmd.Flags().StringVar(&ro.Namespace, "namespace", "", "Restore CR's namespace") - cmd.Flags().StringVar(&ro.RestoreName, "restoreName", "", "Restore CRD object name") - util.SetFlagsFromEnv(cmd.Flags(), bkconstants.BackupManagerEnvVarPrefix) + cmd.Flags().StringVar(&ro.ResourceName, "restoreName", "", "Restore CRD object name") + cmd.Flags().StringVar(&ro.TiKVVersion, "tikvVersion", util.DefaultVersion, "TiKV version") + cmd.Flags().BoolVar(&ro.TLSClient, "client-tls", false, "Whether client tls is enabled") + cmd.Flags().BoolVar(&ro.TLSCluster, "cluster-tls", false, "Whether cluster tls is enabled") return cmd } @@ -67,7 +68,7 @@ func runRestore(restoreOpts restore.Options, kubecfg string) error { // waiting for the shared informer's store has synced. 
cache.WaitForCacheSync(ctx.Done(), restoreInformer.Informer().HasSynced) - glog.Infof("start to process restore %s", restoreOpts.String()) + klog.Infof("start to process restore %s", restoreOpts.String()) rm := restore.NewManager(restoreInformer.Lister(), statusUpdater, restoreOpts) return rm.ProcessRestore() } diff --git a/cmd/backup-manager/app/constants/constants.go b/cmd/backup-manager/app/constants/constants.go index 24b5ad7d30..c471181138 100644 --- a/cmd/backup-manager/app/constants/constants.go +++ b/cmd/backup-manager/app/constants/constants.go @@ -56,4 +56,10 @@ const ( // MetaFile is the file name for meta data of backup with BR MetaFile = "backupmeta" + + // BR certificate storage path + BRCertPath = "/var/lib/br-tls" + + // ServiceAccountCAPath is where is CABundle of serviceaccount locates + ServiceAccountCAPath = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" ) diff --git a/cmd/backup-manager/app/export/export.go b/cmd/backup-manager/app/export/export.go index 194463ef62..fe9d37e40b 100644 --- a/cmd/backup-manager/app/export/export.go +++ b/cmd/backup-manager/app/export/export.go @@ -14,7 +14,6 @@ package export import ( - "database/sql" "fmt" "io/ioutil" "os/exec" @@ -24,62 +23,32 @@ import ( "time" "github.com/mholt/archiver" - glog "k8s.io/klog" - "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" + "k8s.io/klog" ) -// BackupOpts contains the input arguments to the backup command -type BackupOpts struct { - Namespace string - BackupName string +// Options contains the input arguments to the backup command +type Options struct { + util.GenericOptions Bucket string - Host string - Port int32 - Password string - User string StorageType string } -func (bo *BackupOpts) String() string { - return fmt.Sprintf("%s/%s", bo.Namespace, bo.BackupName) -} - -func (bo *BackupOpts) getBackupFullPath() string { +func (bo *Options) getBackupFullPath() string { return filepath.Join(constants.BackupRootPath, bo.getBackupRelativePath()) } -func (bo *BackupOpts) getBackupRelativePath() string { +func (bo *Options) getBackupRelativePath() string { backupName := fmt.Sprintf("backup-%s", time.Now().UTC().Format(time.RFC3339)) return fmt.Sprintf("%s/%s", bo.Bucket, backupName) } -func (bo *BackupOpts) getDestBucketURI(remotePath string) string { +func (bo *Options) getDestBucketURI(remotePath string) string { return fmt.Sprintf("%s://%s", bo.StorageType, remotePath) } -func (bo *BackupOpts) getTikvGCLifeTime(db *sql.DB) (string, error) { - var tikvGCTime string - sql := fmt.Sprintf("select variable_value from %s where variable_name= ?", constants.TidbMetaTable) - row := db.QueryRow(sql, constants.TikvGCVariable) - err := row.Scan(&tikvGCTime) - if err != nil { - return tikvGCTime, fmt.Errorf("query cluster %s %s failed, sql: %s, err: %v", bo, constants.TikvGCVariable, sql, err) - } - return tikvGCTime, nil -} - -func (bo *BackupOpts) setTikvGCLifeTime(db *sql.DB, gcTime string) error { - sql := fmt.Sprintf("update %s set variable_value = ? 
where variable_name = ?", constants.TidbMetaTable) - _, err := db.Exec(sql, gcTime, constants.TikvGCVariable) - if err != nil { - return fmt.Errorf("set cluster %s %s failed, sql: %s, err: %v", bo, constants.TikvGCVariable, sql, err) - } - return nil -} - -func (bo *BackupOpts) dumpTidbClusterData() (string, error) { +func (bo *Options) dumpTidbClusterData() (string, error) { bfPath := bo.getBackupFullPath() err := util.EnsureDirectoryExist(bfPath) if err != nil { @@ -95,7 +64,7 @@ func (bo *BackupOpts) dumpTidbClusterData() (string, error) { "--tidb-force-priority=LOW_PRIORITY", "--verbose=3", "--regex", - "^(?!(mysql|test|INFORMATION_SCHEMA|PERFORMANCE_SCHEMA))", + "^(?!(mysql|test|INFORMATION_SCHEMA|PERFORMANCE_SCHEMA|METRICS_SCHEMA|INSPECTION_SCHEMA))", } output, err := exec.Command("/mydumper", args...).CombinedOutput() @@ -105,7 +74,7 @@ func (bo *BackupOpts) dumpTidbClusterData() (string, error) { return bfPath, nil } -func (bo *BackupOpts) backupDataToRemote(source, bucketURI string) error { +func (bo *Options) backupDataToRemote(source, bucketURI string) error { destBucket := util.NormalizeBucketURI(bucketURI) tmpDestBucket := fmt.Sprintf("%s.tmp", destBucket) // TODO: We may need to use exec.CommandContext to control timeouts. @@ -114,7 +83,7 @@ func (bo *BackupOpts) backupDataToRemote(source, bucketURI string) error { return fmt.Errorf("cluster %s, execute rclone copyto command for upload backup data %s failed, output: %s, err: %v", bo, bucketURI, string(output), err) } - glog.Infof("upload cluster %s backup data to %s successfully, now move it to permanent URL %s", bo, tmpDestBucket, destBucket) + klog.Infof("upload cluster %s backup data to %s successfully, now move it to permanent URL %s", bo, tmpDestBucket, destBucket) // the backup was a success // remove .tmp extension @@ -125,10 +94,6 @@ func (bo *BackupOpts) backupDataToRemote(source, bucketURI string) error { return nil } -func (bo *BackupOpts) getDSN(db string) string { - return fmt.Sprintf("%s:%s@(%s:%d)/%s?charset=utf8", bo.User, bo.Password, bo.Host, bo.Port, db) -} - /* getCommitTsFromMetadata get commitTs from mydumper's metadata file diff --git a/cmd/backup-manager/app/export/manager.go b/cmd/backup-manager/app/export/manager.go index f4316c6f5f..544dd75598 100644 --- a/cmd/backup-manager/app/export/manager.go +++ b/cmd/backup-manager/app/export/manager.go @@ -21,26 +21,27 @@ import ( "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + bkconstants "github.com/pingcap/tidb-operator/pkg/backup/constants" listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" - glog "k8s.io/klog" + "k8s.io/klog" ) // BackupManager mainly used to manage backup related work type BackupManager struct { backupLister listers.BackupLister StatusUpdater controller.BackupConditionUpdaterInterface - BackupOpts + Options } // NewBackupManager return a BackupManager func NewBackupManager( backupLister listers.BackupLister, statusUpdater controller.BackupConditionUpdaterInterface, - backupOpts BackupOpts) *BackupManager { + backupOpts Options) *BackupManager { return &BackupManager{ backupLister, statusUpdater, @@ -48,11 +49,29 @@ func NewBackupManager( } } +func (bm *BackupManager) setOptions(backup *v1alpha1.Backup) { + 
bm.Options.Host = backup.Spec.From.Host + + if backup.Spec.From.Port != 0 { + bm.Options.Port = backup.Spec.From.Port + } else { + bm.Options.Port = bkconstants.DefaultTidbPort + } + + if backup.Spec.From.User != "" { + bm.Options.User = backup.Spec.From.User + } else { + bm.Options.User = bkconstants.DefaultTidbUser + } + + bm.Options.Password = util.GetOptionValueFromEnv(bkconstants.TidbPasswordKey, bkconstants.BackupManagerEnvVarPrefix) +} + // ProcessBackup used to process the backup logic func (bm *BackupManager) ProcessBackup() error { - backup, err := bm.backupLister.Backups(bm.Namespace).Get(bm.BackupName) + backup, err := bm.backupLister.Backups(bm.Namespace).Get(bm.ResourceName) if err != nil { - glog.Errorf("can't find cluster %s backup %s CRD object, err: %v", bm, bm.BackupName, err) + klog.Errorf("can't find cluster %s backup %s CRD object, err: %v", bm, bm.ResourceName, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -61,23 +80,28 @@ func (bm *BackupManager) ProcessBackup() error { }) } + bm.setOptions(backup) + var db *sql.DB + var dsn string err = wait.PollImmediate(constants.PollInterval, constants.CheckTimeout, func() (done bool, err error) { - db, err = util.OpenDB(bm.getDSN(constants.TidbMetaDB)) + // TLS is not currently supported + dsn, err = bm.GetDSN(false) if err != nil { - glog.Warningf("can't open connection to tidb cluster %s, err: %v", bm, err) - return false, nil + klog.Errorf("can't get dsn of tidb cluster %s, err: %s", bm, err) + return false, err } - if err := db.Ping(); err != nil { - glog.Warningf("can't connect to tidb cluster %s, err: %s", bm, err) + db, err = util.OpenDB(dsn) + if err != nil { + klog.Warningf("can't connect to tidb cluster %s, err: %s", bm, err) return false, nil } return true, nil }) if err != nil { - glog.Errorf("cluster %s connect failed, err: %s", bm, err) + klog.Errorf("cluster %s connect failed, err: %s", bm, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -101,9 +125,9 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro return err } - oldTikvGCTime, err := bm.getTikvGCLifeTime(db) + oldTikvGCTime, err := bm.GetTikvGCLifeTime(db) if err != nil { - glog.Errorf("cluster %s get %s failed, err: %s", bm, constants.TikvGCVariable, err) + klog.Errorf("cluster %s get %s failed, err: %s", bm, constants.TikvGCVariable, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -111,11 +135,11 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro Message: err.Error(), }) } - glog.Infof("cluster %s %s is %s", bm, constants.TikvGCVariable, oldTikvGCTime) + klog.Infof("cluster %s %s is %s", bm, constants.TikvGCVariable, oldTikvGCTime) oldTikvGCTimeDuration, err := time.ParseDuration(oldTikvGCTime) if err != nil { - glog.Errorf("cluster %s parse old %s failed, err: %s", bm, constants.TikvGCVariable, err) + klog.Errorf("cluster %s parse old %s failed, err: %s", bm, constants.TikvGCVariable, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -123,20 +147,39 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro Message: err.Error(), }) } - tikvGCTimeDuration, err := time.ParseDuration(constants.TikvGCLifeTime) - if err != nil { - 
glog.Errorf("cluster %s parse default %s failed, err: %s", bm, constants.TikvGCVariable, err) - return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ - Type: v1alpha1.BackupFailed, - Status: corev1.ConditionTrue, - Reason: "ParseDefaultTikvGCLifeTimeFailed", - Message: err.Error(), - }) + + var tikvGCTimeDuration time.Duration + var tikvGCLifeTime string + if backup.Spec.TikvGCLifeTime != nil { + tikvGCLifeTime = *backup.Spec.TikvGCLifeTime + tikvGCTimeDuration, err = time.ParseDuration(tikvGCLifeTime) + if err != nil { + klog.Errorf("cluster %s parse configured %s failed, err: %s", bm, constants.TikvGCVariable, err) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "ParseConfiguredTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + } else { + tikvGCLifeTime = constants.TikvGCLifeTime + tikvGCTimeDuration, err = time.ParseDuration(tikvGCLifeTime) + if err != nil { + klog.Errorf("cluster %s parse default %s failed, err: %s", bm, constants.TikvGCVariable, err) + return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "ParseDefaultTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } } + if oldTikvGCTimeDuration < tikvGCTimeDuration { - err = bm.setTikvGCLifeTime(db, constants.TikvGCLifeTime) + err = bm.SetTikvGCLifeTime(db, constants.TikvGCLifeTime) if err != nil { - glog.Errorf("cluster %s set tikv GC life time to %s failed, err: %s", bm, constants.TikvGCLifeTime, err) + klog.Errorf("cluster %s set tikv GC life time to %s failed, err: %s", bm, constants.TikvGCLifeTime, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -144,25 +187,14 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro Message: err.Error(), }) } - glog.Infof("set cluster %s %s to %s success", bm, constants.TikvGCVariable, constants.TikvGCLifeTime) - } - - backupFullPath, err := bm.dumpTidbClusterData() - if err != nil { - glog.Errorf("dump cluster %s data failed, err: %s", bm, err) - return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ - Type: v1alpha1.BackupFailed, - Status: corev1.ConditionTrue, - Reason: "DumpTidbClusterFailed", - Message: err.Error(), - }) + klog.Infof("set cluster %s %s to %s success", bm, constants.TikvGCVariable, constants.TikvGCLifeTime) } - glog.Infof("dump cluster %s data to %s success", bm, backupFullPath) + backupFullPath, backupErr := bm.dumpTidbClusterData() if oldTikvGCTimeDuration < tikvGCTimeDuration { - err = bm.setTikvGCLifeTime(db, oldTikvGCTime) + err = bm.SetTikvGCLifeTime(db, oldTikvGCTime) if err != nil { - glog.Errorf("cluster %s reset tikv GC life time to %s failed, err: %s", bm, oldTikvGCTime, err) + klog.Errorf("cluster %s reset tikv GC life time to %s failed, err: %s", bm, oldTikvGCTime, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -170,13 +202,24 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro Message: err.Error(), }) } - glog.Infof("reset cluster %s %s to %s success", bm, constants.TikvGCVariable, oldTikvGCTime) + klog.Infof("reset cluster %s %s to %s success", bm, constants.TikvGCVariable, oldTikvGCTime) + } + if backupErr != nil { + klog.Errorf("dump cluster %s data failed, err: %s", bm, backupErr) + return bm.StatusUpdater.Update(backup, 
&v1alpha1.BackupCondition{ + Type: v1alpha1.BackupFailed, + Status: corev1.ConditionTrue, + Reason: "DumpTidbClusterFailed", + Message: backupErr.Error(), + }) } + klog.Infof("dump cluster %s data to %s success", bm, backupFullPath) + // TODO: Concurrent get file size and upload backup data to speed up processing time archiveBackupPath := backupFullPath + constants.DefaultArchiveExtention err = archiveBackupData(backupFullPath, archiveBackupPath) if err != nil { - glog.Errorf("archive cluster %s backup data %s failed, err: %s", bm, archiveBackupPath, err) + klog.Errorf("archive cluster %s backup data %s failed, err: %s", bm, archiveBackupPath, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -184,11 +227,11 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro Message: err.Error(), }) } - glog.Infof("archive cluster %s backup data %s success", bm, archiveBackupPath) + klog.Infof("archive cluster %s backup data %s success", bm, archiveBackupPath) size, err := getBackupSize(archiveBackupPath) if err != nil { - glog.Errorf("get cluster %s archived backup file %s size %d failed, err: %s", bm, archiveBackupPath, size, err) + klog.Errorf("get cluster %s archived backup file %s size %d failed, err: %s", bm, archiveBackupPath, size, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -196,11 +239,11 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro Message: err.Error(), }) } - glog.Infof("get cluster %s archived backup file %s size %d success", bm, archiveBackupPath, size) + klog.Infof("get cluster %s archived backup file %s size %d success", bm, archiveBackupPath, size) commitTs, err := getCommitTsFromMetadata(backupFullPath) if err != nil { - glog.Errorf("get cluster %s commitTs failed, err: %s", bm, err) + klog.Errorf("get cluster %s commitTs failed, err: %s", bm, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -208,13 +251,13 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro Message: err.Error(), }) } - glog.Infof("get cluster %s commitTs %s success", bm, commitTs) + klog.Infof("get cluster %s commitTs %s success", bm, commitTs) remotePath := strings.TrimPrefix(archiveBackupPath, constants.BackupRootPath+"/") bucketURI := bm.getDestBucketURI(remotePath) err = bm.backupDataToRemote(archiveBackupPath, bucketURI) if err != nil { - glog.Errorf("backup cluster %s data to %s failed, err: %s", bm, bm.StorageType, err) + klog.Errorf("backup cluster %s data to %s failed, err: %s", bm, bm.StorageType, err) return bm.StatusUpdater.Update(backup, &v1alpha1.BackupCondition{ Type: v1alpha1.BackupFailed, Status: corev1.ConditionTrue, @@ -222,7 +265,7 @@ func (bm *BackupManager) performBackup(backup *v1alpha1.Backup, db *sql.DB) erro Message: err.Error(), }) } - glog.Infof("backup cluster %s data to %s success", bm, bm.StorageType) + klog.Infof("backup cluster %s data to %s success", bm, bm.StorageType) finish := time.Now() diff --git a/cmd/backup-manager/app/import/manager.go b/cmd/backup-manager/app/import/manager.go index c5d74a1591..624d24e156 100644 --- a/cmd/backup-manager/app/import/manager.go +++ b/cmd/backup-manager/app/import/manager.go @@ -14,6 +14,7 @@ package _import import ( + "database/sql" "fmt" "path/filepath" "time" @@ -21,38 +22,57 @@ import ( 
"github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + bkconstants "github.com/pingcap/tidb-operator/pkg/backup/constants" listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/controller" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" - glog "k8s.io/klog" + "k8s.io/klog" ) -// RestoreManager mainly used to manage backup related work +// RestoreManager mainly used to manage restore related work type RestoreManager struct { restoreLister listers.RestoreLister StatusUpdater controller.RestoreConditionUpdaterInterface - RestoreOpts + Options } // NewRestoreManager return a RestoreManager func NewRestoreManager( restoreLister listers.RestoreLister, statusUpdater controller.RestoreConditionUpdaterInterface, - backupOpts RestoreOpts) *RestoreManager { + restoreOpts Options) *RestoreManager { return &RestoreManager{ restoreLister, statusUpdater, - backupOpts, + restoreOpts, } } +func (rm *RestoreManager) setOptions(restore *v1alpha1.Restore) { + rm.Options.Host = restore.Spec.To.Host + + if restore.Spec.To.Port != 0 { + rm.Options.Port = restore.Spec.To.Port + } else { + rm.Options.Port = bkconstants.DefaultTidbPort + } + + if restore.Spec.To.User != "" { + rm.Options.User = restore.Spec.To.User + } else { + rm.Options.User = bkconstants.DefaultTidbUser + } + + rm.Options.Password = util.GetOptionValueFromEnv(bkconstants.TidbPasswordKey, bkconstants.BackupManagerEnvVarPrefix) +} + // ProcessRestore used to process the restore logic func (rm *RestoreManager) ProcessRestore() error { - restore, err := rm.restoreLister.Restores(rm.Namespace).Get(rm.RestoreName) + restore, err := rm.restoreLister.Restores(rm.Namespace).Get(rm.ResourceName) if err != nil { - glog.Errorf("can't find cluster %s restore %s CRD object, err: %v", rm, rm.RestoreName, err) + klog.Errorf("can't find cluster %s restore %s CRD object, err: %v", rm, rm.ResourceName, err) return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ Type: v1alpha1.RestoreFailed, Status: corev1.ConditionTrue, @@ -61,23 +81,28 @@ func (rm *RestoreManager) ProcessRestore() error { }) } + rm.setOptions(restore) + + var db *sql.DB + var dsn string err = wait.PollImmediate(constants.PollInterval, constants.CheckTimeout, func() (done bool, err error) { - db, err := util.OpenDB(rm.getDSN(constants.TidbMetaDB)) + // TLS is not currently supported + dsn, err = rm.GetDSN(false) if err != nil { - glog.Warningf("can't open connection to tidb cluster %s, err: %v", rm, err) - return false, nil + klog.Errorf("can't get dsn of tidb cluster %s, err: %s", rm, err) + return false, err } - if err := db.Ping(); err != nil { - glog.Warningf("can't connect to tidb cluster %s, err: %s", rm, err) + db, err = util.OpenDB(dsn) + if err != nil { + klog.Warningf("can't connect to tidb cluster %s, err: %s", rm, err) return false, nil } - db.Close() return true, nil }) if err != nil { - glog.Errorf("cluster %s connect failed, err: %s", rm, err) + klog.Errorf("cluster %s connect failed, err: %s", rm, err) return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ Type: v1alpha1.RestoreFailed, Status: corev1.ConditionTrue, @@ -86,6 +111,7 @@ func (rm *RestoreManager) ProcessRestore() error { }) } + defer db.Close() return rm.performRestore(restore.DeepCopy()) } @@ -102,7 +128,7 @@ func (rm 
*RestoreManager) performRestore(restore *v1alpha1.Restore) error { restoreDataPath := rm.getRestoreDataPath() if err := rm.downloadBackupData(restoreDataPath); err != nil { - glog.Errorf("download cluster %s backup %s data failed, err: %s", rm, rm.BackupPath, err) + klog.Errorf("download cluster %s backup %s data failed, err: %s", rm, rm.BackupPath, err) return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ Type: v1alpha1.RestoreFailed, Status: corev1.ConditionTrue, @@ -110,12 +136,12 @@ func (rm *RestoreManager) performRestore(restore *v1alpha1.Restore) error { Message: fmt.Sprintf("download backup %s data failed, err: %v", rm.BackupPath, err), }) } - glog.Infof("download cluster %s backup %s data success", rm, rm.BackupPath) + klog.Infof("download cluster %s backup %s data success", rm, rm.BackupPath) restoreDataDir := filepath.Dir(restoreDataPath) unarchiveDataPath, err := unarchiveBackupData(restoreDataPath, restoreDataDir) if err != nil { - glog.Errorf("unarchive cluster %s backup %s data failed, err: %s", rm, restoreDataPath, err) + klog.Errorf("unarchive cluster %s backup %s data failed, err: %s", rm, restoreDataPath, err) return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ Type: v1alpha1.RestoreFailed, Status: corev1.ConditionTrue, @@ -123,11 +149,11 @@ func (rm *RestoreManager) performRestore(restore *v1alpha1.Restore) error { Message: fmt.Sprintf("unarchive backup %s data failed, err: %v", restoreDataPath, err), }) } - glog.Infof("unarchive cluster %s backup %s data success", rm, restoreDataPath) + klog.Infof("unarchive cluster %s backup %s data success", rm, restoreDataPath) err = rm.loadTidbClusterData(unarchiveDataPath) if err != nil { - glog.Errorf("restore cluster %s from backup %s failed, err: %s", rm, rm.BackupPath, err) + klog.Errorf("restore cluster %s from backup %s failed, err: %s", rm, rm.BackupPath, err) return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ Type: v1alpha1.RestoreFailed, Status: corev1.ConditionTrue, @@ -135,7 +161,7 @@ func (rm *RestoreManager) performRestore(restore *v1alpha1.Restore) error { Message: fmt.Sprintf("loader backup %s data failed, err: %v", restoreDataPath, err), }) } - glog.Infof("restore cluster %s from backup %s success", rm, rm.BackupPath) + klog.Infof("restore cluster %s from backup %s success", rm, rm.BackupPath) finish := time.Now() diff --git a/cmd/backup-manager/app/import/restore.go b/cmd/backup-manager/app/import/restore.go index 9db1c30a6d..22c00511f5 100644 --- a/cmd/backup-manager/app/import/restore.go +++ b/cmd/backup-manager/app/import/restore.go @@ -24,28 +24,19 @@ import ( "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" ) -// RestoreOpts contains the input arguments to the restore command -type RestoreOpts struct { - Namespace string - RestoreName string - Password string - Host string - Port int32 - User string - BackupPath string +// Options contains the input arguments to the restore command +type Options struct { + util.GenericOptions + BackupPath string } -func (ro *RestoreOpts) String() string { - return fmt.Sprintf("%s/%s", ro.Namespace, ro.RestoreName) -} - -func (ro *RestoreOpts) getRestoreDataPath() string { +func (ro *Options) getRestoreDataPath() string { backupName := filepath.Base(ro.BackupPath) bucketName := filepath.Base(filepath.Dir(ro.BackupPath)) return filepath.Join(constants.BackupRootPath, bucketName, backupName) } -func (ro *RestoreOpts) downloadBackupData(localPath string) error { +func (ro *Options) downloadBackupData(localPath string) 
error {
 	if err := util.EnsureDirectoryExist(filepath.Dir(localPath)); err != nil {
 		return err
 	}
@@ -62,29 +53,28 @@ func (ro *RestoreOpts) downloadBackupData(localPath string) error {
 	return nil
 }
 
-func (ro *RestoreOpts) loadTidbClusterData(restorePath string) error {
+func (ro *Options) loadTidbClusterData(restorePath string) error {
 	if exist := util.IsDirExist(restorePath); !exist {
 		return fmt.Errorf("dir %s does not exist or is not a dir", restorePath)
 	}
 	args := []string{
-		fmt.Sprintf("-d=%s", restorePath),
-		fmt.Sprintf("-h=%s", ro.Host),
-		fmt.Sprintf("-P=%d", ro.Port),
-		fmt.Sprintf("-u=%s", ro.User),
-		fmt.Sprintf("-p=%s", ro.Password),
+		"--status-addr=0.0.0.0:8289",
+		"--backend=tidb",
+		"--server-mode=false",
+		"--log-file=",
+		fmt.Sprintf("--tidb-user=%s", ro.User),
+		fmt.Sprintf("--tidb-password=%s", ro.Password),
+		fmt.Sprintf("--tidb-host=%s", ro.Host),
+		fmt.Sprintf("--d=%s", restorePath),
 	}
-	output, err := exec.Command("/loader", args...).CombinedOutput()
+	output, err := exec.Command("/tidb-lightning", args...).CombinedOutput()
 	if err != nil {
 		return fmt.Errorf("cluster %s, execute loader command %v failed, output: %s, err: %v", ro, args, string(output), err)
 	}
 	return nil
 }
 
-func (ro *RestoreOpts) getDSN(db string) string {
-	return fmt.Sprintf("%s:%s@(%s:%d)/%s?charset=utf8", ro.User, ro.Password, ro.Host, ro.Port, db)
-}
-
 // unarchiveBackupData unarchive backup data to dest dir
 func unarchiveBackupData(backupFile, destDir string) (string, error) {
 	var unarchiveBackupPath string
diff --git a/cmd/backup-manager/app/restore/manager.go b/cmd/backup-manager/app/restore/manager.go
index 2154c1c5c1..9b0dc28a51 100644
--- a/cmd/backup-manager/app/restore/manager.go
+++ b/cmd/backup-manager/app/restore/manager.go
@@ -14,16 +14,20 @@
 package restore
 
 import (
+	"database/sql"
 	"fmt"
 	"time"
 
-	corev1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	glog "k8s.io/klog"
-
+	"github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants"
+	"github.com/pingcap/tidb-operator/cmd/backup-manager/app/util"
 	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
+	bkconstants "github.com/pingcap/tidb-operator/pkg/backup/constants"
 	listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1"
 	"github.com/pingcap/tidb-operator/pkg/controller"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/klog"
 )
 
 type Manager struct {
@@ -44,11 +48,29 @@ func NewManager(
 	}
 }
 
+func (rm *Manager) setOptions(restore *v1alpha1.Restore) {
+	rm.Options.Host = restore.Spec.To.Host
+
+	if restore.Spec.To.Port != 0 {
+		rm.Options.Port = restore.Spec.To.Port
+	} else {
+		rm.Options.Port = bkconstants.DefaultTidbPort
+	}
+
+	if restore.Spec.To.User != "" {
+		rm.Options.User = restore.Spec.To.User
+	} else {
+		rm.Options.User = bkconstants.DefaultTidbUser
+	}
+
+	rm.Options.Password = util.GetOptionValueFromEnv(bkconstants.TidbPasswordKey, bkconstants.BackupManagerEnvVarPrefix)
+}
+
 // ProcessRestore used to process the restore logic
 func (rm *Manager) ProcessRestore() error {
-	restore, err := rm.restoreLister.Restores(rm.Namespace).Get(rm.RestoreName)
+	restore, err := rm.restoreLister.Restores(rm.Namespace).Get(rm.ResourceName)
 	if err != nil {
-		glog.Errorf("can't find cluster %s restore %s CRD object, err: %v", rm, rm.ResourceName, err)
+		klog.Errorf("can't find cluster %s restore %s CRD object, err: %v", rm, rm.ResourceName, err)
 		return rm.StatusUpdater.Update(restore, 
&v1alpha1.RestoreCondition{ Type: v1alpha1.RestoreFailed, Status: corev1.ConditionTrue, @@ -60,10 +82,40 @@ func (rm *Manager) ProcessRestore() error { return fmt.Errorf("no br config in %s", rm) } - return rm.performRestore(restore.DeepCopy()) + rm.setOptions(restore) + + var db *sql.DB + var dsn string + err = wait.PollImmediate(constants.PollInterval, constants.CheckTimeout, func() (done bool, err error) { + dsn, err = rm.GetDSN(rm.TLSClient) + if err != nil { + klog.Errorf("can't get dsn of tidb cluster %s, err: %s", rm, err) + return false, err + } + + db, err = util.OpenDB(dsn) + if err != nil { + klog.Warningf("can't connect to tidb cluster %s, err: %s", rm, err) + return false, nil + } + return true, nil + }) + + if err != nil { + klog.Errorf("cluster %s connect failed, err: %s", rm, err) + return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ + Type: v1alpha1.RestoreFailed, + Status: corev1.ConditionTrue, + Reason: "ConnectTidbFailed", + Message: err.Error(), + }) + } + + defer db.Close() + return rm.performRestore(restore.DeepCopy(), db) } -func (rm *Manager) performRestore(restore *v1alpha1.Restore) error { +func (rm *Manager) performRestore(restore *v1alpha1.Restore, db *sql.DB) error { started := time.Now() err := rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ @@ -74,16 +126,95 @@ func (rm *Manager) performRestore(restore *v1alpha1.Restore) error { return err } - if err := rm.restoreData(restore); err != nil { - glog.Errorf("restore cluster %s from %s failed, err: %s", rm, restore.Spec.Type, err) + oldTikvGCTime, err := rm.GetTikvGCLifeTime(db) + if err != nil { + klog.Errorf("cluster %s get %s failed, err: %s", rm, constants.TikvGCVariable, err) return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ Type: v1alpha1.RestoreFailed, Status: corev1.ConditionTrue, - Reason: "RestoreDataFromRemoteFailed", + Reason: "GetTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + klog.Infof("cluster %s %s is %s", rm, constants.TikvGCVariable, oldTikvGCTime) + + oldTikvGCTimeDuration, err := time.ParseDuration(oldTikvGCTime) + if err != nil { + klog.Errorf("cluster %s parse old %s failed, err: %s", rm, constants.TikvGCVariable, err) + return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ + Type: v1alpha1.RestoreFailed, + Status: corev1.ConditionTrue, + Reason: "ParseOldTikvGCLifeTimeFailed", Message: err.Error(), }) } - glog.Infof("restore cluster %s from %s succeed", rm, restore.Spec.Type) + + var tikvGCTimeDuration time.Duration + var tikvGCLifeTime string + if restore.Spec.TikvGCLifeTime != nil { + tikvGCLifeTime = *restore.Spec.TikvGCLifeTime + tikvGCTimeDuration, err = time.ParseDuration(tikvGCLifeTime) + if err != nil { + klog.Errorf("cluster %s parse configured %s failed, err: %s", rm, constants.TikvGCVariable, err) + return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ + Type: v1alpha1.RestoreFailed, + Status: corev1.ConditionTrue, + Reason: "ParseConfiguredTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + } else { + tikvGCLifeTime = constants.TikvGCLifeTime + tikvGCTimeDuration, err = time.ParseDuration(tikvGCLifeTime) + if err != nil { + klog.Errorf("cluster %s parse default %s failed, err: %s", rm, constants.TikvGCVariable, err) + return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ + Type: v1alpha1.RestoreFailed, + Status: corev1.ConditionTrue, + Reason: "ParseDefaultTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + } + + if oldTikvGCTimeDuration < tikvGCTimeDuration { + err = 
rm.SetTikvGCLifeTime(db, tikvGCLifeTime) + if err != nil { + klog.Errorf("cluster %s set tikv GC life time to %s failed, err: %s", rm, tikvGCLifeTime, err) + return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ + Type: v1alpha1.RestoreFailed, + Status: corev1.ConditionTrue, + Reason: "SetTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + klog.Infof("set cluster %s %s to %s success", rm, constants.TikvGCVariable, tikvGCLifeTime) + } + + restoreErr := rm.restoreData(restore) + if oldTikvGCTimeDuration < tikvGCTimeDuration { + err = rm.SetTikvGCLifeTime(db, oldTikvGCTime) + if err != nil { + klog.Errorf("cluster %s reset tikv GC life time to %s failed, err: %s", rm, oldTikvGCTime, err) + return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ + Type: v1alpha1.RestoreFailed, + Status: corev1.ConditionTrue, + Reason: "ResetTikvGCLifeTimeFailed", + Message: err.Error(), + }) + } + klog.Infof("reset cluster %s %s to %s success", rm, constants.TikvGCVariable, oldTikvGCTime) + } + if restoreErr != nil { + klog.Errorf("restore cluster %s from %s failed, err: %s", rm, restore.Spec.Type, restoreErr) + return rm.StatusUpdater.Update(restore, &v1alpha1.RestoreCondition{ + Type: v1alpha1.RestoreFailed, + Status: corev1.ConditionTrue, + Reason: "RestoreDataFromRemoteFailed", + Message: restoreErr.Error(), + }) + } + klog.Infof("restore cluster %s from %s succeed", rm, restore.Spec.Type) finish := time.Now() restore.Status.TimeStarted = metav1.Time{Time: started} diff --git a/cmd/backup-manager/app/restore/restore.go b/cmd/backup-manager/app/restore/restore.go index 200c013753..ff79167a49 100644 --- a/cmd/backup-manager/app/restore/restore.go +++ b/cmd/backup-manager/app/restore/restore.go @@ -14,29 +14,41 @@ package restore import ( + "bufio" "fmt" + "io" + "io/ioutil" "os/exec" + "path" + "strings" - glog "k8s.io/klog" - - "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" + backupUtil "github.com/pingcap/tidb-operator/cmd/backup-manager/app/util" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" + "github.com/pingcap/tidb-operator/pkg/util" + corev1 "k8s.io/api/core/v1" + "k8s.io/klog" ) type Options struct { - Namespace string - RestoreName string -} - -func (ro *Options) String() string { - return fmt.Sprintf("%s/%s", ro.Namespace, ro.RestoreName) + backupUtil.GenericOptions } func (ro *Options) restoreData(restore *v1alpha1.Restore) error { + clusterNamespace := restore.Spec.BR.ClusterNamespace + if restore.Spec.BR.ClusterNamespace == "" { + clusterNamespace = restore.Namespace + } args, err := constructBROptions(restore) if err != nil { return err } + args = append(args, fmt.Sprintf("--pd=%s-pd.%s:2379", restore.Spec.BR.Cluster, clusterNamespace)) + if ro.TLSCluster { + args = append(args, fmt.Sprintf("--ca=%s", path.Join(util.ClusterClientTLSPath, corev1.ServiceAccountRootCAKey))) + args = append(args, fmt.Sprintf("--cert=%s", path.Join(util.ClusterClientTLSPath, corev1.TLSCertKey))) + args = append(args, fmt.Sprintf("--key=%s", path.Join(util.ClusterClientTLSPath, corev1.TLSPrivateKeyKey))) + } + var restoreType string if restore.Spec.Type == "" { restoreType = string(v1alpha1.BackupTypeFull) @@ -48,17 +60,50 @@ func (ro *Options) restoreData(restore *v1alpha1.Restore) error { restoreType, } fullArgs = append(fullArgs, args...) 
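
Editor's note: both the dumper-based backup and the BR restore above wrap their data-moving step in the same `tikv_gc_life_time` handling: read the current value, raise it only if it is shorter than the configured (or default) lifetime, run the job, then put the original value back before surfacing any job error. The following is a minimal, standalone sketch of that flow, not the operator's code; `getGC`/`setGC` are placeholders standing in for the real `GetTikvGCLifeTime`/`SetTikvGCLifeTime` helpers, and CR status reporting is omitted.

```go
package main

import (
	"database/sql"
	"fmt"
	"time"
)

// getGC and setGC are stubs for the real helpers that read and write
// the tikv_gc_life_time variable through a *sql.DB connection.
func getGC(db *sql.DB) (string, error) { return "10m0s", nil }
func setGC(db *sql.DB, v string) error { return nil }

// withRaisedGCLifeTime raises tikv_gc_life_time only when the current
// value is shorter than want, runs the job, and always restores the
// original value before returning the job's error.
func withRaisedGCLifeTime(db *sql.DB, want string, job func() error) error {
	old, err := getGC(db)
	if err != nil {
		return err
	}
	oldD, err := time.ParseDuration(old)
	if err != nil {
		return err
	}
	wantD, err := time.ParseDuration(want)
	if err != nil {
		return err
	}
	raised := oldD < wantD
	if raised {
		if err := setGC(db, want); err != nil {
			return err
		}
	}
	jobErr := job() // dump or restore the data
	if raised {
		if err := setGC(db, old); err != nil {
			return err // failing to reset GC is reported before the job error
		}
	}
	return jobErr
}

func main() {
	err := withRaisedGCLifeTime(nil, "72h", func() error {
		fmt.Println("moving data...")
		return nil
	})
	fmt.Println("err:", err)
}
```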
- glog.Infof("Running br command with args: %v", fullArgs) - output, err := exec.Command("br", fullArgs...).CombinedOutput() + klog.Infof("Running br command with args: %v", fullArgs) + bin := "br" + backupUtil.Suffix(ro.TiKVVersion) + cmd := exec.Command(bin, fullArgs...) + + stdOut, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("cluster %s, create stdout pipe failed, err: %v", ro, err) + } + stdErr, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("cluster %s, create stderr pipe failed, err: %v", ro, err) + } + err = cmd.Start() + if err != nil { + return fmt.Errorf("cluster %s, execute br command failed, args: %s, err: %v", ro, fullArgs, err) + } + var errMsg string + reader := bufio.NewReader(stdOut) + for { + line, err := reader.ReadString('\n') + if strings.Contains(line, "[ERROR]") { + errMsg += line + } + klog.Infof(strings.Replace(line, "\n", "", -1)) + if err != nil || io.EOF == err { + break + } + } + tmpErr, _ := ioutil.ReadAll(stdErr) + if len(tmpErr) > 0 { + klog.Infof(string(tmpErr)) + errMsg += string(tmpErr) + } + + err = cmd.Wait() if err != nil { - return fmt.Errorf("cluster %s, execute br command %v failed, output: %s, err: %v", ro, fullArgs, string(output), err) + return fmt.Errorf("cluster %s, wait pipe message failed, errMsg %s, err: %v", ro, errMsg, err) } - glog.Infof("Restore data for cluster %s successfully, output: %s", ro, string(output)) + klog.Infof("Restore data for cluster %s successfully", ro) return nil } func constructBROptions(restore *v1alpha1.Restore) ([]string, error) { - args, err := util.ConstructBRGlobalOptionsForRestore(restore) + args, err := backupUtil.ConstructBRGlobalOptionsForRestore(restore) if err != nil { return nil, err } diff --git a/cmd/backup-manager/app/util/generic.go b/cmd/backup-manager/app/util/generic.go new file mode 100644 index 0000000000..54f49d45bd --- /dev/null +++ b/cmd/backup-manager/app/util/generic.go @@ -0,0 +1,95 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package util + +import ( + "crypto/tls" + "crypto/x509" + "database/sql" + "errors" + "fmt" + "io/ioutil" + "path" + + "github.com/go-sql-driver/mysql" + "github.com/pingcap/tidb-operator/cmd/backup-manager/app/constants" + "github.com/pingcap/tidb-operator/pkg/util" + corev1 "k8s.io/api/core/v1" +) + +// GenericOptions contains the generic input arguments to the backup/restore command +type GenericOptions struct { + Namespace string + // ResourceName can be the name of a backup or restore resource + ResourceName string + TLSClient bool + TLSCluster bool + Host string + Port int32 + Password string + User string + TiKVVersion string +} + +func (bo *GenericOptions) String() string { + return fmt.Sprintf("%s/%s", bo.Namespace, bo.ResourceName) +} + +func (bo *GenericOptions) GetDSN(enabledTLSClient bool) (string, error) { + if !enabledTLSClient { + return fmt.Sprintf("%s:%s@(%s:%d)/%s?charset=utf8", bo.User, bo.Password, bo.Host, bo.Port, constants.TidbMetaDB), nil + } + rootCertPool := x509.NewCertPool() + pem, err := ioutil.ReadFile(path.Join(util.TiDBClientTLSPath, corev1.ServiceAccountRootCAKey)) + if err != nil { + return "", err + } + if ok := rootCertPool.AppendCertsFromPEM(pem); !ok { + return "", errors.New("Failed to append PEM") + } + clientCert := make([]tls.Certificate, 0, 1) + certs, err := tls.LoadX509KeyPair( + path.Join(util.TiDBClientTLSPath, corev1.TLSCertKey), + path.Join(util.TiDBClientTLSPath, corev1.TLSPrivateKeyKey)) + if err != nil { + return "", err + } + clientCert = append(clientCert, certs) + mysql.RegisterTLSConfig("customer", &tls.Config{ + RootCAs: rootCertPool, + Certificates: clientCert, + ServerName: bo.Host, + }) + return fmt.Sprintf("%s:%s@(%s:%d)/%s?tls=customer&charset=utf8", bo.User, bo.Password, bo.Host, bo.Port, constants.TidbMetaDB), nil +} + +func (bo *GenericOptions) GetTikvGCLifeTime(db *sql.DB) (string, error) { + var tikvGCTime string + sql := fmt.Sprintf("select variable_value from %s where variable_name= ?", constants.TidbMetaTable) + row := db.QueryRow(sql, constants.TikvGCVariable) + err := row.Scan(&tikvGCTime) + if err != nil { + return tikvGCTime, fmt.Errorf("query cluster %s %s failed, sql: %s, err: %v", bo, constants.TikvGCVariable, sql, err) + } + return tikvGCTime, nil +} + +func (bo *GenericOptions) SetTikvGCLifeTime(db *sql.DB, gcTime string) error { + sql := fmt.Sprintf("update %s set variable_value = ? 
where variable_name = ?", constants.TidbMetaTable) + _, err := db.Exec(sql, gcTime, constants.TikvGCVariable) + if err != nil { + return fmt.Errorf("set cluster %s %s failed, sql: %s, err: %v", bo, constants.TikvGCVariable, sql, err) + } + return nil +} diff --git a/cmd/backup-manager/app/util/k8s.go b/cmd/backup-manager/app/util/k8s.go index a0de91bf15..a10b5c1fed 100644 --- a/cmd/backup-manager/app/util/k8s.go +++ b/cmd/backup-manager/app/util/k8s.go @@ -22,13 +22,13 @@ import ( "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/tools/record" - glog "k8s.io/klog" + "k8s.io/klog" ) // NewEventRecorder return the specify source's recoder func NewEventRecorder(kubeCli kubernetes.Interface, source string) record.EventRecorder { eventBroadcaster := record.NewBroadcaster() - eventBroadcaster.StartLogging(glog.Infof) + eventBroadcaster.StartLogging(klog.Infof) eventBroadcaster.StartRecordingToSink(&eventv1.EventSinkImpl{ Interface: eventv1.New(kubeCli.CoreV1().RESTClient()).Events("")}) recorder := eventBroadcaster.NewRecorder(v1alpha1.Scheme, corev1.EventSource{Component: source}) diff --git a/cmd/backup-manager/app/util/util.go b/cmd/backup-manager/app/util/util.go index e7d436c14c..89cffee275 100644 --- a/cmd/backup-manager/app/util/util.go +++ b/cmd/backup-manager/app/util/util.go @@ -19,14 +19,22 @@ import ( "os" "strings" + "github.com/Masterminds/semver" "github.com/spf13/pflag" + "k8s.io/klog" cmdutil "k8s.io/kubectl/pkg/cmd/util" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" ) var ( - cmdHelpMsg string + cmdHelpMsg string + supportedVersions = map[string]struct{}{ + "3.1": {}, + "4.0": {}, + } + // DefaultVersion is the default tikv and br version + DefaultVersion = "4.0" ) func validCmdFlagFunc(flag *pflag.Flag) { @@ -98,17 +106,10 @@ func NormalizeBucketURI(bucket string) string { return strings.Replace(bucket, "://", ":", 1) } -// SetFlagsFromEnv set the environment variable. Will override default values, but be overridden by command line parameters. -func SetFlagsFromEnv(flags *pflag.FlagSet, prefix string) error { - flags.VisitAll(func(f *pflag.Flag) { - envVar := prefix + "_" + strings.Replace(strings.ToUpper(f.Name), "-", "_", -1) - value := os.Getenv(envVar) - if value != "" { - flags.Set(f.Name, value) - } - }) - - return nil +// GetOptionValueFromEnv get option's value from environment variable. If unset, return empty string. +func GetOptionValueFromEnv(option, envPrefix string) string { + envVar := envPrefix + "_" + strings.Replace(strings.ToUpper(option), "-", "_", -1) + return os.Getenv(envVar) } // ConstructBRGlobalOptionsForBackup constructs BR global options for backup and also return the remote path. @@ -119,7 +120,7 @@ func ConstructBRGlobalOptionsForBackup(backup *v1alpha1.Backup) ([]string, strin return nil, "", fmt.Errorf("no config for br in backup %s/%s", backup.Namespace, backup.Name) } args = append(args, constructBRGlobalOptions(config)...) - storageArgs, path, err := getRemoteStorage(backup.Spec.StorageProvider) + storageArgs, remotePath, err := getRemoteStorage(backup.Spec.StorageProvider) if err != nil { return nil, "", err } @@ -130,7 +131,7 @@ func ConstructBRGlobalOptionsForBackup(backup *v1alpha1.Backup) ([]string, strin if backup.Spec.Type == v1alpha1.BackupTypeTable && config.Table != "" { args = append(args, fmt.Sprintf("--table=%s", config.Table)) } - return args, path, nil + return args, remotePath, nil } // ConstructBRGlobalOptionsForRestore constructs BR global options for restore. 
@@ -158,16 +159,6 @@ func ConstructBRGlobalOptionsForRestore(restore *v1alpha1.Restore) ([]string, er // constructBRGlobalOptions constructs BR basic global options. func constructBRGlobalOptions(config *v1alpha1.BRConfig) []string { var args []string - args = append(args, fmt.Sprintf("--pd=%s", config.PDAddress)) - if config.CA != "" { - args = append(args, fmt.Sprintf("--ca=%s", config.CA)) - } - if config.Cert != "" { - args = append(args, fmt.Sprintf("--cert=%s", config.Cert)) - } - if config.Key != "" { - args = append(args, fmt.Sprintf("--key=%s", config.Key)) - } if config.LogLevel != "" { args = append(args, fmt.Sprintf("--log-level=%s", config.LogLevel)) } @@ -179,3 +170,20 @@ func constructBRGlobalOptions(config *v1alpha1.BRConfig) []string { } return args } + +// Suffix parses the major and minor version from the string and return the suffix +func Suffix(version string) string { + numS := strings.Split(DefaultVersion, ".") + defaultSuffix := numS[0] + numS[1] + + v, err := semver.NewVersion(version) + if err != nil { + klog.Errorf("Parse version %s failure, error: %v", version, err) + return defaultSuffix + } + parsed := fmt.Sprintf("%d.%d", v.Major(), v.Minor()) + if _, ok := supportedVersions[parsed]; ok { + return fmt.Sprintf("%d%d", v.Major(), v.Minor()) + } + return defaultSuffix +} diff --git a/cmd/controller-manager/main.go b/cmd/controller-manager/main.go index 7da64480c3..a5ae55c7a3 100644 --- a/cmd/controller-manager/main.go +++ b/cmd/controller-manager/main.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tidb-operator/pkg/controller/autoscaler" "github.com/pingcap/tidb-operator/pkg/controller/backup" "github.com/pingcap/tidb-operator/pkg/controller/backupschedule" + "github.com/pingcap/tidb-operator/pkg/controller/periodicity" "github.com/pingcap/tidb-operator/pkg/controller/restore" "github.com/pingcap/tidb-operator/pkg/controller/tidbcluster" "github.com/pingcap/tidb-operator/pkg/controller/tidbinitializer" @@ -46,21 +47,22 @@ import ( "k8s.io/client-go/tools/leaderelection/resourcelock" "k8s.io/client-go/tools/record" "k8s.io/component-base/logs" - glog "k8s.io/klog" + "k8s.io/klog" "sigs.k8s.io/controller-runtime/pkg/client" ) var ( - printVersion bool - workers int - autoFailover bool - pdFailoverPeriod time.Duration - tikvFailoverPeriod time.Duration - tidbFailoverPeriod time.Duration - leaseDuration = 15 * time.Second - renewDuration = 5 * time.Second - retryPeriod = 3 * time.Second - waitDuration = 5 * time.Second + printVersion bool + workers int + autoFailover bool + pdFailoverPeriod time.Duration + tikvFailoverPeriod time.Duration + tidbFailoverPeriod time.Duration + tiflashFailoverPeriod time.Duration + leaseDuration = 15 * time.Second + renewDuration = 5 * time.Second + retryPeriod = 3 * time.Second + waitDuration = 5 * time.Second ) func init() { @@ -71,6 +73,7 @@ func init() { flag.BoolVar(&autoFailover, "auto-failover", true, "Auto failover") flag.DurationVar(&pdFailoverPeriod, "pd-failover-period", time.Duration(5*time.Minute), "PD failover period default(5m)") flag.DurationVar(&tikvFailoverPeriod, "tikv-failover-period", time.Duration(5*time.Minute), "TiKV failover period default(5m)") + flag.DurationVar(&tiflashFailoverPeriod, "tiflash-failover-period", time.Duration(5*time.Minute), "TiFlash failover period default(5m)") flag.DurationVar(&tidbFailoverPeriod, "tidb-failover-period", time.Duration(5*time.Minute), "TiDB failover period") flag.DurationVar(&controller.ResyncDuration, "resync-duration", time.Duration(30*time.Second), "Resync time of 
informer") flag.BoolVar(&controller.TestMode, "test-mode", false, "whether tidb-operator run in test mode") @@ -93,38 +96,42 @@ func main() { logs.InitLogs() defer logs.FlushLogs() + flag.CommandLine.VisitAll(func(flag *flag.Flag) { + klog.V(1).Infof("FLAG: --%s=%q", flag.Name, flag.Value) + }) + hostName, err := os.Hostname() if err != nil { - glog.Fatalf("failed to get hostname: %v", err) + klog.Fatalf("failed to get hostname: %v", err) } ns := os.Getenv("NAMESPACE") if ns == "" { - glog.Fatal("NAMESPACE environment variable not set") + klog.Fatal("NAMESPACE environment variable not set") } cfg, err := rest.InClusterConfig() if err != nil { - glog.Fatalf("failed to get config: %v", err) + klog.Fatalf("failed to get config: %v", err) } cli, err := versioned.NewForConfig(cfg) if err != nil { - glog.Fatalf("failed to create Clientset: %v", err) + klog.Fatalf("failed to create Clientset: %v", err) } var kubeCli kubernetes.Interface kubeCli, err = kubernetes.NewForConfig(cfg) if err != nil { - glog.Fatalf("failed to get kubernetes Clientset: %v", err) + klog.Fatalf("failed to get kubernetes Clientset: %v", err) } asCli, err := asclientset.NewForConfig(cfg) if err != nil { - glog.Fatalf("failed to get advanced-statefulset Clientset: %v", err) + klog.Fatalf("failed to get advanced-statefulset Clientset: %v", err) } // TODO: optimize the read of genericCli with the shared cache genericCli, err := client.New(cfg, client.Options{Scheme: scheme.Scheme}) if err != nil { - glog.Fatalf("failed to get the generic kube-apiserver client: %v", err) + klog.Fatalf("failed to get the generic kube-apiserver client: %v", err) } // note that kubeCli here must not be the hijacked one @@ -143,20 +150,14 @@ func main() { var informerFactory informers.SharedInformerFactory var kubeInformerFactory kubeinformers.SharedInformerFactory - if controller.ClusterScoped { - informerFactory = informers.NewSharedInformerFactory(cli, controller.ResyncDuration) - kubeInformerFactory = kubeinformers.NewSharedInformerFactory(kubeCli, controller.ResyncDuration) - } else { - options := []informers.SharedInformerOption{ - informers.WithNamespace(ns), - } - informerFactory = informers.NewSharedInformerFactoryWithOptions(cli, controller.ResyncDuration, options...) - - kubeoptions := []kubeinformers.SharedInformerOption{ - kubeinformers.WithNamespace(ns), - } - kubeInformerFactory = kubeinformers.NewSharedInformerFactoryWithOptions(kubeCli, controller.ResyncDuration, kubeoptions...) + var options []informers.SharedInformerOption + var kubeoptions []kubeinformers.SharedInformerOption + if !controller.ClusterScoped { + options = append(options, informers.WithNamespace(ns)) + kubeoptions = append(kubeoptions, kubeinformers.WithNamespace(ns)) } + informerFactory = informers.NewSharedInformerFactoryWithOptions(cli, controller.ResyncDuration, options...) + kubeInformerFactory = kubeinformers.NewSharedInformerFactoryWithOptions(kubeCli, controller.ResyncDuration, kubeoptions...) rl := resourcelock.EndpointsLock{ EndpointsMeta: metav1.ObjectMeta{ @@ -177,18 +178,24 @@ func main() { // Upgrade before running any controller logic. If it fails, we wait // for process supervisor to restart it again. 
if err := operatorUpgrader.Upgrade(); err != nil { - glog.Fatalf("failed to upgrade: %v", err) + klog.Fatalf("failed to upgrade: %v", err) } - tcController := tidbcluster.NewController(kubeCli, cli, genericCli, informerFactory, kubeInformerFactory, autoFailover, pdFailoverPeriod, tikvFailoverPeriod, tidbFailoverPeriod) + tcController := tidbcluster.NewController(kubeCli, cli, genericCli, informerFactory, kubeInformerFactory, autoFailover, pdFailoverPeriod, tikvFailoverPeriod, tidbFailoverPeriod, tiflashFailoverPeriod) backupController := backup.NewController(kubeCli, cli, informerFactory, kubeInformerFactory) restoreController := restore.NewController(kubeCli, cli, informerFactory, kubeInformerFactory) bsController := backupschedule.NewController(kubeCli, cli, informerFactory, kubeInformerFactory) tidbInitController := tidbinitializer.NewController(kubeCli, cli, genericCli, informerFactory, kubeInformerFactory) tidbMonitorController := tidbmonitor.NewController(kubeCli, genericCli, informerFactory, kubeInformerFactory) + + var periodicityController *periodicity.Controller + if controller.PodWebhookEnabled { + periodicityController = periodicity.NewController(kubeCli, informerFactory, kubeInformerFactory) + } + var autoScalerController *autoscaler.Controller if features.DefaultFeatureGate.Enabled(features.AutoScaling) { - autoScalerController = autoscaler.NewController(kubeCli, genericCli, informerFactory, kubeInformerFactory) + autoScalerController = autoscaler.NewController(kubeCli, cli, informerFactory, kubeInformerFactory) } // Start informer factories after all controller are initialized. informerFactory.Start(ctx.Done()) @@ -197,28 +204,31 @@ func main() { // Wait for all started informers' cache were synced. for v, synced := range informerFactory.WaitForCacheSync(wait.NeverStop) { if !synced { - glog.Fatalf("error syncing informer for %v", v) + klog.Fatalf("error syncing informer for %v", v) } } for v, synced := range kubeInformerFactory.WaitForCacheSync(wait.NeverStop) { if !synced { - glog.Fatalf("error syncing informer for %v", v) + klog.Fatalf("error syncing informer for %v", v) } } - glog.Infof("cache of informer factories sync successfully") + klog.Infof("cache of informer factories sync successfully") go wait.Forever(func() { backupController.Run(workers, ctx.Done()) }, waitDuration) go wait.Forever(func() { restoreController.Run(workers, ctx.Done()) }, waitDuration) go wait.Forever(func() { bsController.Run(workers, ctx.Done()) }, waitDuration) go wait.Forever(func() { tidbInitController.Run(workers, ctx.Done()) }, waitDuration) go wait.Forever(func() { tidbMonitorController.Run(workers, ctx.Done()) }, waitDuration) + if controller.PodWebhookEnabled { + go wait.Forever(func() { periodicityController.Run(ctx.Done()) }, waitDuration) + } if features.DefaultFeatureGate.Enabled(features.AutoScaling) { go wait.Forever(func() { autoScalerController.Run(workers, ctx.Done()) }, waitDuration) } wait.Forever(func() { tcController.Run(workers, ctx.Done()) }, waitDuration) } onStopped := func() { - glog.Fatalf("leader election lost") + klog.Fatalf("leader election lost") } // leader election for multiple tidb-controller-manager instances @@ -235,5 +245,5 @@ func main() { }) }, waitDuration) - glog.Fatal(http.ListenAndServe(":6060", nil)) + klog.Fatal(http.ListenAndServe(":6060", nil)) } diff --git a/cmd/discovery/main.go b/cmd/discovery/main.go index 8e890ee428..9aea6001d0 100644 --- a/cmd/discovery/main.go +++ b/cmd/discovery/main.go @@ -27,7 +27,7 @@ import ( 
"k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/component-base/logs" - glog "k8s.io/klog" + "k8s.io/klog" ) var ( @@ -52,21 +52,25 @@ func main() { logs.InitLogs() defer logs.FlushLogs() + flag.CommandLine.VisitAll(func(flag *flag.Flag) { + klog.V(1).Infof("FLAG: --%s=%q", flag.Name, flag.Value) + }) + cfg, err := rest.InClusterConfig() if err != nil { - glog.Fatalf("failed to get config: %v", err) + klog.Fatalf("failed to get config: %v", err) } cli, err := versioned.NewForConfig(cfg) if err != nil { - glog.Fatalf("failed to create Clientset: %v", err) + klog.Fatalf("failed to create Clientset: %v", err) } kubeCli, err := kubernetes.NewForConfig(cfg) if err != nil { - glog.Fatalf("failed to get kubernetes Clientset: %v", err) + klog.Fatalf("failed to get kubernetes Clientset: %v", err) } go wait.Forever(func() { server.StartServer(cli, kubeCli, port) }, 5*time.Second) - glog.Fatal(http.ListenAndServe(":6060", nil)) + klog.Fatal(http.ListenAndServe(":6060", nil)) } diff --git a/cmd/scheduler/main.go b/cmd/scheduler/main.go index af310d45b3..f7e4e7b693 100644 --- a/cmd/scheduler/main.go +++ b/cmd/scheduler/main.go @@ -28,7 +28,7 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/component-base/logs" - glog "k8s.io/klog" + "k8s.io/klog" ) var ( @@ -54,21 +54,25 @@ func main() { logs.InitLogs() defer logs.FlushLogs() + flag.CommandLine.VisitAll(func(flag *flag.Flag) { + klog.V(1).Infof("FLAG: --%s=%q", flag.Name, flag.Value) + }) + cfg, err := rest.InClusterConfig() if err != nil { - glog.Fatalf("failed to get config: %v", err) + klog.Fatalf("failed to get config: %v", err) } kubeCli, err := kubernetes.NewForConfig(cfg) if err != nil { - glog.Fatalf("failed to get kubernetes Clientset: %v", err) + klog.Fatalf("failed to get kubernetes Clientset: %v", err) } cli, err := versioned.NewForConfig(cfg) if err != nil { - glog.Fatalf("failed to create Clientset: %v", err) + klog.Fatalf("failed to create Clientset: %v", err) } go wait.Forever(func() { server.StartServer(kubeCli, cli, port) }, 5*time.Second) - glog.Fatal(http.ListenAndServe(":6060", nil)) + klog.Fatal(http.ListenAndServe(":6060", nil)) } diff --git a/cmd/to-crdgen/main.go b/cmd/to-crdgen/main.go index 7dd0532a13..1a9f5386c7 100644 --- a/cmd/to-crdgen/main.go +++ b/cmd/to-crdgen/main.go @@ -16,9 +16,10 @@ package main import ( "flag" "fmt" + "os" + "github.com/pingcap/tidb-operator/pkg/to-crdgen/cmd" "github.com/spf13/pflag" - "os" ) func main() { diff --git a/deploy/aliyun/main.tf b/deploy/aliyun/main.tf index 0c7be43e85..ef520f6b05 100644 --- a/deploy/aliyun/main.tf +++ b/deploy/aliyun/main.tf @@ -71,17 +71,17 @@ module "tidb-cluster" { helm = helm.default } - cluster_name = "my-cluster" + cluster_name = var.tidb_cluster_name ack = module.tidb-operator - tidb_version = var.tidb_version - tidb_cluster_chart_version = var.tidb_cluster_chart_version - pd_instance_type = var.pd_instance_type - pd_count = var.pd_count - tikv_instance_type = var.tikv_instance_type - tikv_count = var.tikv_count - tidb_instance_type = var.tidb_instance_type - tidb_count = var.tidb_count - monitor_instance_type = var.monitor_instance_type - override_values = file("my-cluster.yaml") + tidb_version = var.tidb_version + tidb_cluster_chart_version = var.tidb_cluster_chart_version + pd_instance_type = var.pd_instance_type + pd_count = var.pd_count + tikv_instance_type = var.tikv_instance_type + tikv_count = var.tikv_count + tidb_instance_type = var.tidb_instance_type + tidb_count = var.tidb_count + monitor_instance_type = 
var.monitor_instance_type + create_tidb_cluster_release = var.create_tidb_cluster_release } diff --git a/deploy/aliyun/manifests/db-monitor.yaml.example b/deploy/aliyun/manifests/db-monitor.yaml.example new file mode 100644 index 0000000000..243e935e8f --- /dev/null +++ b/deploy/aliyun/manifests/db-monitor.yaml.example @@ -0,0 +1,86 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbMonitor +metadata: + name: TIDB_CLUSTER_NAME +spec: + alertmanagerURL: "" + annotations: {} + clusters: + - name: TIDB_CLUSTER_NAME + grafana: + baseImage: grafana/grafana + envs: + # Configure Grafana using environment variables except GF_PATHS_DATA, GF_SECURITY_ADMIN_USER and GF_SECURITY_ADMIN_PASSWORD + # Ref https://grafana.com/docs/installation/configuration/#using-environment-variables + GF_AUTH_ANONYMOUS_ENABLED: "true" + GF_AUTH_ANONYMOUS_ORG_NAME: "Main Org." + GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer" + # if grafana is running behind a reverse proxy with subpath http://foo.bar/grafana + # GF_SERVER_DOMAIN: foo.bar + # GF_SERVER_ROOT_URL: "%(protocol)s://%(domain)s/grafana/" + imagePullPolicy: IfNotPresent + logLevel: info + password: admin + resources: {} + # limits: + # cpu: 8000m + # memory: 8Gi + # requests: + # cpu: 4000m + # memory: 4Gi + service: + portName: http-grafana + type: LoadBalancer + annotations: + service.beta.kubernetes.io/alicloud-loadbalancer-address-type: internet + username: admin + version: 6.0.1 + imagePullPolicy: IfNotPresent + initializer: + baseImage: pingcap/tidb-monitor-initializer + imagePullPolicy: IfNotPresent + resources: {} + # limits: + # cpu: 50m + # memory: 64Mi + # requests: + # cpu: 50m + # memory: 64Mi + version: v3.0.12 + kubePrometheusURL: "" + nodeSelector: {} + persistent: true + prometheus: + baseImage: prom/prometheus + imagePullPolicy: IfNotPresent + logLevel: info + reserveDays: 12 + resources: {} + # limits: + # cpu: 8000m + # memory: 8Gi + # requests: + # cpu: 4000m + # memory: 4Gi + service: + portName: http-prometheus + type: NodePort + version: v2.11.1 + reloader: + baseImage: pingcap/tidb-monitor-reloader + imagePullPolicy: IfNotPresent + resources: {} + # limits: + # cpu: 50m + # memory: 64Mi + # requests: + # cpu: 50m + # memory: 64Mi + service: + portName: tcp-reloader + type: NodePort + version: v1.0.1 + storage: 100Gi + storageClassName: alicloud-disk-available + tolerations: [] + diff --git a/deploy/aliyun/manifests/db.yaml.example b/deploy/aliyun/manifests/db.yaml.example new file mode 100644 index 0000000000..f5b3a37448 --- /dev/null +++ b/deploy/aliyun/manifests/db.yaml.example @@ -0,0 +1,110 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: TIDB_CLUSTER_NAME +spec: + configUpdateStrategy: RollingUpdate + enableTLSCluster: false + helper: + image: busybox:1.31.1 + hostNetwork: false + imagePullPolicy: IfNotPresent + pd: + affinity: {} + baseImage: pingcap/pd + config: + log: + level: info + nodeSelector: + dedicated: TIDB_CLUSTER_NAME-pd + podSecurityContext: {} + replicas: 3 + requests: + cpu: "1" + memory: 400Mi + storage: 20Gi + storageClassName: alicloud-disk + tolerations: + - effect: NoSchedule + key: dedicated + operator: Equal + value: TIDB_CLUSTER_NAME-pd + pvReclaimPolicy: Retain + schedulerName: tidb-scheduler + tidb: + affinity: {} + annotations: + tidb.pingcap.com/sysctl-init: "true" + baseImage: pingcap/tidb + config: + log: + level: info + performance: + max-procs: 0 + tcp-keep-alive: true + enableTLSClient: false + maxFailoverCount: 3 + nodeSelector: + dedicated: TIDB_CLUSTER_NAME-tidb + podSecurityContext: 
+ sysctls: + - name: net.ipv4.tcp_keepalive_time + value: "300" + - name: net.ipv4.tcp_keepalive_intvl + value: "75" + - name: net.core.somaxconn + value: "32768" + replicas: 2 + requests: + cpu: "1" + memory: 400Mi + separateSlowLog: true + service: + annotations: + service.beta.kubernetes.io/alicloud-loadbalancer-address-type: intranet + service.beta.kubernetes.io/alicloud-loadbalancer-slb-network-type: vpc + exposeStatus: true + externalTrafficPolicy: Local + type: LoadBalancer + slowLogTailer: + limits: + cpu: 100m + memory: 50Mi + requests: + cpu: 20m + memory: 5Mi + tolerations: + - effect: NoSchedule + key: dedicated + operator: Equal + value: TIDB_CLUSTER_NAME-tidb + tikv: + affinity: {} + annotations: + tidb.pingcap.com/sysctl-init: "true" + baseImage: pingcap/tikv + config: + log-level: info + hostNetwork: false + maxFailoverCount: 3 + nodeSelector: + dedicated: TIDB_CLUSTER_NAME-tikv + podSecurityContext: + sysctls: + - name: net.core.somaxconn + value: "32768" + privileged: false + replicas: 3 + requests: + cpu: "1" + memory: 2Gi + storage: 100Gi + storageClassName: local-volume + tolerations: + - effect: NoSchedule + key: dedicated + operator: Equal + value: TIDB_CLUSTER_NAME-tikv + timezone: UTC + version: v3.0.12 + diff --git a/deploy/aliyun/variables.tf b/deploy/aliyun/variables.tf index c783235497..843f163fbf 100644 --- a/deploy/aliyun/variables.tf +++ b/deploy/aliyun/variables.tf @@ -10,7 +10,7 @@ variable "bastion_cpu_core_count" { variable "operator_version" { type = string - default = "v1.0.6" + default = "v1.1.0" } variable "operator_helm_values" { @@ -112,3 +112,13 @@ variable "vpc_cidr" { description = "VPC cidr_block, options: [192.168.0.0.0/16, 172.16.0.0/16, 10.0.0.0/8], cannot collidate with kubernetes service cidr and pod cidr. Cannot change once the vpc created." 
default = "192.168.0.0/16" } + +variable "create_tidb_cluster_release" { + description = "whether creating tidb-cluster helm release" + default = false +} + +variable "tidb_cluster_name" { + description = "The TiDB cluster name" + default = "my-cluster" +} diff --git a/deploy/aws/clusters.tf b/deploy/aws/clusters.tf index ac3d9ff777..a8bf9691bd 100644 --- a/deploy/aws/clusters.tf +++ b/deploy/aws/clusters.tf @@ -17,25 +17,24 @@ provider "helm" { } # TiDB cluster declaration example -#module "example-cluster" { -# source = "./tidb-cluster" -# eks_info = local.default_eks -# subnets = local.default_subnets -# -# # NOTE: cluster_name cannot be changed after creation -# cluster_name = "demo-cluster" -# cluster_version = "v3.0.8" -# ssh_key_name = module.key-pair.key_name -# pd_count = 1 -# pd_instance_type = "t2.xlarge" -# tikv_count = 1 -# tikv_instance_type = "t2.xlarge" -# tidb_count = 1 -# tidb_instance_type = "t2.xlarge" -# monitor_instance_type = "t2.xlarge" -# # yaml file that passed to helm to customize the release -# override_values = file("values/example.yaml") -#} +# module example-cluster { +# source = "../modules/aws/tidb-cluster" + +# eks = local.eks +# subnets = local.subnets +# region = var.region +# cluster_name = "example" + +# ssh_key_name = module.key-pair.key_name +# pd_count = 1 +# pd_instance_type = "c5.large" +# tikv_count = 1 +# tikv_instance_type = "c5d.large" +# tidb_count = 1 +# tidb_instance_type = "c4.large" +# monitor_instance_type = "c5.large" +# create_tidb_cluster_release = false +# } module "default-cluster" { providers = { @@ -46,15 +45,15 @@ module "default-cluster" { subnets = local.subnets region = var.region - cluster_name = var.default_cluster_name - cluster_version = var.default_cluster_version - ssh_key_name = module.key-pair.key_name - pd_count = var.default_cluster_pd_count - pd_instance_type = var.default_cluster_pd_instance_type - tikv_count = var.default_cluster_tikv_count - tikv_instance_type = var.default_cluster_tikv_instance_type - tidb_count = var.default_cluster_tidb_count - tidb_instance_type = var.default_cluster_tidb_instance_type - monitor_instance_type = var.default_cluster_monitor_instance_type - override_values = file("default-cluster.yaml") + cluster_name = var.default_cluster_name + cluster_version = var.default_cluster_version + ssh_key_name = module.key-pair.key_name + pd_count = var.default_cluster_pd_count + pd_instance_type = var.default_cluster_pd_instance_type + tikv_count = var.default_cluster_tikv_count + tikv_instance_type = var.default_cluster_tikv_instance_type + tidb_count = var.default_cluster_tidb_count + tidb_instance_type = var.default_cluster_tidb_instance_type + monitor_instance_type = var.default_cluster_monitor_instance_type + create_tidb_cluster_release = var.create_tidb_cluster_release } diff --git a/deploy/aws/manifests/db-monitor.yaml.example b/deploy/aws/manifests/db-monitor.yaml.example new file mode 100644 index 0000000000..da607309b4 --- /dev/null +++ b/deploy/aws/manifests/db-monitor.yaml.example @@ -0,0 +1,84 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbMonitor +metadata: + name: CLUSTER_NAME +spec: + alertmanagerURL: "" + annotations: {} + clusters: + - name: CLUSTER_NAME + grafana: + baseImage: grafana/grafana + envs: + # Configure Grafana using environment variables except GF_PATHS_DATA, GF_SECURITY_ADMIN_USER and GF_SECURITY_ADMIN_PASSWORD + # Ref https://grafana.com/docs/installation/configuration/#using-environment-variables + GF_AUTH_ANONYMOUS_ENABLED: "true" + GF_AUTH_ANONYMOUS_ORG_NAME: 
"Main Org." + GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer" + # if grafana is running behind a reverse proxy with subpath http://foo.bar/grafana + # GF_SERVER_DOMAIN: foo.bar + # GF_SERVER_ROOT_URL: "%(protocol)s://%(domain)s/grafana/" + imagePullPolicy: IfNotPresent + logLevel: info + password: admin + resources: {} + # limits: + # cpu: 8000m + # memory: 8Gi + # requests: + # cpu: 4000m + # memory: 4Gi + service: + portName: http-grafana + type: LoadBalancer + username: admin + version: 6.0.1 + imagePullPolicy: IfNotPresent + initializer: + baseImage: pingcap/tidb-monitor-initializer + imagePullPolicy: IfNotPresent + resources: {} + # limits: + # cpu: 50m + # memory: 64Mi + # requests: + # cpu: 50m + # memory: 64Mi + version: v3.0.12 + kubePrometheusURL: "" + nodeSelector: {} + persistent: true + prometheus: + baseImage: prom/prometheus + imagePullPolicy: IfNotPresent + logLevel: info + reserveDays: 12 + resources: {} + # limits: + # cpu: 8000m + # memory: 8Gi + # requests: + # cpu: 4000m + # memory: 4Gi + service: + portName: http-prometheus + type: NodePort + version: v2.11.1 + reloader: + baseImage: pingcap/tidb-monitor-reloader + imagePullPolicy: IfNotPresent + resources: {} + # limits: + # cpu: 50m + # memory: 64Mi + # requests: + # cpu: 50m + # memory: 64Mi + service: + portName: tcp-reloader + type: NodePort + version: v1.0.1 + storage: 100Gi + storageClassName: ebs-gp2 + tolerations: [] + diff --git a/deploy/aws/manifests/db.yaml.example b/deploy/aws/manifests/db.yaml.example new file mode 100644 index 0000000000..5a4eb9c2bc --- /dev/null +++ b/deploy/aws/manifests/db.yaml.example @@ -0,0 +1,108 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: CLUSTER_NAME +spec: + configUpdateStrategy: RollingUpdate + enableTLSCluster: false + helper: + image: busybox:1.31.1 + hostNetwork: false + imagePullPolicy: IfNotPresent + pd: + affinity: {} + baseImage: pingcap/pd + config: + log: + level: info + replication: + location-labels: + - zone + max-replicas: 3 + nodeSelector: + dedicated: CLUSTER_NAME-pd + podSecurityContext: {} + replicas: 3 + requests: + cpu: "1" + memory: 400Mi + storage: 1Gi + storageClassName: ebs-gp2 + tolerations: + - effect: NoSchedule + key: dedicated + operator: Equal + value: CLUSTER_NAME-pd + pvReclaimPolicy: Retain + schedulerName: tidb-scheduler + tidb: + affinity: {} + baseImage: pingcap/tidb + config: + log: + level: info + performance: + max-procs: 0 + tcp-keep-alive: true + enableTLSClient: false + maxFailoverCount: 3 + nodeSelector: + dedicated: CLUSTER_NAME-tidb + podSecurityContext: + sysctls: + - name: net.ipv4.tcp_keepalive_time + value: "300" + - name: net.ipv4.tcp_keepalive_intvl + value: "75" + - name: net.core.somaxconn + value: "32768" + replicas: 2 + requests: + cpu: "1" + memory: 400Mi + separateSlowLog: true + service: + annotations: + service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: 'true' + service.beta.kubernetes.io/aws-load-balancer-internal: '0.0.0.0/0' + service.beta.kubernetes.io/aws-load-balancer-type: nlb + exposeStatus: true + externalTrafficPolicy: Local + type: LoadBalancer + slowLogTailer: + limits: + cpu: 100m + memory: 50Mi + requests: + cpu: 20m + memory: 5Mi + tolerations: + - effect: NoSchedule + key: dedicated + operator: Equal + value: CLUSTER_NAME-tidb + tikv: + affinity: {} + baseImage: pingcap/tikv + config: + log-level: info + hostNetwork: false + maxFailoverCount: 3 + nodeSelector: + dedicated: CLUSTER_NAME-tikv + podSecurityContext: {} + privileged: false + replicas: 3 + 
requests: + cpu: "1" + memory: 2Gi + storage: 45Gi + storageClassName: local-storage + tolerations: + - effect: NoSchedule + key: dedicated + operator: Equal + value: CLUSTER_NAME-tikv + timezone: UTC + version: v3.0.12 + diff --git a/deploy/aws/variables.tf b/deploy/aws/variables.tf index 0ad33b44f7..7691663a5c 100644 --- a/deploy/aws/variables.tf +++ b/deploy/aws/variables.tf @@ -19,7 +19,7 @@ variable "eks_version" { variable "operator_version" { description = "TiDB operator version" - default = "v1.0.6" + default = "v1.1.0" } variable "operator_values" { @@ -115,3 +115,7 @@ variable "default_cluster_name" { default = "my-cluster" } +variable "create_tidb_cluster_release" { + description = "whether creating tidb-cluster helm release" + default = false +} diff --git a/deploy/gcp/examples/multi-zonal.tfvars b/deploy/gcp/examples/multi-zonal.tfvars index 17aa938cc0..6824c0772d 100644 --- a/deploy/gcp/examples/multi-zonal.tfvars +++ b/deploy/gcp/examples/multi-zonal.tfvars @@ -2,12 +2,12 @@ # This will create a zonal cluster in zone us-central1-b with one additional zone. # Work nodes will be created in primary zone us-central1-b and additional zone us-central1-c. # -gke_name = "multi-zonal" -vpc_name = "multi-zonal" -location = "us-central1-b" +gke_name = "multi-zonal" +vpc_name = "multi-zonal" +location = "us-central1-b" pd_instance_type = "n1-standard-2" tikv_instance_type = "n1-highmem-4" tidb_instance_type = "n1-standard-8" node_locations = [ - "us-central1-c" + "us-central1-c" ] diff --git a/deploy/gcp/examples/single-zonal.tfvars b/deploy/gcp/examples/single-zonal.tfvars index 4b073e6e2c..568c6c8759 100644 --- a/deploy/gcp/examples/single-zonal.tfvars +++ b/deploy/gcp/examples/single-zonal.tfvars @@ -2,12 +2,12 @@ # This will create a zonal cluster in zone us-central1-b without additional zones. # Work nodes will be created in a single zone only. # -gke_name = "single-zonal" -vpc_name = "single-zonal" -location = "us-central1-b" +gke_name = "single-zonal" +vpc_name = "single-zonal" +location = "us-central1-b" pd_instance_type = "n1-standard-2" tikv_instance_type = "n1-highmem-4" tidb_instance_type = "n1-standard-8" -pd_count = 3 -tikv_count = 3 -tidb_count = 3 +pd_count = 3 +tikv_count = 3 +tidb_count = 3 diff --git a/deploy/gcp/examples/tidb-customized.tfvars b/deploy/gcp/examples/tidb-customized.tfvars deleted file mode 100644 index f2ca40b6d4..0000000000 --- a/deploy/gcp/examples/tidb-customized.tfvars +++ /dev/null @@ -1,25 +0,0 @@ -pd_instance_type = "n1-standard-2" -tikv_instance_type = "n1-highmem-4" -tidb_instance_type = "n1-standard-8" - -# specify tidb version -tidb_version = "3.0.8" - -# override tidb cluster values -override_values = < "${local.kubeconfig}" +EOS + } +} + provider "helm" { alias = "initial" insecure = true # service_account = "tiller" install_tiller = false # currently this doesn't work, so we install tiller in the local-exec provisioner. See https://github.com/terraform-providers/terraform-provider-helm/issues/148 kubernetes { - config_path = local_file.kubeconfig.filename + config_path = local.kubeconfig + # used to delay helm provisioner initialization in apply phrase + load_config_file = null_resource.kubeconfig.id != "" ? 
true : null } } diff --git a/deploy/modules/gcp/tidb-cluster/data.tf b/deploy/modules/gcp/tidb-cluster/data.tf index d8cbd84ab8..6250b5aed0 100644 --- a/deploy/modules/gcp/tidb-cluster/data.tf +++ b/deploy/modules/gcp/tidb-cluster/data.tf @@ -1,21 +1,21 @@ data "external" "tidb_ilb_ip" { depends_on = [null_resource.wait-lb-ip] - program = ["bash", "-c", "kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-tidb -o json | jq '.status.loadBalancer.ingress[0]'"] + program = ["bash", "-c", local.cmd_get_tidb_ilb_ip] } data "external" "monitor_lb_ip" { depends_on = [null_resource.wait-lb-ip] - program = ["bash", "-c", "kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-grafana -o json | jq '.status.loadBalancer.ingress[0]'"] + program = ["bash", "-c", local.cmd_get_monitor_lb_ip] } data "external" "tidb_port" { depends_on = [null_resource.wait-lb-ip] - program = ["bash", "-c", "kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-tidb -o json | jq '.spec.ports | .[] | select( .name == \"mysql-client\") | {port: .port|tostring}'"] + program = ["bash", "-c", local.cmd_get_tidb_port] } data "external" "monitor_port" { depends_on = [null_resource.wait-lb-ip] - program = ["bash", "-c", "kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-grafana -o json | jq '.spec.ports | .[] | select( .name == \"grafana\") | {port: .port|tostring}'"] + program = ["bash", "-c", local.cmd_get_monitor_port] } locals { @@ -24,6 +24,22 @@ locals { # TODO Update related code when node locations is avaiable in attributes of cluster resource. cmd_get_cluster_locations = < 0 then .[0].locations | join(",") else "" end) }' +EOT + cmd_get_tidb_ilb_ip = </dev/null) || true +jq -s '.[0].status.loadBalancer.ingress[0] // {"ip":""}' <<<"$output" +EOT + cmd_get_monitor_lb_ip = </dev/null) || true +jq -s '.[0].status.loadBalancer.ingress[0] // {"ip":""}' <<<"$output" +EOT + cmd_get_tidb_port = </dev/null) || true +jq -s 'try (.[0].spec.ports | .[] | select( .name == "mysql-client") | {port: .port|tostring}) catch {"port":""}' <<<"$otuput" +EOT + cmd_get_monitor_port = </dev/null) || true +jq -s 'try (.[0].spec.ports | .[] | select( .name == "grafana") | {port: .port|tostring}) catch {"port":""}' <<<"$otuput" EOT } diff --git a/deploy/modules/gcp/tidb-cluster/main.tf b/deploy/modules/gcp/tidb-cluster/main.tf index 7166ee7787..9aa76c3bdb 100644 --- a/deploy/modules/gcp/tidb-cluster/main.tf +++ b/deploy/modules/gcp/tidb-cluster/main.tf @@ -135,6 +135,7 @@ locals { module "tidb-cluster" { source = "../../share/tidb-cluster-release" + create = var.create_tidb_cluster_release cluster_name = var.cluster_name pd_count = var.pd_node_count * local.num_availability_zones tikv_count = var.tikv_node_count * local.num_availability_zones @@ -149,6 +150,7 @@ module "tidb-cluster" { } resource "null_resource" "wait-lb-ip" { + count = var.create_tidb_cluster_release == true ? 
1 : 0 depends_on = [ module.tidb-cluster ] diff --git a/deploy/modules/gcp/tidb-cluster/variables.tf b/deploy/modules/gcp/tidb-cluster/variables.tf index db730b919d..258b667b1f 100644 --- a/deploy/modules/gcp/tidb-cluster/variables.tf +++ b/deploy/modules/gcp/tidb-cluster/variables.tf @@ -72,3 +72,8 @@ variable "tikv_local_ssd_count" { description = "TiKV node pool local ssd count (cannot be changed after the node pool is created)" default = 1 } + +variable "create_tidb_cluster_release" { + description = "Whether create tidb-cluster release in the node pools automatically" + default = true +} diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 472cd3c611..4a7900e1dd 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -14,14 +14,6 @@ $ export GOPATH=$HOME/go $ export PATH=$PATH:$GOPATH/bin ``` -## Dependency management - -TiDB Operator uses [retool](https://github.com/twitchtv/retool) to manage Go related tools. - -```sh -$ go get -u github.com/twitchtv/retool -``` - ## Workflow ### Step 1: Fork TiDB Operator on GitHub @@ -112,6 +104,49 @@ $ make check This will show errors if your code change does not pass checks (e.g. fmt, lint). Please fix them before submitting the PR. +#### Start tidb-operator locally and do manual tests + +We uses [kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation) to +start a Kubernetes cluster locally and +[kubectl](https://kubernetes.io/docs/reference/kubectl/overview/) must be +installed to access Kubernetes cluster. + +You can refer to their official references to install them on your machine, or +run the following command to install them into our local binary directory: +`output/bin`. + +``` +$ hack/install-up-operator.sh -i +$ export PATH=$(pwd)/output/bin:$PATH +``` + +Make sure they are installed correctly: + +``` +$ kind --version +... +$ kubectl version --client +... +``` + +Create a Kubernetes cluster with `kind`: + +``` +$ kind create cluster +``` + +Build and run tidb-operator: + +``` +$ ./hack/local-up-operator.sh +``` + +Start a basic TiDB cluster: + +``` +$ kubectl apply -f examples/basic/tidb-cluster.yaml +``` + #### Run unit tests Before running your code in a real Kubernetes cluster, make sure it passes all unit tests. diff --git a/docs/aliyun-tutorial.md b/docs/aliyun-tutorial.md deleted file mode 100644 index 44fdda744d..0000000000 --- a/docs/aliyun-tutorial.md +++ /dev/null @@ -1,3 +0,0 @@ -# Deploy on Aliyun (Alibaba Cloud) - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/deploy/alibaba-cloud/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/deploy/alibaba-cloud/). 
diff --git a/docs/api-references/config.json b/docs/api-references/config.json new file mode 100644 index 0000000000..f0888cb044 --- /dev/null +++ b/docs/api-references/config.json @@ -0,0 +1,64 @@ +{ + "hideMemberFields": [ + "TypeMeta", + "TmpPath", + "DisplayName", + "DefaultProfile", + "Path", + "ListenHost", + "TCPPort", + "HTTPPort", + "InternalServerHTTPPort", + "ErrorLog", + "ServerLog", + "TiDBStatusAddr", + "ServiceAddr", + "ProxyConfig", + "ClusterManagerPath", + "Flash", + "FlashStatus", + "FlashQuota", + "FlashUser", + "FlashProfile", + "FlashApplication", + "FlashProxy", + "FlashRaft", + "ClusterLog" + ], + "hideTypePatterns": [ + "ParseError$", + "List$", + "DataResource", + "ProxyConfig", + "^Flash$", + "FlashCluster", + "FlashStatus", + "FlashQuota", + "FlashUser", + "FlashProfile", + "FlashApplication", + "FlashProxy", + "FlashServerConfig", + "FlashRaft" + ], + "externalPackages": [ + { + "typeMatchPrefix": "^k8s\\.io/apimachinery/pkg/apis/meta/v1\\.Duration$", + "docsURLTemplate": "https://godoc.org/k8s.io/apimachinery/pkg/apis/meta/v1#Duration" + }, + { + "typeMatchPrefix": "^k8s\\.io/(api|apimachinery/pkg/apis)/", + "docsURLTemplate": "https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.13/#{{lower .TypeIdentifier}}-{{arrIndex .PackageSegments -1}}-{{arrIndex .PackageSegments -2}}" + }, + { + "typeMatchPrefix": "^github\\.com/knative/pkg/apis/duck/", + "docsURLTemplate": "https://godoc.org/github.com/knative/pkg/apis/duck/{{arrIndex .PackageSegments -1}}#{{.TypeIdentifier}}" + } + ], + "typeDisplayNamePrefixOverrides": { + "k8s.io/api/": "Kubernetes ", + "k8s.io/apimachinery/pkg/apis/": "Kubernetes " + }, + "markdownDisabled": false +} + diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md new file mode 100644 index 0000000000..a676271b16 --- /dev/null +++ b/docs/api-references/docs.md @@ -0,0 +1,14277 @@ +--- +title: TiDB Operator API Document +summary: Reference of TiDB Operator API +category: how-to +--- +

# API Document

Packages:

- pingcap.com/v1alpha1

## pingcap.com/v1alpha1

Package v1alpha1 is the v1alpha1 version of the API.

Resource Types:

- Backup
- BackupSchedule
- Restore
- TidbCluster
- TidbClusterAutoScaler
- TidbInitializer
- TidbMonitor

### Backup

Backup is a backup of tidb cluster.

| Field | Type | Description |
| --- | --- | --- |
| `apiVersion` | string | `pingcap.com/v1alpha1` |
| `kind` | string | `Backup` |
| `metadata` | Kubernetes meta/v1.ObjectMeta | Refer to the Kubernetes API documentation for the fields of the `metadata` field. |
| `spec` | BackupSpec | |
| `spec.from` | TiDBAccessConfig | From is the tidb cluster that needs to backup. |
| `spec.backupType` | BackupType | Type is the backup type for tidb cluster. |
| `spec.tikvGCLifeTime` | string | TikvGCLifeTime is to specify the safe gc life time for backup. The time limit during which data is retained for each GC, in the format of Go Duration. When a GC happens, the current time minus this value is the safe point. |
| `spec.StorageProvider` | StorageProvider | Members of StorageProvider are embedded into this type. StorageProvider configures where and how backups should be stored. |
| `spec.storageClassName` | string | (Optional) The storageClassName of the persistent volume for Backup data storage. Defaults to Kubernetes default storage class. |
| `spec.storageSize` | string | StorageSize is the request storage size for backup job. |
| `spec.br` | BRConfig | BRConfig is the configs for BR. |
| `spec.tolerations` | []Kubernetes core/v1.Toleration | (Optional) Base tolerations of backup Pods, components may add more tolerations upon this respectively. |
| `spec.affinity` | Kubernetes core/v1.Affinity | (Optional) Affinity of backup Pods. |
| `spec.useKMS` | bool | Use KMS to decrypt the secrets. |
| `spec.serviceAccount` | string | Specify service account of backup. |
| `status` | BackupStatus | |
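To show how these fields fit together, here is a minimal sketch of a Backup manifest. The cluster name, namespace, secrets and bucket are hypothetical, the keys under `from` are assumptions (TiDBAccessConfig is not expanded in this section), `full` is an assumed BackupType value, and `gcs` is assumed to be the embedded StorageProvider key.

```yaml
apiVersion: pingcap.com/v1alpha1
kind: Backup
metadata:
  name: demo-backup            # hypothetical name
  namespace: backup-test       # hypothetical namespace
spec:
  from:                        # TiDBAccessConfig; keys below are assumptions
    host: demo-cluster-tidb
    secretName: backup-demo-secret
  backupType: full             # assumed value; valid BackupType values are not listed here
  tikvGCLifeTime: 72h          # Go Duration format, as described above
  gcs:                         # assumed embedded StorageProvider key (see GcsStorageProvider)
    projectId: my-gcp-project  # hypothetical project
    bucket: my-backup-bucket   # hypothetical bucket
    secretName: gcs-secret     # secret with the GCS service account credentials JSON
  storageClassName: standard
  storageSize: 100Gi
  br:
    cluster: demo-cluster      # cluster to back up
    clusterNamespace: tidb-cluster
```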

### BackupSchedule

BackupSchedule is a backup schedule of tidb cluster.

| Field | Type | Description |
| --- | --- | --- |
| `apiVersion` | string | `pingcap.com/v1alpha1` |
| `kind` | string | `BackupSchedule` |
| `metadata` | Kubernetes meta/v1.ObjectMeta | Refer to the Kubernetes API documentation for the fields of the `metadata` field. |
| `spec` | BackupScheduleSpec | |
| `spec.schedule` | string | Schedule specifies the cron string used for backup scheduling. |
| `spec.pause` | bool | Pause means paused backupSchedule. |
| `spec.maxBackups` | int32 | MaxBackups is to specify how many backups we want to keep. 0 is a magic number to indicate un-limited backups. |
| `spec.maxReservedTime` | string | MaxReservedTime is to specify how long backups we want to keep. |
| `spec.backupTemplate` | BackupSpec | BackupTemplate is the specification of the backup structure to get scheduled. |
| `spec.storageClassName` | string | (Optional) The storageClassName of the persistent volume for Backup data storage if no storage class name is set in BackupSpec. Defaults to Kubernetes default storage class. |
| `spec.storageSize` | string | StorageSize is the request storage size for backup job. |
| `status` | BackupScheduleStatus | |
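A minimal sketch of a BackupSchedule that runs a backup template nightly; all names and retention values are illustrative, and `gcs` is assumed to be the embedded StorageProvider key.

```yaml
apiVersion: pingcap.com/v1alpha1
kind: BackupSchedule
metadata:
  name: demo-backup-schedule    # hypothetical name
spec:
  schedule: "0 2 * * *"         # cron string: daily at 02:00
  maxBackups: 7                 # keep at most 7 backups (0 would mean unlimited)
  maxReservedTime: 168h         # alternatively, keep backups for 7 days (illustrative)
  backupTemplate:               # a BackupSpec; same fields as in the Backup example above
    br:
      cluster: demo-cluster
      clusterNamespace: tidb-cluster
    gcs:                        # assumed embedded StorageProvider key
      projectId: my-gcp-project
      bucket: my-backup-bucket
      secretName: gcs-secret
```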

### Restore

Restore represents the restoration of backup of a tidb cluster.

| Field | Type | Description |
| --- | --- | --- |
| `apiVersion` | string | `pingcap.com/v1alpha1` |
| `kind` | string | `Restore` |
| `metadata` | Kubernetes meta/v1.ObjectMeta | Refer to the Kubernetes API documentation for the fields of the `metadata` field. |
| `spec` | RestoreSpec | |
| `spec.to` | TiDBAccessConfig | To is the tidb cluster that needs to restore. |
| `spec.backupType` | BackupType | Type is the backup type for tidb cluster. |
| `spec.tikvGCLifeTime` | string | TikvGCLifeTime is to specify the safe gc life time for restore. The time limit during which data is retained for each GC, in the format of Go Duration. When a GC happens, the current time minus this value is the safe point. |
| `spec.StorageProvider` | StorageProvider | Members of StorageProvider are embedded into this type. StorageProvider configures where and how backups should be stored. |
| `spec.storageClassName` | string | (Optional) The storageClassName of the persistent volume for Restore data storage. Defaults to Kubernetes default storage class. |
| `spec.storageSize` | string | StorageSize is the request storage size for backup job. |
| `spec.br` | BRConfig | BR is the configs for BR. |
| `spec.tolerations` | []Kubernetes core/v1.Toleration | (Optional) Base tolerations of restore Pods, components may add more tolerations upon this respectively. |
| `spec.affinity` | Kubernetes core/v1.Affinity | (Optional) Affinity of restore Pods. |
| `spec.useKMS` | bool | Use KMS to decrypt the secrets. |
| `spec.serviceAccount` | string | Specify service account of restore. |
| `status` | RestoreStatus | |
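A matching Restore sketch that mirrors the Backup example; names, secrets and bucket are hypothetical, and the keys under `to` and `gcs` are assumptions.

```yaml
apiVersion: pingcap.com/v1alpha1
kind: Restore
metadata:
  name: demo-restore             # hypothetical name
  namespace: restore-test        # hypothetical namespace
spec:
  to:                            # TiDBAccessConfig; keys below are assumptions
    host: demo-cluster-tidb
    secretName: restore-demo-secret
  backupType: full               # assumed value
  gcs:                           # assumed embedded StorageProvider key
    projectId: my-gcp-project
    bucket: my-backup-bucket
    secretName: gcs-secret
  storageClassName: standard
  storageSize: 100Gi
  br:
    cluster: demo-cluster        # cluster to restore into
    clusterNamespace: tidb-cluster
```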

### TidbCluster

TidbCluster is the control script's spec.

| Field | Type | Description |
| --- | --- | --- |
| `apiVersion` | string | `pingcap.com/v1alpha1` |
| `kind` | string | `TidbCluster` |
| `metadata` | Kubernetes meta/v1.ObjectMeta | Refer to the Kubernetes API documentation for the fields of the `metadata` field. |
| `spec` | TidbClusterSpec | Spec defines the behavior of a tidb cluster. |
| `spec.pd` | PDSpec | PD cluster spec. |
| `spec.tidb` | TiDBSpec | TiDB cluster spec. |
| `spec.tikv` | TiKVSpec | TiKV cluster spec. |
| `spec.tiflash` | TiFlashSpec | (Optional) TiFlash cluster spec. |
| `spec.pump` | PumpSpec | (Optional) Pump cluster spec. |
| `spec.helper` | HelperSpec | (Optional) Helper spec. |
| `spec.paused` | bool | (Optional) Indicates that the tidb cluster is paused and will not be processed by the controller. |
| `spec.version` | string | (Optional) TiDB cluster version. TODO: remove optional after defaulting logic is introduced. |
| `spec.schedulerName` | string | SchedulerName of TiDB cluster Pods. |
| `spec.pvReclaimPolicy` | Kubernetes core/v1.PersistentVolumeReclaimPolicy | Persistent volume reclaim policy applied to the PVs consumed by the TiDB cluster. |
| `spec.imagePullPolicy` | Kubernetes core/v1.PullPolicy | ImagePullPolicy of TiDB cluster Pods. |
| `spec.configUpdateStrategy` | ConfigUpdateStrategy | ConfigUpdateStrategy determines how a configuration change is applied to the cluster. UpdateStrategyInPlace updates the ConfigMap in place, and an extra rolling update of the cluster component is needed to reload the change. UpdateStrategyRollingUpdate creates a new ConfigMap with the new configuration and rolling-updates the related components to use it, so the new configuration is applied automatically. |
| `spec.enablePVReclaim` | bool | (Optional) Whether to enable PVC reclaim for orphan PVCs left by statefulset scale-in. Defaults to false. |
| `spec.tlsCluster` | TLSCluster | (Optional) Whether to enable TLS connections between TiDB server components. Defaults to nil. |
| `spec.hostNetwork` | bool | (Optional) Whether HostNetwork is enabled for TiDB cluster Pods. Defaults to false. |
| `spec.affinity` | Kubernetes core/v1.Affinity | (Optional) Affinity of TiDB cluster Pods. |
| `spec.priorityClassName` | string | (Optional) PriorityClassName of TiDB cluster Pods. Defaults to omitted. |
| `spec.nodeSelector` | map[string]string | (Optional) Base node selectors of TiDB cluster Pods; components may add or override selectors upon this respectively. |
| `spec.annotations` | map[string]string | (Optional) Base annotations of TiDB cluster Pods; components may add or override annotations upon this respectively. |
| `spec.tolerations` | []Kubernetes core/v1.Toleration | (Optional) Base tolerations of TiDB cluster Pods; components may add more tolerations upon this respectively. |
| `spec.timezone` | string | (Optional) Time zone of TiDB cluster Pods. Defaults to UTC. |
| `spec.services` | []Service | Services list the non-headless service types used in TidbCluster. Deprecated. |
| `status` | TidbClusterStatus | Most recently observed status of the tidb cluster. |
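The fields above are exercised by the db.yaml.example manifest added earlier in this diff; the sketch below condenses that example into a minimal TidbCluster (the name is hypothetical, and resources, node scheduling and sysctls are omitted).

```yaml
apiVersion: pingcap.com/v1alpha1
kind: TidbCluster
metadata:
  name: demo-cluster              # hypothetical name
spec:
  version: v3.0.12
  timezone: UTC
  pvReclaimPolicy: Retain
  configUpdateStrategy: RollingUpdate
  schedulerName: tidb-scheduler
  pd:
    baseImage: pingcap/pd
    replicas: 3
    requests:
      storage: 1Gi
    config:
      log:
        level: info
  tikv:
    baseImage: pingcap/tikv
    replicas: 3
    requests:
      storage: 45Gi
    config:
      log-level: info
  tidb:
    baseImage: pingcap/tidb
    replicas: 2
    service:
      type: LoadBalancer
    config:
      log:
        level: info
```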

### TidbClusterAutoScaler

TidbClusterAutoScaler is the control script's spec.

| Field | Type | Description |
| --- | --- | --- |
| `apiVersion` | string | `pingcap.com/v1alpha1` |
| `kind` | string | `TidbClusterAutoScaler` |
| `metadata` | Kubernetes meta/v1.ObjectMeta | Refer to the Kubernetes API documentation for the fields of the `metadata` field. |
| `spec` | TidbClusterAutoScalerSpec | Spec describes the state of the TidbClusterAutoScaler. |
| `spec.cluster` | TidbClusterRef | TidbClusterRef describes the target TidbCluster. |
| `spec.metricsUrl` | string | (Optional) Prometheus is used to fetch the metrics until PD can provide them. MetricsUrl represents the url to fetch the metrics info from. |
| `spec.monitor` | TidbMonitorRef | (Optional) TidbMonitorRef describes the target TidbMonitor; when MetricsUrl and Monitor are both set, the Operator uses MetricsUrl. |
| `spec.tikv` | TikvAutoScalerSpec | (Optional) TiKV represents the auto-scaling spec for tikv. |
| `spec.tidb` | TidbAutoScalerSpec | (Optional) TiDB represents the auto-scaling spec for tidb. |
| `status` | TidbClusterAutoSclaerStatus | Status describes the status of the TidbClusterAutoScaler. |
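A minimal sketch of a TidbClusterAutoScaler; the `name` keys under `cluster` and `monitor` are assumptions (TidbClusterRef and TidbMonitorRef are not expanded in this section), and the replica bounds come from BasicAutoScalerSpec further down.

```yaml
apiVersion: pingcap.com/v1alpha1
kind: TidbClusterAutoScaler
metadata:
  name: demo-auto-scaler          # hypothetical name
spec:
  cluster:
    name: demo-cluster            # assumed TidbClusterRef key
  monitor:
    name: demo-monitor            # assumed TidbMonitorRef key
  tikv:
    minReplicas: 3
    maxReplicas: 6
  tidb:
    minReplicas: 2
    maxReplicas: 4
```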

### TidbInitializer

TidbInitializer is a TiDB cluster initializing job.

| Field | Type | Description |
| --- | --- | --- |
| `apiVersion` | string | `pingcap.com/v1alpha1` |
| `kind` | string | `TidbInitializer` |
| `metadata` | Kubernetes meta/v1.ObjectMeta | Refer to the Kubernetes API documentation for the fields of the `metadata` field. |
| `spec` | TidbInitializerSpec | Spec defines the desired state of TidbInitializer. |
| `spec.image` | string | |
| `spec.cluster` | TidbClusterRef | |
| `spec.imagePullPolicy` | Kubernetes core/v1.PullPolicy | (Optional) |
| `spec.permitHost` | string | (Optional) permitHost is the only host that will be allowed to connect to the TiDB cluster. |
| `spec.initSql` | string | (Optional) InitSql is the SQL statements executed after the TiDB cluster is bootstrapped. |
| `spec.initSqlConfigMap` | string | (Optional) InitSqlConfigMapName references a ConfigMap that provides the init SQL; it takes higher precedence than initSql if set. |
| `spec.passwordSecret` | string | (Optional) |
| `spec.resources` | Kubernetes core/v1.ResourceRequirements | (Optional) |
| `spec.timezone` | string | (Optional) Time zone of TiDB initializer Pods. |
| `status` | TidbInitializerStatus | Most recently observed status of the TidbInitializer. |
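A minimal sketch of a TidbInitializer; the image, secret and SQL statement are hypothetical, and the `name` key under `cluster` is an assumption.

```yaml
apiVersion: pingcap.com/v1alpha1
kind: TidbInitializer
metadata:
  name: demo-init                     # hypothetical name
spec:
  image: my-registry/mysqlclient:1.0  # hypothetical initializer image
  cluster:
    name: demo-cluster                # assumed TidbClusterRef key
  initSql: "CREATE DATABASE IF NOT EXISTS app;"  # run once the cluster is bootstrapped
  passwordSecret: tidb-root-secret    # hypothetical secret holding the root password
  timezone: UTC
```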

### TidbMonitor

TidbMonitor encodes the spec and status of the monitoring component of a TiDB cluster.

| Field | Type | Description |
| --- | --- | --- |
| `apiVersion` | string | `pingcap.com/v1alpha1` |
| `kind` | string | `TidbMonitor` |
| `metadata` | Kubernetes meta/v1.ObjectMeta | Refer to the Kubernetes API documentation for the fields of the `metadata` field. |
| `spec` | TidbMonitorSpec | Spec defines the desired state of TidbMonitor. |
| `spec.clusters` | []TidbClusterRef | |
| `spec.prometheus` | PrometheusSpec | |
| `spec.grafana` | GrafanaSpec | (Optional) |
| `spec.reloader` | ReloaderSpec | |
| `spec.initializer` | InitializerSpec | |
| `spec.imagePullPolicy` | Kubernetes core/v1.PullPolicy | |
| `spec.persistent` | bool | (Optional) |
| `spec.storageClassName` | string | (Optional) |
| `spec.storage` | string | (Optional) |
| `spec.nodeSelector` | map[string]string | (Optional) |
| `spec.annotations` | map[string]string | (Optional) |
| `spec.tolerations` | []Kubernetes core/v1.Toleration | (Optional) |
| `spec.kubePrometheusURL` | string | (Optional) kubePrometheusURL is where tidb-monitoring gets the common metrics of kube-prometheus. Ref: https://github.com/coreos/kube-prometheus |
| `spec.alertmanagerURL` | string | (Optional) alertmanagerURL is where tidb-monitoring pushes alerts to. Ref: https://prometheus.io/docs/alerting/alertmanager/ |
| `status` | TidbMonitorStatus | Most recently observed status of the TidbMonitor. |
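These fields are exercised by the db-monitor.yaml.example manifest added earlier in this diff; the sketch below condenses it into a minimal TidbMonitor (the names are hypothetical).

```yaml
apiVersion: pingcap.com/v1alpha1
kind: TidbMonitor
metadata:
  name: demo-monitor                 # hypothetical name
spec:
  clusters:
  - name: demo-cluster               # the TidbCluster to monitor
  prometheus:
    baseImage: prom/prometheus
    version: v2.11.1
    reserveDays: 12
  grafana:
    baseImage: grafana/grafana
    version: 6.0.1
    service:
      type: LoadBalancer
  initializer:
    baseImage: pingcap/tidb-monitor-initializer
    version: v3.0.12
  reloader:
    baseImage: pingcap/tidb-monitor-reloader
    version: v1.0.1
  imagePullPolicy: IfNotPresent
  persistent: true
  storage: 100Gi
  storageClassName: ebs-gp2
```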

BRConfig +

+

+(Appears on: +BackupSpec, +RestoreSpec) +

+

+

BRConfig contains config for BR

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+cluster
+ +string + +
+

ClusterName of backup/restore cluster

+
+clusterNamespace
+ +string + +
+

Namespace of backup/restore cluster

+
+db
+ +string + +
+

DB is the specific DB which will be backed-up or restored

+
+table
+ +string + +
+

Table is the specific table which will be backed-up or restored

+
+logLevel
+ +string + +
+

LogLevel is the log level

+
+statusAddr
+ +string + +
+

StatusAddr is the HTTP listening address for the status report service. Set to empty string to disable

+
+concurrency
+ +uint32 + +
+

Concurrency is the size of thread pool on each node that execute the backup task

+
+rateLimit
+ +uint + +
+

RateLimit is the rate limit of the backup task, MB/s per node

+
+timeAgo
+ +string + +
+

TimeAgo is the history version of the backup task, e.g. 1m, 1h

+
+checksum
+ +bool + +
+

Checksum specifies whether to run checksum after backup

+
+sendCredToTikv
+ +bool + +
+

SendCredToTikv specifies whether to send credentials to TiKV

+
+onLine
+ +bool + +
+

OnLine specifies whether online during restore

+
+
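The BRConfig fields documented in this section are set under `spec.br` of a Backup or Restore; a sketch with illustrative values only:

```yaml
spec:
  br:
    cluster: demo-cluster            # ClusterName of backup/restore cluster
    clusterNamespace: tidb-cluster   # Namespace of backup/restore cluster
    db: app                          # back up / restore only this DB (optional narrowing)
    table: orders                    # back up / restore only this table (optional narrowing)
    logLevel: info
    concurrency: 4                   # thread pool size on each node
    rateLimit: 128                   # MB/s per node
    checksum: true                   # run checksum after backup
    sendCredToTikv: true             # send credentials to TiKV
```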

BackupCondition +

+

+(Appears on: +BackupStatus) +

+

+

BackupCondition describes the observed state of a Backup at a certain point.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+type
+ + +BackupConditionType + + +
+
+status
+ + +Kubernetes core/v1.ConditionStatus + + +
+
+lastTransitionTime
+ + +Kubernetes meta/v1.Time + + +
+
+reason
+ +string + +
+
+message
+ +string + +
+
+

BackupConditionType +(string alias)

+

+(Appears on: +BackupCondition) +

+

+

BackupConditionType represents a valid condition of a Backup.

+

+

BackupScheduleSpec +

+

+(Appears on: +BackupSchedule) +

+

+

BackupScheduleSpec contains the backup schedule specification for a tidb cluster.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+schedule
+ +string + +
+

Schedule specifies the cron string used for backup scheduling.

+
+pause
+ +bool + +
+

Pause means paused backupSchedule

+
+maxBackups
+ +int32 + +
+

MaxBackups is to specify how many backups we want to keep +0 is magic number to indicate un-limited backups.

+
+maxReservedTime
+ +string + +
+

MaxReservedTime is to specify how long backups we want to keep.

+
+backupTemplate
+ + +BackupSpec + + +
+

BackupTemplate is the specification of the backup structure to get scheduled.

+
+storageClassName
+ +string + +
+(Optional) +

The storageClassName of the persistent volume for Backup data storage if not storage class name set in BackupSpec. +Defaults to Kubernetes default storage class.

+
+storageSize
+ +string + +
+

StorageSize is the request storage size for backup job

+
+

BackupScheduleStatus +

+

+(Appears on: +BackupSchedule) +

+

+

BackupScheduleStatus represents the current state of a BackupSchedule.

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+lastBackup
+ +string + +
+

LastBackup represents the last backup.

+
+lastBackupTime
+ + +Kubernetes meta/v1.Time + + +
+

LastBackupTime represents the last time the backup was successfully created.

+
+allBackupCleanTime
+ + +Kubernetes meta/v1.Time + + +
+

AllBackupCleanTime represents the time when all backup entries are cleaned up

+
+

BackupSpec +

+

+(Appears on: +Backup, +BackupScheduleSpec) +

+

+

BackupSpec contains the backup specification for a tidb cluster.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+from
+ + +TiDBAccessConfig + + +
+

From is the tidb cluster that needs to backup.

+
+backupType
+ + +BackupType + + +
+

Type is the backup type for tidb cluster.

+
+tikvGCLifeTime
+ +string + +
+

TikvGCLifeTime is to specify the safe gc life time for backup. +The time limit during which data is retained for each GC, in the format of Go Duration. +When a GC happens, the current time minus this value is the safe point.

+
+StorageProvider
+ + +StorageProvider + + +
+

+(Members of StorageProvider are embedded into this type.) +

+

StorageProvider configures where and how backups should be stored.

+
+storageClassName
+ +string + +
+(Optional) +

The storageClassName of the persistent volume for Backup data storage. +Defaults to Kubernetes default storage class.

+
+storageSize
+ +string + +
+

StorageSize is the request storage size for backup job

+
+br
+ + +BRConfig + + +
+

BRConfig is the configs for BR

+
+tolerations
+ + +[]Kubernetes core/v1.Toleration + + +
+(Optional) +

Base tolerations of backup Pods, components may add more tolerations upon this respectively

+
+affinity
+ + +Kubernetes core/v1.Affinity + + +
+(Optional) +

Affinity of backup Pods

+
+useKMS
+ +bool + +
+

Use KMS to decrypt the secrets

+
+serviceAccount
+ +string + +
+

Specify service account of backup

+
+

BackupStatus +

+

+(Appears on: +Backup) +

+

+

BackupStatus represents the current status of a backup.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+backupPath
+ +string + +
+

BackupPath is the location of the backup.

+
+timeStarted
+ + +Kubernetes meta/v1.Time + + +
+

TimeStarted is the time at which the backup was started.

+
+timeCompleted
+ + +Kubernetes meta/v1.Time + + +
+

TimeCompleted is the time at which the backup was completed.

+
+backupSize
+ +int64 + +
+

BackupSize is the data size of the backup.

+
+commitTs
+ +string + +
+

CommitTs is the snapshot time point of tidb cluster.

+
+conditions
+ + +[]BackupCondition + + +
+
+

BackupStorageType +(string alias)

+

+

BackupStorageType represents the backend storage type of backup.

+

+

BackupType +(string alias)

+

+(Appears on: +BackupSpec, +RestoreSpec) +

+

+

BackupType represents the backup type.

+

+

BasicAutoScalerSpec +

+

+(Appears on: +TidbAutoScalerSpec, +TikvAutoScalerSpec) +

+

+

BasicAutoScalerSpec describes the basic spec for auto-scaling

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+maxReplicas
+ +int32 + +
+

maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale out. +It cannot be less than minReplicas.

+
+minReplicas
+ +int32 + +
+(Optional) +

minReplicas is the lower limit for the number of replicas to which the autoscaler +can scale down. It defaults to 1 pod. Scaling is active as long as at least one metric value is +available.

+
+scaleInIntervalSeconds
+ +int32 + +
+(Optional) +

ScaleInIntervalSeconds represents the duration seconds between each auto-scaling-in +If not set, the default ScaleInIntervalSeconds will be set to 500

+
+scaleOutIntervalSeconds
+ +int32 + +
+(Optional) +

ScaleOutIntervalSeconds represents the duration seconds between each auto-scaling-out +If not set, the default ScaleOutIntervalSeconds will be set to 300

+
+metrics
+ + +[]Kubernetes autoscaling/v2beta2.MetricSpec + + +
+(Optional) +

metrics contains the specifications for which to use to calculate the +desired replica count (the maximum replica count across all metrics will +be used). The desired replica count is calculated multiplying the +ratio between the target value and the current value by the current +number of pods. Ergo, metrics used must decrease as the pod count is +increased, and vice-versa. See the individual metric source types for +more information about how each type of metric must respond. +If not set, the default metric will be set to 80% average CPU utilization.

+
+metricsTimeDuration
+ +string + +
+(Optional) +

MetricsTimeDuration describe the Time duration to be queried in the Prometheus

+
+scaleOutThreshold
+ +int32 + +
+(Optional) +

ScaleOutThreshold describe the consecutive threshold for the auto-scaling, +if the consecutive counts of the scale-out result in auto-scaling reach this number, +the auto-scaling would be performed. +If not set, the default value is 3.

+
+scaleInThreshold
+ +int32 + +
+(Optional) +

ScaleInThreshold describe the consecutive threshold for the auto-scaling, +if the consecutive counts of the scale-in result in auto-scaling reach this number, +the auto-scaling would be performed. +If not set, the default value is 5.

+
+
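These knobs appear under the `tikv` and `tidb` sections of a TidbClusterAutoScaler spec; the sketch below shows them for tikv with illustrative values, and the defaults noted in comments are the ones stated above.

```yaml
tikv:
  maxReplicas: 8                  # required upper bound for scale-out
  minReplicas: 3                  # lower bound; defaults to 1 when omitted
  scaleInIntervalSeconds: 500     # default 500
  scaleOutIntervalSeconds: 300    # default 300
  metricsTimeDuration: 3m         # window queried from Prometheus (illustrative value)
  scaleOutThreshold: 3            # consecutive scale-out results required; default 3
  scaleInThreshold: 5             # consecutive scale-in results required; default 5
```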

BasicAutoScalerStatus +

+

+(Appears on: +TidbAutoScalerStatus, +TikvAutoScalerStatus) +

+

+

BasicAutoScalerStatus describe the basic auto-scaling status

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+metrics
+ + +[]MetricsStatus + + +
+(Optional) +

MetricsStatusList describes the metrics status in the last auto-scaling reconciliation

+
+currentReplicas
+ +int32 + +
+

CurrentReplicas describes the current replicas for the component(tidb/tikv)

+
+recommendedReplicas
+ +int32 + +
+(Optional) +

RecommendedReplicas describes the calculated replicas in the last auto-scaling reconciliation for the component(tidb/tikv)

+
+lastAutoScalingTimestamp
+ + +Kubernetes meta/v1.Time + + +
+(Optional) +

LastAutoScalingTimestamp describes the last auto-scaling timestamp for the component(tidb/tikv)

+
+

Binlog +

+

+(Appears on: +TiDBConfig) +

+

+

Binlog is the config for binlog.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+enable
+ +bool + +
+

optional

+
+write-timeout
+ +string + +
+(Optional) +

Optional: Defaults to 15s

+
+ignore-error
+ +bool + +
+(Optional) +

If IgnoreError is true, when writing binlog meets error, TiDB would +ignore the error.

+
+binlog-socket
+ +string + +
+(Optional) +

Use socket file to write binlog, for compatible with kafka version tidb-binlog.

+
+strategy
+ +string + +
+(Optional) +

The strategy for sending binlog to pump, value can be “range,omitempty” or “hash,omitempty” now. +Optional: Defaults to range

+
+

CommonConfig +

+

+(Appears on: +TiFlashConfig) +

+

+

CommonConfig is the configuration of TiFlash process.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+path_realtime_mode
+ +bool + +
+(Optional) +

Optional: Defaults to false

+
+mark_cache_size
+ +int64 + +
+(Optional) +

Optional: Defaults to 5368709120

+
+minmax_index_cache_size
+ +int64 + +
+(Optional) +

Optional: Defaults to 5368709120

+
+loger
+ + +FlashLogger + + +
+(Optional) +
+

ComponentAccessor +

+

+

ComponentAccessor is the interface to access component details, which respects the cluster-level properties +and component-level overrides

+

+

ComponentSpec +

+

+(Appears on: +PDSpec, +PumpSpec, +TiDBSpec, +TiFlashSpec, +TiKVSpec) +

+

+

ComponentSpec is the base spec of each component, the fields should always accessed by the BasicSpec() method to respect the cluster-level properties

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+image
+ +string + +
+

Image of the component, override baseImage and version if present +Deprecated

+
+version
+ +string + +
+(Optional) +

Version of the component. Override the cluster-level version if non-empty +Optional: Defaults to cluster-level setting

+
+imagePullPolicy
+ + +Kubernetes core/v1.PullPolicy + + +
+(Optional) +

ImagePullPolicy of the component. Override the cluster-level imagePullPolicy if present +Optional: Defaults to cluster-level setting

+
+hostNetwork
+ +bool + +
+(Optional) +

Whether Hostnetwork of the component is enabled. Override the cluster-level setting if present +Optional: Defaults to cluster-level setting

+
+affinity
+ + +Kubernetes core/v1.Affinity + + +
+(Optional) +

Affinity of the component. Override the cluster-level one if present +Optional: Defaults to cluster-level setting

+
+priorityClassName
+ +string + +
+(Optional) +

PriorityClassName of the component. Override the cluster-level one if present +Optional: Defaults to cluster-level setting

+
+schedulerName
+ +string + +
+(Optional) +

SchedulerName of the component. Override the cluster-level one if present +Optional: Defaults to cluster-level setting

+
+nodeSelector
+ +map[string]string + +
+(Optional) +

NodeSelector of the component. Merged into the cluster-level nodeSelector if non-empty +Optional: Defaults to cluster-level setting

+
+annotations
+ +map[string]string + +
+(Optional) +

Annotations of the component. Merged into the cluster-level annotations if non-empty +Optional: Defaults to cluster-level setting

+
+tolerations
+ + +[]Kubernetes core/v1.Toleration + + +
+(Optional) +

Tolerations of the component. Override the cluster-level tolerations if non-empty +Optional: Defaults to cluster-level setting

+
+podSecurityContext
+ + +Kubernetes core/v1.PodSecurityContext + + +
+(Optional) +

PodSecurityContext of the component

+
+configUpdateStrategy
+ + +ConfigUpdateStrategy + + +
+(Optional) +

ConfigUpdateStrategy of the component. Override the cluster-level updateStrategy if present +Optional: Defaults to cluster-level setting

+
+env
+ + +[]Kubernetes core/v1.EnvVar + + +
+(Optional) +

List of environment variables to set in the container, like v1.Container.Env. Note that the following env names cannot be used and may be overridden by envs built by tidb-operator: NAMESPACE, TZ, SERVICE_NAME, PEER_SERVICE_NAME, HEADLESS_SERVICE_NAME, SET_NAME, HOSTNAME, CLUSTER_NAME, POD_NAME, BINLOG_ENABLED, SLOW_LOG_FILE.

+
+

ConfigUpdateStrategy +(string alias)

+

+(Appears on: +ComponentSpec, +TidbClusterSpec) +

+

+

ConfigUpdateStrategy represents the strategy to update configuration

+

+

CoprocessorCache +

+

+(Appears on: +TiKVClient) +

+

+

CoprocessorCache is the config for coprocessor cache.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+enabled
+ +bool + +
+(Optional) +

Whether to enable the copr cache. The copr cache saves the result from TiKV Coprocessor in the memory and +reuses the result when corresponding data in TiKV is unchanged, on a region basis.

+
+capacity-mb
+ +float64 + +
+(Optional) +

The capacity in MB of the cache.

+
+admission-max-result-mb
+ +float64 + +
+(Optional) +

Only cache requests whose result set is small.

+
+admission-min-process-ms
+ +uint64 + +
+(Optional) +

Only cache requests takes notable time to process.

+
+

CrdKind +

+

+(Appears on: +CrdKinds) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+Kind
+ +string + +
+
+Plural
+ +string + +
+
+SpecName
+ +string + +
+
+ShortNames
+ +[]string + +
+
+AdditionalPrinterColums
+ +[]k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1.CustomResourceColumnDefinition + +
+
+

CrdKinds +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+KindsString
+ +string + +
+
+TiDBCluster
+ + +CrdKind + + +
+
+Backup
+ + +CrdKind + + +
+
+Restore
+ + +CrdKind + + +
+
+BackupSchedule
+ + +CrdKind + + +
+
+TiDBMonitor
+ + +CrdKind + + +
+
+TiDBInitializer
+ + +CrdKind + + +
+
+TidbClusterAutoScaler
+ + +CrdKind + + +
+
+

DashboardConfig +

+

+(Appears on: +PDConfig) +

+

+

DashboardConfig is the configuration for tidb-dashboard.

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+tidb_cacert_path
+ +string + +
+
+tidb_cert_path
+ +string + +
+
+tidb_key_path
+ +string + +
+
+

Experimental +

+

+(Appears on: +TiDBConfig) +

+

+

Experimental controls the features that are still experimental: their semantics, interfaces are subject to change. +Using these features in the production environment is not recommended.

+

+ + + + + + + + + + + + + +
FieldDescription
+allow-auto-random
+ +bool + +
+(Optional) +

Whether enable the syntax like auto_random(3) on the primary key column. +imported from TiDB v3.1.0

+
+

FileLogConfig +

+

+(Appears on: +Log, +PDLogConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+filename
+ +string + +
+(Optional) +

Log filename, leave empty to disable file log.

+
+log-rotate
+ +bool + +
+(Optional) +

Is log rotate enabled.

+
+max-size
+ +int + +
+(Optional) +

Max size for a single file, in MB.

+
+max-days
+ +int + +
+(Optional) +

Max log keep days, default is never deleting.

+
+max-backups
+ +int + +
+(Optional) +

Maximum number of old log files to retain.

+
+

Flash +

+

+(Appears on: +CommonConfig) +

+

+

Flash is the configuration of [flash] section.

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+overlap_threshold
+ +float64 + +
+(Optional) +

Optional: Defaults to 0.6

+
+compact_log_min_period
+ +int32 + +
+(Optional) +

Optional: Defaults to 200

+
+flash_cluster
+ + +FlashCluster + + +
+(Optional) +
+

FlashLogger +

+

+(Appears on: +CommonConfig) +

+

+

FlashLogger is the configuration of [logger] section.

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+size
+ +string + +
+(Optional) +

Optional: Defaults to 100M

+
+level
+ +string + +
+(Optional) +

Optional: Defaults to information

+
+count
+ +int32 + +
+(Optional) +

Optional: Defaults to 10

+
+

GcsStorageProvider +

+

+(Appears on: +StorageProvider) +

+

+

GcsStorageProvider represents the google cloud storage for storing backups.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+projectId
+ +string + +
+

ProjectId represents the project that organizes all your Google Cloud Platform resources

+
+location
+ +string + +
+

Location in which the gcs bucket is located.

+
+bucket
+ +string + +
+

Bucket in which to store the backup data.

+
+storageClass
+ +string + +
+

StorageClass represents the storage class

+
+objectAcl
+ +string + +
+

ObjectAcl represents the access control list for new objects

+
+bucketAcl
+ +string + +
+

BucketAcl represents the access control list for new buckets

+
+secretName
+ +string + +
+

SecretName is the name of secret which stores the +gcs service account credentials JSON .

+
+
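These fields configure the GCS backend of the embedded StorageProvider in Backup, Restore and BackupSpec; a sketch with a hypothetical project and bucket, assuming the embedded key is `gcs`:

```yaml
spec:
  gcs:
    projectId: my-gcp-project      # GCP project that owns the bucket
    location: us-central1          # location of the bucket
    bucket: my-tidb-backups        # bucket that stores the backup data
    storageClass: STANDARD
    objectAcl: private
    secretName: gcs-secret         # secret with the GCS service account credentials JSON
```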

GrafanaSpec +

+

+(Appears on: +TidbMonitorSpec) +

+

+

GrafanaSpec is the desired state of grafana

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+MonitorContainer
+ + +MonitorContainer + + +
+

+(Members of MonitorContainer are embedded into this type.) +

+
+logLevel
+ +string + +
+
+service
+ + +ServiceSpec + + +
+
+username
+ +string + +
+
+password
+ +string + +
+
+envs
+ +map[string]string + +
+(Optional) +
+

HelperSpec +

+

+(Appears on: +TidbClusterSpec) +

+

+

HelperSpec contains details of helper component

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+image
+ +string + +
+(Optional) +

Image used to tail slow log and set kernel parameters if necessary, must have tail and sysctl installed +Optional: Defaults to busybox:1.26.2

+
+imagePullPolicy
+ + +Kubernetes core/v1.PullPolicy + + +
+(Optional) +

ImagePullPolicy of the component. Override the cluster-level imagePullPolicy if present +Optional: Defaults to the cluster-level setting

+
+

InitializePhase +(string alias)

+

+(Appears on: +TidbInitializerStatus) +

+

+

+

InitializerSpec +

+

+(Appears on: +TidbMonitorSpec) +

+

+

InitializerSpec is the desired state of initializer

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+MonitorContainer
+ + +MonitorContainer + + +
+

+(Members of MonitorContainer are embedded into this type.) +

+
+envs
+ +map[string]string + +
+(Optional) +
+

Interval +

+

+(Appears on: +Quota) +

+

+

Interval is the configuration of [quotas.default.interval] section.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+duration
+ +int32 + +
+(Optional) +

Optional: Defaults to 3600

+
+queries
+ +int32 + +
+(Optional) +

Optional: Defaults to 0

+
+errors
+ +int32 + +
+(Optional) +

Optional: Defaults to 0

+
+result_rows
+ +int32 + +
+(Optional) +

Optional: Defaults to 0

+
+read_rows
+ +int32 + +
+(Optional) +

Optional: Defaults to 0

+
+execution_time
+ +int32 + +
+(Optional) +

Optional: Defaults to 0

+
+

IsolationRead +

+

+(Appears on: +TiDBConfig) +

+

+

IsolationRead is the config for isolation read.

+

+ + + + + + + + + + + + + +
FieldDescription
+engines
+ +[]string + +
+(Optional) +

Engines filters tidb-server access paths by engine type. +imported from v3.1.0

+
+

Log +

+

+(Appears on: +TiDBConfig) +

+

+

Log is the log section of config.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+level
+ +string + +
+(Optional) +

Log level. +Optional: Defaults to info

+
+format
+ +string + +
+(Optional) +

Log format. one of json, text, or console. +Optional: Defaults to text

+
+disable-timestamp
+ +bool + +
+(Optional) +

Disable automatic timestamps in output.

+
+enable-timestamp
+ +bool + +
+(Optional) +

EnableTimestamp enables automatic timestamps in log output.

+
+enable-error-stack
+ +bool + +
+(Optional) +

EnableErrorStack enables annotating logs with the full stack error +message.

+
+file
+ + +FileLogConfig + + +
+(Optional) +

File log config.

+
+enable-slow-log
+ +bool + +
+(Optional) +
+slow-query-file
+ +string + +
+(Optional) +
+slow-threshold
+ +uint64 + +
+(Optional) +

Optional: Defaults to 300

+
+expensive-threshold
+ +uint + +
+(Optional) +

Optional: Defaults to 10000

+
+query-log-max-len
+ +uint64 + +
+(Optional) +

Optional: Defaults to 2048

+
+record-plan-in-slow-log
+ +uint32 + +
+(Optional) +

Optional: Defaults to 1

+
+

LogTailerSpec +

+

+(Appears on: +TiFlashSpec) +

+

+

LogTailerSpec represents an optional log tailer sidecar container

+

+ + + + + + + + + + + + + +
FieldDescription
+ResourceRequirements
+ + +Kubernetes core/v1.ResourceRequirements + + +
+

+(Members of ResourceRequirements are embedded into this type.) +

+
+

MasterKeyFileConfig +

+

+(Appears on: +TiKVMasterKeyConfig) +

+

+

+ + + + + + + + + + + + + +
FieldDescription
+method
+ +string + +
+

Encryption method, use master key encryption data key +Possible values: plaintext, aes128-ctr, aes192-ctr, aes256-ctr +Optional: Default to plaintext +optional

+
+

MasterKeyKMSConfig +

+

+(Appears on: +TiKVMasterKeyConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+key-id
+ +string + +
+

AWS CMK key-id; it can be found in the AWS Console or retrieved with the aws cli +This field is required

+
+access-key
+ +string + +
+

AccessKey of AWS user, leave empty if using other authorization method +optional

+
+secret-access-key
+ +string + +
+

SecretKey of AWS user, leave empty if using other authorization method +optional

+
+region
+ +string + +
+

Region of this KMS key +Optional: Default to us-east-1 +optional

+
+endpoint
+ +string + +
+

Used for KMS-compatible services such as Ceph or minio; if using AWS, leave empty +optional

+
+

MemberPhase +(string alias)

+

+(Appears on: +PDStatus, +PumpStatus, +TiDBStatus, +TiKVStatus) +

+

+

MemberPhase is the current state of member

+

+

MemberType +(string alias)

+

+

MemberType represents member type

+

+

MetricsStatus +

+

+(Appears on: +BasicAutoScalerStatus) +

+

+

MetricsStatus describe the basic metrics status in the last auto-scaling reconciliation

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+name
+ +string + +
+

Name indicates the metrics name

+
+currentValue
+ +string + +
+

CurrentValue indicates the value calculated in the last auto-scaling reconciliation

+
+thresholdValue
+ +string + +
+

TargetValue indicates the threshold value for this metrics in auto-scaling

+
+

MonitorComponentAccessor +

+

+

+

MonitorContainer +

+

+(Appears on: +GrafanaSpec, +InitializerSpec, +PrometheusSpec, +ReloaderSpec) +

+

+

MonitorContainer is the common attributes of the container of monitoring

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+Resources
+ + +Kubernetes core/v1.ResourceRequirements + + +
+

+(Members of Resources are embedded into this type.) +

+
+baseImage
+ +string + +
+
+version
+ +string + +
+
+imagePullPolicy
+ + +Kubernetes core/v1.PullPolicy + + +
+(Optional) +
+

Networks +

+

+(Appears on: +User) +

+

+

Networks is the configuration of [users.readonly.networks] section.

+

+ + + + + + + + + + + + + +
FieldDescription
+ip
+ +string + +
+(Optional) +
+

OpenTracing +

+

+(Appears on: +TiDBConfig) +

+

+

OpenTracing is the opentracing section of the config.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+enable
+ +bool + +
+(Optional) +

Optional: Defaults to false

+
+sampler
+ + +OpenTracingSampler + + +
+(Optional) +
+reporter
+ + +OpenTracingReporter + + +
+(Optional) +
+rpc-metrics
+ +bool + +
+(Optional) +
+

OpenTracingReporter +

+

+(Appears on: +OpenTracing) +

+

+

OpenTracingReporter is the config for opentracing reporter. +See https://godoc.org/github.com/uber/jaeger-client-go/config#ReporterConfig

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+queue-size
+ +int + +
+(Optional) +
+buffer-flush-interval
+ +time.Duration + +
+(Optional) +
+log-spans
+ +bool + +
+(Optional) +
+local-agent-host-port
+ +string + +
+(Optional) +
+

OpenTracingSampler +

+

+(Appears on: +OpenTracing) +

+

+

OpenTracingSampler is the config for opentracing sampler. +See https://godoc.org/github.com/uber/jaeger-client-go/config#SamplerConfig

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+type
+ +string + +
+(Optional) +
+param
+ +float64 + +
+(Optional) +
+sampling-server-url
+ +string + +
+(Optional) +
+max-operations
+ +int + +
+(Optional) +
+sampling-refresh-interval
+ +time.Duration + +
+(Optional) +
+

PDConfig +

+

+(Appears on: +PDSpec) +

+

+

PDConfig is the configuration of pd-server

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+force-new-cluster
+ +bool + +
+(Optional) +
+enable-grpc-gateway
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+lease
+ +int64 + +
+(Optional) +

LeaderLease time, if leader doesn’t update its TTL +in etcd after lease time, etcd will expire the leader key +and other servers can campaign the leader again. +Etcd only supports seconds TTL, so here is second too. +Optional: Defaults to 3

+
+log
+ + +PDLogConfig + + +
+(Optional) +

Log related config.

+
+log-file
+ +string + +
+(Optional) +

Backward compatibility.

+
+log-level
+ +string + +
+(Optional) +
+tso-save-interval
+ +string + +
+(Optional) +

TsoSaveInterval is the interval to save timestamp. +Optional: Defaults to 3s

+
+metric
+ + +PDMetricConfig + + +
+(Optional) +
+schedule
+ + +PDScheduleConfig + + +
+(Optional) +

Immutable, change should be made through pd-ctl after cluster creation

+
+replication
+ + +PDReplicationConfig + + +
+(Optional) +

Immutable, change should be made through pd-ctl after cluster creation

+
+namespace
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDNamespaceConfig + + +
+(Optional) +
+pd-server
+ + +PDServerConfig + + +
+(Optional) +
+cluster-version
+ +string + +
+(Optional) +
+quota-backend-bytes
+ +string + +
+(Optional) +

QuotaBackendBytes Raise alarms when backend size exceeds the given quota. 0 means use the default quota. +the default size is 2GB, the maximum is 8GB.

+
+auto-compaction-mode
+ +string + +
+(Optional) +

AutoCompactionMode is either ‘periodic’ or ‘revision’. The default value is ‘periodic’.

+
+auto-compaction-retention-v2
+ +string + +
+(Optional) +

AutoCompactionRetention is either duration string with time unit +(e.g. ‘5m’ for 5-minute), or revision unit (e.g. ‘5000’). +If no time unit is provided and compaction mode is ‘periodic’, +the unit defaults to hour. For example, ‘5’ translates into 5-hour. +The default retention is 1 hour. +Before etcd v3.3.x, the type of retention is int. We add ‘v2’ suffix to make it backward compatible.

+
+tikv-interval
+ +string + +
+(Optional) +

TickInterval is the interval for etcd Raft tick.

+
+election-interval
+ +string + +
+(Optional) +

ElectionInterval is the interval for etcd Raft election.

+
+enable-prevote
+ +bool + +
+(Optional) +

Prevote is true to enable Raft Pre-Vote. +If enabled, Raft runs an additional election phase +to check whether it would get enough votes to win +an election, thus minimizing disruptions. +Optional: Defaults to true

+
+security
+ + +PDSecurityConfig + + +
+(Optional) +
+label-property
+ + +PDLabelPropertyConfig + + +
+(Optional) +
+namespace-classifier
+ +string + +
+(Optional) +

NamespaceClassifier is for classifying stores/regions into different +namespaces. +Optional: Defaults to true

+
+dashboard
+ + +DashboardConfig + + +
+(Optional) +
+

PDFailureMember +

+

+(Appears on: +PDStatus) +

+

+

PDFailureMember is the pd failure member information

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+podName
+ +string + +
+
+memberID
+ +string + +
+
+pvcUID
+ +k8s.io/apimachinery/pkg/types.UID + +
+
+memberDeleted
+ +bool + +
+
+createdAt
+ + +Kubernetes meta/v1.Time + + +
+
+

PDLabelPropertyConfig +(map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDStoreLabels alias)

+

+(Appears on: +PDConfig) +

+

+

+

PDLogConfig +

+

+(Appears on: +PDConfig) +

+

+

PDLogConfig serializes log related config in toml/json.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+level
+ +string + +
+(Optional) +

Log level. +Optional: Defaults to info

+
+format
+ +string + +
+(Optional) +

Log format. one of json, text, or console.

+
+disable-timestamp
+ +bool + +
+(Optional) +

Disable automatic timestamps in output.

+
+file
+ + +FileLogConfig + + +
+(Optional) +

File log config.

+
+development
+ +bool + +
+(Optional) +

Development puts the logger in development mode, which changes the +behavior of DPanicLevel and takes stacktraces more liberally.

+
+disable-caller
+ +bool + +
+(Optional) +

DisableCaller stops annotating logs with the calling function’s file +name and line number. By default, all logs are annotated.

+
+disable-stacktrace
+ +bool + +
+(Optional) +

DisableStacktrace completely disables automatic stacktrace capturing. By +default, stacktraces are captured for WarnLevel and above logs in +development and ErrorLevel and above in production.

+
+disable-error-verbose
+ +bool + +
+(Optional) +

DisableErrorVerbose stops annotating logs with the full verbose error +message.

+
+

PDMember +

+

+(Appears on: +PDStatus) +

+

+

PDMember is PD member

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+name
+ +string + +
+
+id
+ +string + +
+

The member ID is actually a uint64, but apimachinery's JSON only treats numbers as int64/float64, so a uint64 may overflow int64 and thus be converted to float64; the ID is therefore stored as a string. (See the sketch after this table.)

+
+clientURL
+ +string + +
+
+health
+ +bool + +
+
+lastTransitionTime
+ + +Kubernetes meta/v1.Time + + +
+

Last time the health transitioned from one to another.

+
+
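Why the id field is a string rather than a number can be seen with a minimal, illustrative Go sketch. It assumes nothing beyond the standard library's encoding/json (which apimachinery's untyped decoding also relies on): a uint64 near the top of its range loses precision once decoded as an untyped float64, while a string value round-trips exactly.

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// A member ID near the top of the uint64 range. Decoding it into an
	// untyped value goes through float64 and silently loses precision.
	var untyped map[string]interface{}
	_ = json.Unmarshal([]byte(`{"id": 18446744073709551615}`), &untyped)
	fmt.Printf("decoded as float64: %.0f\n", untyped["id"]) // not the original value

	// Storing the ID as a string, as PDMember does, round-trips it exactly.
	var member struct {
		ID string `json:"id"`
	}
	_ = json.Unmarshal([]byte(`{"id": "18446744073709551615"}`), &member)
	fmt.Println("decoded as string:", member.ID)
}
```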

PDMetricConfig +

+

+(Appears on: +PDConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+job
+ +string + +
+(Optional) +
+address
+ +string + +
+(Optional) +
+interval
+ +string + +
+(Optional) +
+

PDNamespaceConfig +

+

+(Appears on: +PDConfig) +

+

+

PDNamespaceConfig is to overwrite the global setting for specific namespace

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+leader-schedule-limit
+ +uint64 + +
+(Optional) +

LeaderScheduleLimit is the max coexist leader schedules.

+
+region-schedule-limit
+ +uint64 + +
+(Optional) +

RegionScheduleLimit is the max coexist region schedules.

+
+replica-schedule-limit
+ +uint64 + +
+(Optional) +

ReplicaScheduleLimit is the max coexist replica schedules.

+
+merge-schedule-limit
+ +uint64 + +
+(Optional) +

MergeScheduleLimit is the max coexist merge schedules.

+
+hot-region-schedule-limit
+ +uint64 + +
+(Optional) +

HotRegionScheduleLimit is the max coexist hot region schedules.

+
+max-replicas
+ +uint64 + +
+(Optional) +

MaxReplicas is the number of replicas for each region.

+
+

PDReplicationConfig +

+

+(Appears on: +PDConfig) +

+

+

PDReplicationConfig is the replication configuration.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+max-replicas
+ +uint64 + +
+(Optional) +

MaxReplicas is the number of replicas for each region. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 3

+
+location-labels
+ +[]string + +
+(Optional) +

The label keys specify the location of a store. The placement priorities are implied by the order of the label keys. For example, [“zone”, “rack”] means that we should place replicas to different zones first, then to different racks if we don't have enough zones. Immutable, change should be made through pd-ctl after cluster creation

+
+strictly-match-label,string
+ +bool + +
+(Optional) +

StrictlyMatchLabel strictly checks whether the label of TiKV is matched with LocationLabels. Immutable, change should be made through pd-ctl after cluster creation. Imported from v3.1.0

+
+enable-placement-rules,string
+ +bool + +
+(Optional) +

When the PlacementRules feature is enabled, MaxReplicas and LocationLabels are not used anymore.

+
+

PDScheduleConfig +

+

+(Appears on: +PDConfig) +

+

+

ScheduleConfig is the schedule configuration.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+max-snapshot-count
+ +uint64 + +
+(Optional) +

If the snapshot count of one store is greater than this value, +it will never be used as a source or target store. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 3

+
+max-pending-peer-count
+ +uint64 + +
+(Optional) +

Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 16

+
+max-merge-region-size
+ +uint64 + +
+(Optional) +

If both the size of region is smaller than MaxMergeRegionSize +and the number of rows in region is smaller than MaxMergeRegionKeys, +it will try to merge with adjacent regions. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 20

+
+max-merge-region-keys
+ +uint64 + +
+(Optional) +

Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 200000

+
+split-merge-interval
+ +string + +
+(Optional) +

SplitMergeInterval is the minimum interval time to permit merge after split. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 1h

+
+patrol-region-interval
+ +string + +
+(Optional) +

PatrolRegionInterval is the interval for scanning region during patrol. +Immutable, change should be made through pd-ctl after cluster creation

+
+max-store-down-time
+ +string + +
+(Optional) +

MaxStoreDownTime is the max duration after which +a store will be considered to be down if it hasn’t reported heartbeats. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 30m

+
+leader-schedule-limit
+ +uint64 + +
+(Optional) +

LeaderScheduleLimit is the max coexist leader schedules. +Immutable, change should be made through pd-ctl after cluster creation. +Optional: Defaults to 4. +Imported from v3.1.0

+
+region-schedule-limit
+ +uint64 + +
+(Optional) +

RegionScheduleLimit is the max coexist region schedules. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 2048

+
+replica-schedule-limit
+ +uint64 + +
+(Optional) +

ReplicaScheduleLimit is the max coexist replica schedules. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 64

+
+merge-schedule-limit
+ +uint64 + +
+(Optional) +

MergeScheduleLimit is the max coexist merge schedules. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 8

+
+hot-region-schedule-limit
+ +uint64 + +
+(Optional) +

HotRegionScheduleLimit is the max coexist hot region schedules. +Immutable, change should be made through pd-ctl after cluster creation +Optional: Defaults to 4

+
+hot-region-cache-hits-threshold
+ +uint64 + +
+(Optional) +

HotRegionCacheHitThreshold is the cache hits threshold of the hot region. +If the number of times a region hits the hot cache is greater than this +threshold, it is considered a hot region. +Immutable, change should be made through pd-ctl after cluster creation

+
+tolerant-size-ratio
+ +float64 + +
+(Optional) +

TolerantSizeRatio is the ratio of buffer size for balance scheduler. +Immutable, change should be made through pd-ctl after cluster creation. +Imported from v3.1.0

+
+low-space-ratio
+ +float64 + +
+(Optional) +
high space stage         transition stage           low space stage
|--------------------|-----------------------------|-------------------------|
^                    ^                             ^                         ^
0     HighSpaceRatio * capacity     LowSpaceRatio * capacity          capacity

LowSpaceRatio is the lowest usage ratio of a store which is regarded as low space. When in low space, the store region score increases to a very large value and varies inversely with the available size. Immutable, change should be made through pd-ctl after cluster creation. (A sketch of this staging appears after this table.)

+
+high-space-ratio
+ +float64 + +
+(Optional) +

HighSpaceRatio is the highest usage ratio of a store which is regarded as high space. High space means there is a lot of spare capacity, and the store region score varies directly with the used size. Immutable, change should be made through pd-ctl after cluster creation

+
+disable-raft-learner,string
+ +bool + +
+(Optional) +

DisableLearner is the option to disable using AddLearnerNode instead of AddNode +Immutable, change should be made through pd-ctl after cluster creation

+
+disable-remove-down-replica,string
+ +bool + +
+(Optional) +

DisableRemoveDownReplica is the option to prevent replica checker from +removing down replicas. +Immutable, change should be made through pd-ctl after cluster creation

+
+disable-replace-offline-replica,string
+ +bool + +
+(Optional) +

DisableReplaceOfflineReplica is the option to prevent the replica checker from replacing offline replicas. Immutable, change should be made through pd-ctl after cluster creation

+
+disable-make-up-replica,string
+ +bool + +
+(Optional) +

DisableMakeUpReplica is the option to prevent replica checker from making up +replicas when replica count is less than expected. +Immutable, change should be made through pd-ctl after cluster creation

+
+disable-remove-extra-replica,string
+ +bool + +
+(Optional) +

DisableRemoveExtraReplica is the option to prevent replica checker from +removing extra replicas. +Immutable, change should be made through pd-ctl after cluster creation

+
+disable-location-replacement,string
+ +bool + +
+(Optional) +

DisableLocationReplacement is the option to prevent replica checker from +moving replica to a better location. +Immutable, change should be made through pd-ctl after cluster creation

+
+disable-namespace-relocation,string
+ +bool + +
+(Optional) +

DisableNamespaceRelocation is the option to prevent namespace checker +from moving replica to the target namespace. +Immutable, change should be made through pd-ctl after cluster creation

+
+schedulers-v2
+ + +PDSchedulerConfigs + + +
+(Optional) +

Schedulers support for loading customized schedulers. Immutable, change should be made through pd-ctl after cluster creation

+
+schedulers-payload
+ +map[string]string + +
+(Optional) +

Only used to display

+
+enable-one-way-merge,string
+ +bool + +
+(Optional) +

EnableOneWayMerge is the option to enable one way merge. This means a Region can only be merged into the next region of it. +Imported from v3.1.0

+
+enable-cross-table-merge,string
+ +bool + +
+(Optional) +

EnableCrossTableMerge is the option to enable cross table merge. This means two Regions can be merged with different table IDs. +This option only works when key type is “table”. +Imported from v3.1.0

+
+
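The high/transition/low space staging sketched for low-space-ratio and high-space-ratio above can be illustrated with a small Go sketch. This is only a reading of the diagram, not PD's actual scoring code, and the 0.7/0.8 ratios below are example values chosen for the illustration:

```go
package main

import "fmt"

// spaceStage classifies a store by its used-space ratio into the
// high space / transition / low space stages sketched above.
func spaceStage(used, capacity, highSpaceRatio, lowSpaceRatio float64) string {
	switch ratio := used / capacity; {
	case ratio < highSpaceRatio:
		return "high space"
	case ratio < lowSpaceRatio:
		return "transition"
	default:
		return "low space"
	}
}

func main() {
	const capacity = float64(1 << 40) // a 1 TiB store, example value only
	fmt.Println(spaceStage(0.50*capacity, capacity, 0.7, 0.8)) // high space
	fmt.Println(spaceStage(0.75*capacity, capacity, 0.7, 0.8)) // transition
	fmt.Println(spaceStage(0.90*capacity, capacity, 0.7, 0.8)) // low space
}
```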

PDSchedulerConfig +

+

+

PDSchedulerConfig is customized scheduler configuration

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+type
+ +string + +
+(Optional) +

Immutable, change should be made through pd-ctl after cluster creation

+
+args
+ +[]string + +
+(Optional) +

Immutable, change should be made through pd-ctl after cluster creation

+
+disable
+ +bool + +
+(Optional) +

Immutable, change should be made through pd-ctl after cluster creation

+
+

PDSchedulerConfigs +([]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDSchedulerConfig alias)

+

+(Appears on: +PDScheduleConfig) +

+

+

+

PDSecurityConfig +

+

+(Appears on: +PDConfig) +

+

+

PDSecurityConfig is the configuration for supporting tls.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+cacert-path
+ +string + +
+(Optional) +

CAPath is the path of the file that contains the list of trusted SSL CAs. If set, the following four settings shouldn't be empty.

+
+cert-path
+ +string + +
+(Optional) +

CertPath is the path of file that contains X509 certificate in PEM format.

+
+key-path
+ +string + +
+(Optional) +

KeyPath is the path of file that contains X509 key in PEM format.

+
+cert-allowed-cn
+ +[]string + +
+(Optional) +

CertAllowedCN is the Common Name that is allowed.

+
+

PDServerConfig +

+

+(Appears on: +PDConfig) +

+

+

PDServerConfig is the configuration for pd server.

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+use-region-storage,string
+ +bool + +
+(Optional) +

UseRegionStorage enables the independent region storage.

+
+metric-storage
+ +string + +
+(Optional) +

MetricStorage is the cluster metric storage. +Currently we use prometheus as metric storage, we may use PD/TiKV as metric storage later. +Imported from v3.1.0

+
+

PDSpec +

+

+(Appears on: +TidbClusterSpec) +

+

+

PDSpec contains details of PD members

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ComponentSpec
+ + +ComponentSpec + + +
+

+(Members of ComponentSpec are embedded into this type.) +

+
+ResourceRequirements
+ + +Kubernetes core/v1.ResourceRequirements + + +
+

+(Members of ResourceRequirements are embedded into this type.) +

+
+replicas
+ +int32 + +
+

The desired ready replicas

+
+baseImage
+ +string + +
+(Optional) +

TODO: remove optional after defaulting introduced +Base image of the component, image tag is now allowed during validation

+
+service
+ + +ServiceSpec + + +
+(Optional) +

Service defines a Kubernetes service of PD cluster. +Optional: Defaults to .spec.services in favor of backward compatibility

+
+maxFailoverCount
+ +int32 + +
+(Optional) +

MaxFailoverCount limits the max replicas that could be added in failover; 0 means no failover. Optional: Defaults to 3

+
+storageClassName
+ +string + +
+(Optional) +

The storageClassName of the persistent volume for PD data storage. +Defaults to Kubernetes default storage class.

+
+config
+ + +PDConfig + + +
+(Optional) +

Config is the Configuration of pd-servers

+
+

PDStatus +

+

+(Appears on: +TidbClusterStatus) +

+

+

PDStatus is PD status

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+synced
+ +bool + +
+
+phase
+ + +MemberPhase + + +
+
+statefulSet
+ + +Kubernetes apps/v1.StatefulSetStatus + + +
+
+members
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDMember + + +
+
+leader
+ + +PDMember + + +
+
+failureMembers
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDFailureMember + + +
+
+unjoinedMembers
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.UnjoinedMember + + +
+
+image
+ +string + +
+
+

PDStoreLabel +

+

+

PDStoreLabel is the config item of LabelPropertyConfig.

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+key
+ +string + +
+(Optional) +
+value
+ +string + +
+(Optional) +
+

PDStoreLabels +([]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDStoreLabel alias)

+

+

+

Performance +

+

+(Appears on: +TiDBConfig) +

+

+

Performance is the performance section of the config.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+max-procs
+ +uint + +
+(Optional) +
+max-memory
+ +uint64 + +
+(Optional) +

Optional: Defaults to 0

+
+stats-lease
+ +string + +
+(Optional) +

Optional: Defaults to 3s

+
+stmt-count-limit
+ +uint + +
+(Optional) +

Optional: Defaults to 5000

+
+feedback-probability
+ +float64 + +
+(Optional) +

Optional: Defaults to 0.05

+
+query-feedback-limit
+ +uint + +
+(Optional) +

Optional: Defaults to 1024

+
+pseudo-estimate-ratio
+ +float64 + +
+(Optional) +

Optional: Defaults to 0.8

+
+force-priority
+ +string + +
+(Optional) +

Optional: Defaults to NO_PRIORITY

+
+bind-info-lease
+ +string + +
+(Optional) +

Optional: Defaults to 3s

+
+txn-total-size-limit
+ +uint64 + +
+(Optional) +

Optional: Defaults to 104857600

+
+tcp-keep-alive
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+cross-join
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+run-auto-analyze
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+txn-entry-count-limit
+ +uint64 + +
+(Optional) +

Optional: Defaults to 300000

+
+

PessimisticTxn +

+

+(Appears on: +TiDBConfig) +

+

+

PessimisticTxn is the config for pessimistic transaction.

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+enable
+ +bool + +
+(Optional) +

Enable must be true for ‘begin lock’ or session variable to start a pessimistic transaction. +Optional: Defaults to true

+
+max-retry-count
+ +uint + +
+(Optional) +

The max count of retry for a single statement in a pessimistic transaction. +Optional: Defaults to 256

+
+

PlanCache +

+

+

PlanCache is the PlanCache section of the config.

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+enabled
+ +bool + +
+(Optional) +
+capacity
+ +uint + +
+(Optional) +
+shards
+ +uint + +
+(Optional) +
+

Plugin +

+

+(Appears on: +TiDBConfig) +

+

+

Plugin is the config for plugin

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+dir
+ +string + +
+(Optional) +
+load
+ +string + +
+(Optional) +
+

PreparedPlanCache +

+

+(Appears on: +TiDBConfig) +

+

+

PreparedPlanCache is the PreparedPlanCache section of the config.

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+enabled
+ +bool + +
+(Optional) +

Optional: Defaults to false

+
+capacity
+ +uint + +
+(Optional) +

Optional: Defaults to 100

+
+memory-guard-ratio
+ +float64 + +
+(Optional) +

Optional: Defaults to 0.1

+
+

Profile +

+

+

Profile is the configuration profiles.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+readonly
+ +int32 + +
+(Optional) +
+max_memory_usage
+ +int64 + +
+(Optional) +
+use_uncompressed_cache
+ +int32 + +
+(Optional) +
+load_balancing
+ +string + +
+(Optional) +
+

PrometheusSpec +

+

+(Appears on: +TidbMonitorSpec) +

+

+

PrometheusSpec is the desired state of prometheus

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+MonitorContainer
+ + +MonitorContainer + + +
+

+(Members of MonitorContainer are embedded into this type.) +

+
+logLevel
+ +string + +
+
+service
+ + +ServiceSpec + + +
+
+reserveDays
+ +int + +
+(Optional) +
+

ProxyProtocol +

+

+(Appears on: +TiDBConfig) +

+

+

ProxyProtocol is the PROXY protocol section of the config.

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+networks
+ +string + +
+(Optional) +

PROXY protocol acceptable client networks. +Empty *string means disable PROXY protocol, +* means all networks.

+
+header-timeout
+ +uint + +
+(Optional) +

PROXY protocol header read timeout, Unit is second.

+
+

PumpSpec +

+

+(Appears on: +TidbClusterSpec) +

+

+

PumpSpec contains details of Pump members

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ComponentSpec
+ + +ComponentSpec + + +
+

+(Members of ComponentSpec are embedded into this type.) +

+
+ResourceRequirements
+ + +Kubernetes core/v1.ResourceRequirements + + +
+

+(Members of ResourceRequirements are embedded into this type.) +

+
+replicas
+ +int32 + +
+

The desired ready replicas

+
+baseImage
+ +string + +
+(Optional) +

TODO: remove optional after defaulting introduced +Base image of the component, image tag is now allowed during validation

+
+storageClassName
+ +string + +
+(Optional) +

The storageClassName of the persistent volume for Pump data storage. +Defaults to Kubernetes default storage class.

+
+GenericConfig
+ +github.com/pingcap/tidb-operator/pkg/util/config.GenericConfig + +
+

+(Members of GenericConfig are embedded into this type.) +

+(Optional) +

TODO: add schema +The configuration of Pump cluster.

+
+setTimeZone
+ +bool + +
+

For backward compatibility with helm chart

+
+

PumpStatus +

+

+(Appears on: +TidbClusterStatus) +

+

+

PumpStatus is Pump status

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+phase
+ + +MemberPhase + + +
+
+statefulSet
+ + +Kubernetes apps/v1.StatefulSetStatus + + +
+
+

Quota +

+

+

Quota is the configuration of [quotas.default] section.

+

+ + + + + + + + + + + + + +
FieldDescription
+interval
+ + +Interval + + +
+(Optional) +
+

ReloaderSpec +

+

+(Appears on: +TidbMonitorSpec) +

+

+

ReloaderSpec is the desired state of reloader

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+MonitorContainer
+ + +MonitorContainer + + +
+

+(Members of MonitorContainer are embedded into this type.) +

+
+service
+ + +ServiceSpec + + +
+
+

RestoreCondition +

+

+(Appears on: +RestoreStatus) +

+

+

RestoreCondition describes the observed state of a Restore at a certain point.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+type
+ + +RestoreConditionType + + +
+
+status
+ + +Kubernetes core/v1.ConditionStatus + + +
+
+lastTransitionTime
+ + +Kubernetes meta/v1.Time + + +
+
+reason
+ +string + +
+
+message
+ +string + +
+
+

RestoreConditionType +(string alias)

+

+(Appears on: +RestoreCondition) +

+

+

RestoreConditionType represents a valid condition of a Restore.

+

+

RestoreSpec +

+

+(Appears on: +Restore) +

+

+

RestoreSpec contains the specification for a restore of a tidb cluster backup.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+to
+ + +TiDBAccessConfig + + +
+

To is the tidb cluster that needs to restore.

+
+backupType
+ + +BackupType + + +
+

Type is the backup type for tidb cluster.

+
+tikvGCLifeTime
+ +string + +
+

TikvGCLifeTime is to specify the safe gc life time for restore. +The time limit during which data is retained for each GC, in the format of Go Duration. +When a GC happens, the current time minus this value is the safe point.

+
+StorageProvider
+ + +StorageProvider + + +
+

+(Members of StorageProvider are embedded into this type.) +

+

StorageProvider configures where and how backups should be stored.

+
+storageClassName
+ +string + +
+(Optional) +

The storageClassName of the persistent volume for Restore data storage. +Defaults to Kubernetes default storage class.

+
+storageSize
+ +string + +
+

StorageSize is the request storage size for backup job

+
+br
+ + +BRConfig + + +
+

BR is the configs for BR.

+
+tolerations
+ + +[]Kubernetes core/v1.Toleration + + +
+(Optional) +

Base tolerations of restore Pods, components may add more tolerations upon this respectively

+
+affinity
+ + +Kubernetes core/v1.Affinity + + +
+(Optional) +

Affinity of restore Pods

+
+useKMS
+ +bool + +
+

Use KMS to decrypt the secrets

+
+serviceAccount
+ +string + +
+

Specify service account of restore

+
+

RestoreStatus +

+

+(Appears on: +Restore) +

+

+

RestoreStatus represents the current status of a tidb cluster restore.

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+timeStarted
+ + +Kubernetes meta/v1.Time + + +
+

TimeStarted is the time at which the restore was started.

+
+timeCompleted
+ + +Kubernetes meta/v1.Time + + +
+

TimeCompleted is the time at which the restore was completed.

+
+conditions
+ + +[]RestoreCondition + + +
+
+

S3StorageProvider +

+

+(Appears on: +StorageProvider) +

+

+

S3StorageProvider represents an S3 compliant storage for storing backups.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+provider
+ + +S3StorageProviderType + + +
+

Provider represents the specific storage provider that implements the S3 interface

+
+region
+ +string + +
+

Region in which the S3 compatible bucket is located.

+
+bucket
+ +string + +
+

Bucket in which to store the backup data.

+
+endpoint
+ +string + +
+

Endpoint of S3 compatible storage service

+
+storageClass
+ +string + +
+

StorageClass represents the storage class

+
+acl
+ +string + +
+

Acl represents access control permissions for this bucket

+
+secretName
+ +string + +
+

SecretName is the name of secret which stores +S3 compliant storage access key and secret key.

+
+prefix
+ +string + +
+

Prefix for the keys.

+
+sse
+ +string + +
+

SSE Server-Side Encryption.

+
+

S3StorageProviderType +(string alias)

+

+(Appears on: +S3StorageProvider) +

+

+

S3StorageProviderType represents the specific storage provider that implements the S3 interface

+

+

Security +

+

+(Appears on: +TiDBConfig) +

+

+

Security is the security section of the config.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+skip-grant-table
+ +bool + +
+(Optional) +
+ssl-ca
+ +string + +
+(Optional) +
+ssl-cert
+ +string + +
+(Optional) +
+ssl-key
+ +string + +
+(Optional) +
+cluster-ssl-ca
+ +string + +
+(Optional) +
+cluster-ssl-cert
+ +string + +
+(Optional) +
+cluster-ssl-key
+ +string + +
+(Optional) +
+cluster-verify-cn
+ +[]string + +
+(Optional) +

ClusterVerifyCN is the Common Name that is allowed.

+
+

Service +

+

+(Appears on: +TidbClusterSpec) +

+

+

Deprecated
Service represents the service type used in TidbCluster

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+name
+ +string + +
+
+type
+ +string + +
+
+

ServiceSpec +

+

+(Appears on: +GrafanaSpec, +PDSpec, +PrometheusSpec, +ReloaderSpec, +TiDBServiceSpec) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+type
+ + +Kubernetes core/v1.ServiceType + + +
+

Type of the real kubernetes service

+
+annotations
+ +map[string]string + +
+(Optional) +

Additional annotations of the kubernetes service object

+
+loadBalancerIP
+ +string + +
+(Optional) +

LoadBalancerIP is the loadBalancerIP of service +Optional: Defaults to omitted

+
+clusterIP
+ +string + +
+(Optional) +

ClusterIP is the clusterIP of service

+
+portName
+ +string + +
+(Optional) +

PortName is the name of service port

+
+

Status +

+

+(Appears on: +TiDBConfig) +

+

+

Status is the status section of the config.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+metrics-addr
+ +string + +
+(Optional) +
+metrics-interval
+ +uint + +
+(Optional) +

Optional: Defaults to 15

+
+report-status
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+record-db-qps
+ +bool + +
+(Optional) +

Optional: Defaults to false

+
+

StmtSummary +

+

+(Appears on: +TiDBConfig) +

+

+

StmtSummary is the config for statement summary.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+enable
+ +bool + +
+(Optional) +

Enable statement summary or not.

+
+max-stmt-count
+ +uint + +
+(Optional) +

The maximum number of statements kept in memory. +Optional: Defaults to 100

+
+max-sql-length
+ +uint + +
+(Optional) +

The maximum length of displayed normalized SQL and sample SQL. +Optional: Defaults to 4096

+
+refresh-interval
+ +int + +
+(Optional) +

The refresh interval of statement summary.

+
+history-size
+ +int + +
+(Optional) +

The maximum history size of statement summary.

+
+

StorageClaim +

+

+(Appears on: +TiFlashSpec) +

+

+

StorageClaim contains details of TiFlash storages

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+resources
+ + +Kubernetes core/v1.ResourceRequirements + + +
+(Optional) +

Resources represents the minimum resources the volume should have. +More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources

+
+storageClassName
+ +string + +
+(Optional) +

Name of the StorageClass required by the claim. +More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1

+
+

StorageProvider +

+

+(Appears on: +BackupSpec, +RestoreSpec) +

+

+

StorageProvider defines the configuration for storing a backup in backend storage.

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+s3
+ + +S3StorageProvider + + +
+
+gcs
+ + +GcsStorageProvider + + +
+
+

TLSCluster +

+

+(Appears on: +TidbClusterSpec) +

+

+

TLSCluster can enable TLS connection between TiDB server components +https://pingcap.com/docs/stable/how-to/secure/enable-tls-between-components/

+

+ + + + + + + + + + + + + +
FieldDescription
+enabled
+ +bool + +
+(Optional) +

Enable mutual TLS authentication among TiDB components.
Once enabled, the mutual authentication applies to all components, and it does not support applying to only part of the components.
The steps to enable this feature:
1. Generate TiDB server components certificates and a client-side certificate for them.
There are multiple ways to generate these certificates:
- user-provided certificates: https://pingcap.com/docs/stable/how-to/secure/generate-self-signed-certificates/
- use the K8s built-in certificate signing system signed certificates: https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster/
- or use cert-manager signed certificates: https://cert-manager.io/
2. Create one Secret object per component which contains the certificates created above.
The name of this Secret must be: &lt;clusterName&gt;-&lt;componentName&gt;-cluster-secret.
For PD: kubectl create secret generic &lt;clusterName&gt;-pd-cluster-secret --namespace=&lt;namespace&gt; --from-file=tls.crt=&lt;path/to/tls.crt&gt; --from-file=tls.key=&lt;path/to/tls.key&gt; --from-file=ca.crt=&lt;path/to/ca.crt&gt;
For TiKV: kubectl create secret generic &lt;clusterName&gt;-tikv-cluster-secret --namespace=&lt;namespace&gt; --from-file=tls.crt=&lt;path/to/tls.crt&gt; --from-file=tls.key=&lt;path/to/tls.key&gt; --from-file=ca.crt=&lt;path/to/ca.crt&gt;
For TiDB: kubectl create secret generic &lt;clusterName&gt;-tidb-cluster-secret --namespace=&lt;namespace&gt; --from-file=tls.crt=&lt;path/to/tls.crt&gt; --from-file=tls.key=&lt;path/to/tls.key&gt; --from-file=ca.crt=&lt;path/to/ca.crt&gt;
For Client: kubectl create secret generic &lt;clusterName&gt;-cluster-client-secret --namespace=&lt;namespace&gt; --from-file=tls.crt=&lt;path/to/tls.crt&gt; --from-file=tls.key=&lt;path/to/tls.key&gt; --from-file=ca.crt=&lt;path/to/ca.crt&gt;
Same for other components.

+
+

TiDBAccessConfig +

+

+(Appears on: +BackupSpec, +RestoreSpec) +

+

+

TiDBAccessConfig defines the configuration for accessing the tidb cluster

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+host
+ +string + +
+

Host is the tidb cluster access address

+
+port
+ +int32 + +
+

Port is the port number to use for connecting tidb cluster

+
+user
+ +string + +
+

User is the user for login tidb cluster

+
+secretName
+ +string + +
+

SecretName is the name of secret which stores tidb cluster’s password.

+
+tlsClient
+ + +TiDBTLSClient + + +
+(Optional) +

Whether to enable the TLS connection between the SQL client and the TiDB server. Optional: Defaults to nil

+
+

TiDBConfig +

+

+(Appears on: +TiDBSpec) +

+

+

TiDBConfig is the configuration of tidb-server +For more detail, refer to https://pingcap.com/docs/stable/reference/configuration/tidb-server/configuration/

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+cors
+ +string + +
+(Optional) +
+socket
+ +string + +
+(Optional) +
+lease
+ +string + +
+(Optional) +

Optional: Defaults to 45s

+
+run-ddl
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+split-table
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+token-limit
+ +uint + +
+(Optional) +

Optional: Defaults to 1000

+
+oom-action
+ +string + +
+(Optional) +

Optional: Defaults to log

+
+mem-quota-query
+ +int64 + +
+(Optional) +

Optional: Defaults to 34359738368

+
+enable-streaming
+ +bool + +
+(Optional) +

Optional: Defaults to false

+
+enable-batch-dml
+ +bool + +
+(Optional) +

Optional: Defaults to false

+
+txn-local-latches
+ + +TxnLocalLatches + + +
+(Optional) +
+lower-case-table-names
+ +int + +
+(Optional) +
+log
+ + +Log + + +
+(Optional) +
+security
+ + +Security + + +
+(Optional) +
+status
+ + +Status + + +
+(Optional) +
+performance
+ + +Performance + + +
+(Optional) +
+prepared-plan-cache
+ + +PreparedPlanCache + + +
+(Optional) +
+opentracing
+ + +OpenTracing + + +
+(Optional) +
+proxy-protocol
+ + +ProxyProtocol + + +
+(Optional) +
+tikv-client
+ + +TiKVClient + + +
+(Optional) +
+binlog
+ + +Binlog + + +
+(Optional) +
+compatible-kill-query
+ +bool + +
+(Optional) +
+plugin
+ + +Plugin + + +
+(Optional) +
+pessimistic-txn
+ + +PessimisticTxn + + +
+(Optional) +
+check-mb4-value-in-utf8
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+alter-primary-key
+ +bool + +
+(Optional) +

Optional: Defaults to false

+
+treat-old-version-utf8-as-utf8mb4
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+split-region-max-num
+ +uint64 + +
+(Optional) +

Optional: Defaults to 1000

+
+stmt-summary
+ + +StmtSummary + + +
+(Optional) +
+repair-mode
+ +bool + +
+(Optional) +

RepairMode indicates that the TiDB is in the repair mode for table meta.

+
+repair-table-list
+ +[]string + +
+(Optional) +
+isolation-read
+ + +IsolationRead + + +
+(Optional) +

IsolationRead indicates that the TiDB reads data from which isolation level(engine and label).

+
+max-server-connections
+ +uint32 + +
+(Optional) +

MaxServerConnections is the maximum permitted number of simultaneous client connections.

+
+new_collations_enabled_on_first_bootstrap
+ +bool + +
+(Optional) +

NewCollationsEnabledOnFirstBootstrap indicates whether the new collations are enabled; it takes effect only when a TiDB cluster is bootstrapped for the first time.

+
+experimental
+ + +Experimental + + +
+(Optional) +

Experimental contains parameters for experimental features.

+
+enable-dynamic-config
+ +bool + +
+(Optional) +

EnableDynamicConfig enables the TiDB to fetch configs from PD and update itself during runtime. +see https://github.com/pingcap/tidb/pull/13660 for more details.

+
+enable-table-lock
+ +bool + +
+

imported from v3.1.0 +optional

+
+delay-clean-table-lock
+ +uint64 + +
+

imported from v3.1.0 +optional

+
+

TiDBFailureMember +

+

+(Appears on: +TiDBStatus) +

+

+

TiDBFailureMember is the tidb failure member information

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+podName
+ +string + +
+
+createdAt
+ + +Kubernetes meta/v1.Time + + +
+
+

TiDBMember +

+

+(Appears on: +TiDBStatus) +

+

+

TiDBMember is TiDB member

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+name
+ +string + +
+
+health
+ +bool + +
+
+lastTransitionTime
+ + +Kubernetes meta/v1.Time + + +
+

Last time the health transitioned from one to another.

+
+node
+ +string + +
+

Node hosting pod of this TiDB member.

+
+

TiDBServiceSpec +

+

+(Appears on: +TiDBSpec) +

+

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ServiceSpec
+ + +ServiceSpec + + +
+
+externalTrafficPolicy
+ + +Kubernetes core/v1.ServiceExternalTrafficPolicyType + + +
+(Optional) +

ExternalTrafficPolicy of the service +Optional: Defaults to omitted

+
+exposeStatus
+ +bool + +
+(Optional) +

Whether to expose the status port. Optional: Defaults to true

+
+

TiDBSlowLogTailerSpec +

+

+(Appears on: +TiDBSpec) +

+

+

TiDBSlowLogTailerSpec represents an optional log tailer sidecar with TiDB

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ResourceRequirements
+ + +Kubernetes core/v1.ResourceRequirements + + +
+

+(Members of ResourceRequirements are embedded into this type.) +

+
+image
+ +string + +
+

Image used for slowlog tailer +Deprecated, use TidbCluster.HelperImage instead

+
+imagePullPolicy
+ + +Kubernetes core/v1.PullPolicy + + +
+

ImagePullPolicy of the component. Override the cluster-level imagePullPolicy if present +Deprecated, use TidbCluster.HelperImagePullPolicy instead

+
+

TiDBSpec +

+

+(Appears on: +TidbClusterSpec) +

+

+

TiDBSpec contains details of TiDB members

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ComponentSpec
+ + +ComponentSpec + + +
+

+(Members of ComponentSpec are embedded into this type.) +

+
+ResourceRequirements
+ + +Kubernetes core/v1.ResourceRequirements + + +
+

+(Members of ResourceRequirements are embedded into this type.) +

+
+replicas
+ +int32 + +
+

The desired ready replicas

+
+baseImage
+ +string + +
+(Optional) +

TODO: remove optional after defaulting introduced +Base image of the component, image tag is now allowed during validation

+
+service
+ + +TiDBServiceSpec + + +
+(Optional) +

Service defines a Kubernetes service of TiDB cluster. +Optional: No kubernetes service will be created by default.

+
+binlogEnabled
+ +bool + +
+(Optional) +

Whether to enable TiDB Binlog; it is encouraged not to set this field and to rely on the default behavior. Optional: Defaults to true if PumpSpec is non-nil, otherwise false

+
+maxFailoverCount
+ +int32 + +
+(Optional) +

MaxFailoverCount limits the max replicas that could be added in failover; 0 means no failover. Optional: Defaults to 3

+
+separateSlowLog
+ +bool + +
+(Optional) +

Whether to output the slow log in a separate sidecar container. Optional: Defaults to true

+
+tlsClient
+ + +TiDBTLSClient + + +
+(Optional) +

Whether to enable the TLS connection between the SQL client and the TiDB server. Optional: Defaults to nil

+
+slowLogTailer
+ + +TiDBSlowLogTailerSpec + + +
+(Optional) +

The spec of the slow log tailer sidecar

+
+plugins
+ +[]string + +
+(Optional) +

Plugins is a list of plugins that are loaded by TiDB server, empty means plugin disabled

+
+config
+ + +TiDBConfig + + +
+(Optional) +

Config is the Configuration of tidb-servers

+
+

TiDBStatus +

+

+(Appears on: +TidbClusterStatus) +

+

+

TiDBStatus is TiDB status

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+phase
+ + +MemberPhase + + +
+
+statefulSet
+ + +Kubernetes apps/v1.StatefulSetStatus + + +
+
+members
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBMember + + +
+
+failureMembers
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBFailureMember + + +
+
+resignDDLOwnerRetryCount
+ +int32 + +
+
+image
+ +string + +
+
+

TiDBTLSClient +

+

+(Appears on: +TiDBAccessConfig, +TiDBSpec) +

+

+

TiDBTLSClient can enable TLS connection between TiDB server and MySQL client

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+enabled
+ +bool + +
+(Optional) +

When enabled, TiDB will accept TLS encrypted connections from the MySQL client.
The steps to enable this feature:
1. Generate a TiDB server-side certificate and a client-side certificate for the TiDB cluster.
There are multiple ways to generate certificates:
- user-provided certificates: https://pingcap.com/docs/stable/how-to/secure/enable-tls-clients/
- use the K8s built-in certificate signing system signed certificates: https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster/
- or use cert-manager signed certificates: https://cert-manager.io/
2. Create a K8s Secret object which contains the TiDB server-side certificate created above.
The name of this Secret must be: &lt;clusterName&gt;-tidb-server-secret.
kubectl create secret generic &lt;clusterName&gt;-tidb-server-secret --namespace=&lt;namespace&gt; --from-file=tls.crt=&lt;path/to/tls.crt&gt; --from-file=tls.key=&lt;path/to/tls.key&gt; --from-file=ca.crt=&lt;path/to/ca.crt&gt;
3. Create a K8s Secret object which contains the TiDB client-side certificate created above, which will be used by TiDB Operator.
The name of this Secret must be: &lt;clusterName&gt;-tidb-client-secret.
kubectl create secret generic &lt;clusterName&gt;-tidb-client-secret --namespace=&lt;namespace&gt; --from-file=tls.crt=&lt;path/to/tls.crt&gt; --from-file=tls.key=&lt;path/to/tls.key&gt; --from-file=ca.crt=&lt;path/to/ca.crt&gt;
4. Set Enabled to true.

+
+tlsSecret
+ +string + +
+(Optional) +

Specify a Secret of the client cert for backup/restore.
Optional: Defaults to &lt;clusterName&gt;-tidb-client-secret.
If you want to specify a secret for backup/restore, generate a Secret object according to the third step of the above procedure; the difference is that the Secret name can be freely defined. Then copy the Secret name to TLSSecret.
This field only works in the backup/restore process.

+
+

TiFlashConfig +

+

+(Appears on: +TiFlashSpec) +

+

+

TiFlashConfig is the configuration of TiFlash.

+

+ + + + + + + + + + + + + +
FieldDescription
+config
+ + +CommonConfig + + +
+(Optional) +

commonConfig is the Configuration of TiFlash process

+
+

TiFlashSpec +

+

+(Appears on: +TidbClusterSpec) +

+

+

TiFlashSpec contains details of TiFlash members

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ComponentSpec
+ + +ComponentSpec + + +
+

+(Members of ComponentSpec are embedded into this type.) +

+
+ResourceRequirements
+ + +Kubernetes core/v1.ResourceRequirements + + +
+

+(Members of ResourceRequirements are embedded into this type.) +

+
+serviceAccount
+ +string + +
+

Specify a Service Account for TiFlash

+
+replicas
+ +int32 + +
+

The desired ready replicas

+
+baseImage
+ +string + +
+(Optional) +

Base image of the component, image tag is now allowed during validation

+
+privileged
+ +bool + +
+(Optional) +

Whether to create the TiFlash container in privileged mode; it is highly discouraged to enable this in a critical environment. Optional: defaults to false

+
+maxFailoverCount
+ +int32 + +
+(Optional) +

MaxFailoverCount limits the max replicas that could be added in failover; 0 means no failover. Optional: Defaults to 3

+
+storageClaims
+ + +[]StorageClaim + + +
+

The persistent volume claims of the TiFlash data storages. +TiFlash supports multiple disks.

+
+config
+ + +TiFlashConfig + + +
+(Optional) +

Config is the Configuration of TiFlash

+
+logTailer
+ + +LogTailerSpec + + +
+(Optional) +

LogTailer is the configurations of the log tailers for TiFlash

+
+

TiKVBlockCacheConfig +

+

+(Appears on: +TiKVStorageConfig) +

+

+

TiKVBlockCacheConfig is the config of a block cache

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+shared
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+capacity
+ +string + +
+(Optional) +
+num-shard-bits
+ +int64 + +
+(Optional) +
+strict-capacity-limit
+ +bool + +
+(Optional) +
+high-pri-pool-ratio
+ +float64 + +
+(Optional) +
+memory-allocator
+ +string + +
+(Optional) +
+

TiKVCfConfig +

+

+(Appears on: +TiKVDbConfig, +TiKVRaftDBConfig) +

+

+

TiKVCfConfig is the config of a cf

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+block-size
+ +string + +
+(Optional) +
+block-cache-size
+ +string + +
+(Optional) +
+disable-block-cache
+ +bool + +
+(Optional) +
+cache-index-and-filter-blocks
+ +bool + +
+(Optional) +
+pin-l0-filter-and-index-blocks
+ +bool + +
+(Optional) +
+use-bloom-filter
+ +bool + +
+(Optional) +
+optimize-filters-for-hits
+ +bool + +
+(Optional) +
+whole-key-filtering
+ +bool + +
+(Optional) +
+bloom-filter-bits-per-key
+ +int64 + +
+(Optional) +
+block-based-bloom-filter
+ +bool + +
+(Optional) +
+read-amp-bytes-per-bit
+ +int64 + +
+(Optional) +
+compression-per-level
+ +[]string + +
+(Optional) +
+write-buffer-size
+ +string + +
+(Optional) +
+max-write-buffer-number
+ +int64 + +
+(Optional) +
+min-write-buffer-number-to-merge
+ +int64 + +
+(Optional) +
+max-bytes-for-level-base
+ +string + +
+(Optional) +
+target-file-size-base
+ +string + +
+(Optional) +
+level0-file-num-compaction-trigger
+ +int64 + +
+(Optional) +
+level0-slowdown-writes-trigger
+ +int64 + +
+(Optional) +
+level0-stop-writes-trigger
+ +int64 + +
+(Optional) +
+max-compaction-bytes
+ +string + +
+(Optional) +
+compaction-pri
+ +int64 + +
+(Optional) +
+dynamic-level-bytes
+ +bool + +
+(Optional) +
+num-levels
+ +int64 + +
+(Optional) +
+max-bytes-for-level-multiplier
+ +int64 + +
+(Optional) +
+compaction-style
+ +int64 + +
+(Optional) +
+disable-auto-compactions
+ +bool + +
+(Optional) +
+soft-pending-compaction-bytes-limit
+ +string + +
+(Optional) +
+hard-pending-compaction-bytes-limit
+ +string + +
+(Optional) +
+force-consistency-checks
+ +bool + +
+(Optional) +
+prop-size-index-distance
+ +int64 + +
+(Optional) +
+prop-keys-index-distance
+ +int64 + +
+(Optional) +
+enable-doubly-skiplist
+ +bool + +
+(Optional) +
+titan
+ + +TiKVTitanCfConfig + + +
+(Optional) +
+

TiKVClient +

+

+(Appears on: +TiDBConfig) +

+

+

TiKVClient is the config for tikv client.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+grpc-connection-count
+ +uint + +
+(Optional) +

GrpcConnectionCount is the max gRPC connections that will be established +with each tikv-server. +Optional: Defaults to 16

+
+grpc-keepalive-time
+ +uint + +
+(Optional) +

After a duration of this time in seconds if the client doesn’t see any activity it pings +the server to see if the transport is still alive. +Optional: Defaults to 10

+
+grpc-keepalive-timeout
+ +uint + +
+(Optional) +

After having pinged for keepalive check, the client waits for a duration of Timeout in seconds +and if no activity is seen even after that the connection is closed. +Optional: Defaults to 3

+
+commit-timeout
+ +string + +
+(Optional) +

CommitTimeout is the max time which command ‘commit’ will wait. +Optional: Defaults to 41s

+
+max-txn-time-use
+ +uint + +
+(Optional) +

MaxTxnTimeUse is the max time a Txn may use (in seconds) from its startTS to commitTS. +Optional: Defaults to 590

+
+max-batch-size
+ +uint + +
+(Optional) +

MaxBatchSize is the max batch size when calling batch commands API. +Optional: Defaults to 128

+
+overload-threshold
+ +uint + +
+(Optional) +

If TiKV load is greater than this, TiDB will wait for a while to avoid little batch. +Optional: Defaults to 200

+
+max-batch-wait-time
+ +time.Duration + +
+(Optional) +

MaxBatchWaitTime, in nanoseconds, is the max wait time for batch. Optional: Defaults to 0. (See the sketch after this table.)

+
+batch-wait-size
+ +uint + +
+(Optional) +

BatchWaitSize is the max wait size for batch. +Optional: Defaults to 8

+
+region-cache-ttl
+ +uint + +
+(Optional) +

If a Region has not been accessed for more than the given duration (in seconds), it +will be reloaded from the PD. +Optional: Defaults to 600

+
+store-limit
+ +int64 + +
+(Optional) +

If a store has been up to the limit, it will return error for successive request to +prevent the store occupying too much token in dispatching level. +Optional: Defaults to 0

+
+copr-cache
+ + +CoprocessorCache + + +
+(Optional) +
+
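Since max-batch-wait-time is a time.Duration, a bare integer in the configuration is interpreted as nanoseconds, as noted above. A minimal, illustrative sketch:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// max-batch-wait-time is a time.Duration, i.e. an integer number of
	// nanoseconds, so a bare value such as 2000000 means 2ms.
	const maxBatchWaitTime time.Duration = 2_000_000
	fmt.Println(maxBatchWaitTime) // prints "2ms"
}
```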

TiKVConfig +

+

+(Appears on: +TiKVSpec) +

+

+

TiKVConfig is the configuration of TiKV.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+log-level
+ +string + +
+(Optional) +

Optional: Defaults to info

+
+log-file
+ +string + +
+(Optional) +
+log-rotation-timespan
+ +string + +
+(Optional) +

Optional: Defaults to 24h

+
+panic-when-unexpected-key-or-data
+ +bool + +
+(Optional) +
+server
+ + +TiKVServerConfig + + +
+(Optional) +
+storage
+ + +TiKVStorageConfig + + +
+(Optional) +
+raftstore
+ + +TiKVRaftstoreConfig + + +
+(Optional) +
+rocksdb
+ + +TiKVDbConfig + + +
+(Optional) +
+coprocessor
+ + +TiKVCoprocessorConfig + + +
+(Optional) +
+readpool
+ + +TiKVReadPoolConfig + + +
+(Optional) +
+raftdb
+ + +TiKVRaftDBConfig + + +
+(Optional) +
+import
+ + +TiKVImportConfig + + +
+(Optional) +
+gc
+ + +TiKVGCConfig + + +
+(Optional) +
+pd
+ + +TiKVPDConfig + + +
+(Optional) +
+security
+ + +TiKVSecurityConfig + + +
+(Optional) +
+encryption
+ + +TiKVEncryptionConfig + + +
+(Optional) +
+

TiKVCoprocessorConfig +

+

+(Appears on: +TiKVConfig) +

+

+

TiKVCoprocessorConfig is the configuration of TiKV Coprocessor component.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+split-region-on-table
+ +bool + +
+

When it is set to true, TiKV will try to split a Region with table prefix if that Region +crosses tables. +It is recommended to turn off this option if there will be a large number of tables created. +Optional: Defaults to false +optional

+
+batch-split-limit
+ +int64 + +
+

One split check produces several split keys in batch. This config limits the number of produced +split keys in one batch. +optional

+
+region-max-size
+ +string + +
+

When Region [a,e) size exceeds region-max-size, it will be split into several Regions [a,b), [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be region-split-size (or a little larger). See also: region-split-size. (A sketch of this arithmetic appears after this table.) Optional: Defaults to 144MB. optional

+
+region-split-size
+ +string + +
+

When Region [a,e) size exceeds region-max-size, it will be split into several Regions [a,b), +[b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be region-split-size (or a +little larger). See also: region-max-size +Optional: Defaults to 96MB +optional

+
+region-max-keys
+ +int64 + +
+

When the number of keys in Region [a,e) exceeds the region-max-keys, it will be split into +several Regions [a,b), [b,c), [c,d), [d,e) and the number of keys in [a,b), [b,c), [c,d) will be +region-split-keys. See also: region-split-keys +Optional: Defaults to 1440000 +optional

+
+region-split-keys
+ +int64 + +
+

When the number of keys in Region [a,e) exceeds the region-max-keys, it will be split into +several Regions [a,b), [b,c), [c,d), [d,e) and the number of keys in [a,b), [b,c), [c,d) will be +region-split-keys. See also: region-max-keys +Optional: Defaults to 960000 +optional

+
+
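As a rough illustration of the split behavior described for region-max-size and region-split-size, here is a hedged sketch of the arithmetic only; TiKV's actual split checker operates on key ranges and approximate sizes, so this is not its implementation:

```go
package main

import "fmt"

// splitSizes sketches how a Region whose size exceeds maxSize is cut into
// pieces of roughly splitSize, with the remainder in the last piece.
func splitSizes(regionSize, maxSize, splitSize int64) []int64 {
	if regionSize <= maxSize {
		return []int64{regionSize}
	}
	var pieces []int64
	for regionSize > splitSize {
		pieces = append(pieces, splitSize)
		regionSize -= splitSize
	}
	return append(pieces, regionSize) // the last piece keeps the remainder
}

func main() {
	const mb = int64(1) << 20
	// With the documented defaults (144MB max, 96MB split), a 150MB Region
	// would end up as one 96MB piece and one 54MB piece.
	fmt.Println(splitSizes(150*mb, 144*mb, 96*mb))
}
```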

TiKVCoprocessorReadPoolConfig +

+

+(Appears on: +TiKVReadPoolConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+high-concurrency
+ +int64 + +
+(Optional) +

Optional: Defaults to 8

+
+normal-concurrency
+ +int64 + +
+(Optional) +

Optional: Defaults to 8

+
+low-concurrency
+ +int64 + +
+(Optional) +

Optional: Defaults to 8

+
+max-tasks-per-worker-high
+ +int64 + +
+(Optional) +

Optional: Defaults to 2000

+
+max-tasks-per-worker-normal
+ +int64 + +
+(Optional) +

Optional: Defaults to 2000

+
+max-tasks-per-worker-low
+ +int64 + +
+(Optional) +

Optional: Defaults to 2000

+
+stack-size
+ +string + +
+(Optional) +

Optional: Defaults to 10MB

+
+

TiKVDbConfig +

+

+(Appears on: +TiKVConfig) +

+

+

TiKVDbConfig is the rocksdb config.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+wal-recovery-mode
+ +int64 + +
+(Optional) +

Optional: Defaults to 2

+
+wal-ttl-seconds
+ +int64 + +
+(Optional) +
+wal-size-limit
+ +string + +
+(Optional) +
+max-total-wal-size
+ +string + +
+(Optional) +

Optional: Defaults to 4GB

+
+max-background-jobs
+ +int64 + +
+(Optional) +

Optional: Defaults to 8

+
+max-manifest-file-size
+ +string + +
+(Optional) +

Optional: Defaults to 128MB

+
+create-if-missing
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+max-open-files
+ +int64 + +
+(Optional) +

Optional: Defaults to 40960

+
+enable-statistics
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+stats-dump-period
+ +string + +
+(Optional) +

Optional: Defaults to 10m

+
+compaction-readahead-size
+ +string + +
+(Optional) +

Optional: Defaults to 0

+
+info-log-max-size
+ +string + +
+(Optional) +
+info-log-roll-time
+ +string + +
+(Optional) +
+info-log-keep-log-file-num
+ +int64 + +
+(Optional) +
+info-log-dir
+ +string + +
+(Optional) +
+rate-bytes-per-sec
+ +string + +
+(Optional) +
+rate-limiter-mode
+ +int64 + +
+(Optional) +
+auto-tuned
+ +bool + +
+(Optional) +
+bytes-per-sync
+ +string + +
+(Optional) +
+wal-bytes-per-sync
+ +string + +
+(Optional) +
+max-sub-compactions
+ +int64 + +
+(Optional) +

Optional: Defaults to 3

+
+writable-file-max-buffer-size
+ +string + +
+(Optional) +
+use-direct-io-for-flush-and-compaction
+ +bool + +
+(Optional) +
+enable-pipelined-write
+ +bool + +
+(Optional) +
+defaultcf
+ + +TiKVCfConfig + + +
+(Optional) +
+writecf
+ + +TiKVCfConfig + + +
+(Optional) +
+lockcf
+ + +TiKVCfConfig + + +
+(Optional) +
+raftcf
+ + +TiKVCfConfig + + +
+(Optional) +
+titan
+ + +TiKVTitanDBConfig + + +
+(Optional) +
+

TiKVEncryptionConfig +

+

+(Appears on: +TiKVConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+method
+ +string + +
+

Encryption method; uses a data key to encrypt raw RocksDB data. Possible values: plaintext, aes128-ctr, aes192-ctr, aes256-ctr. Optional: Defaults to plaintext. optional

+
+data-key-rotation-period
+ +string + +
+

The frequency of data key rotation; it is managed by TiKV. Optional: Defaults to 7d. optional

+
+master-key
+ + +TiKVMasterKeyConfig + + +
+

Master key config

+
+previous-master-key
+ + +TiKVMasterKeyConfig + + +
+

Previous master key config. It is used in master key rotation; the data key should be decrypted by the previous master key and then encrypted by the new master key.

+
+

TiKVFailureStore +

+

+(Appears on: +TiKVStatus) +

+

+

TiKVFailureStore is the tikv failure store information

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+podName
+ +string + +
+
+storeID
+ +string + +
+
+createdAt
+ + +Kubernetes meta/v1.Time + + +
+
+

TiKVGCConfig +

+

+(Appears on: +TiKVConfig) +

+

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+ batch-keys
+ +int64 + +
+(Optional) +

Optional: Defaults to 512

+
+ max-write-bytes-per-sec
+ +string + +
+(Optional) +
+

TiKVImportConfig +

+

+(Appears on: +TiKVConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+import-dir
+ +string + +
+(Optional) +
+num-threads
+ +int64 + +
+(Optional) +
+num-import-jobs
+ +int64 + +
+(Optional) +
+num-import-sst-jobs
+ +int64 + +
+(Optional) +
+max-prepare-duration
+ +string + +
+(Optional) +
+region-split-size
+ +string + +
+(Optional) +
+stream-channel-window
+ +int64 + +
+(Optional) +
+max-open-engines
+ +int64 + +
+(Optional) +
+upload-speed-limit
+ +string + +
+(Optional) +
+

TiKVMasterKeyConfig +

+

+(Appears on: +TiKVEncryptionConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+type
+ +string + +
+

Use KMS encryption or file encryption. Possible values: kms, file. If set to kms, MasterKeyKMSConfig should be filled; if set to file, MasterKeyFileConfig should be filled. optional

+
+MasterKeyFileConfig
+ + +MasterKeyFileConfig + + +
+

+(Members of MasterKeyFileConfig are embedded into this type.) +

+

Master key file config. If the type is set to file, this config should be filled.

+
+MasterKeyKMSConfig
+ + +MasterKeyKMSConfig + + +
+

+(Members of MasterKeyKMSConfig are embedded into this type.) +

+

Master key KMS config. If the type is set to kms, this config should be filled.

+
+

TiKVPDConfig +

+

+(Appears on: +TiKVConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+endpoints
+ +[]string + +
+(Optional) +

The PD endpoints for the client.

+

Default is empty.

+
+retry-interval
+ +string + +
+(Optional) +

The interval at which to retry a PD connection initialization.

+

Default is 300ms. +Optional: Defaults to 300ms

+
+retry-max-count
+ +int64 + +
+(Optional) +

The maximum number of times to retry a PD connection initialization.

+

Default is isize::MAX, represented by -1. +Optional: Defaults to -1

+
+retry-log-every
+ +int64 + +
+(Optional) +

If the client observes the same error message on retry, it can repeat the message only +every n times.

+

Default is 10. Set to 1 to disable this feature. +Optional: Defaults to 10

+
+

TiKVRaftDBConfig +

+

+(Appears on: +TiKVConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+wal-recovery-mode
+ +string + +
+(Optional) +
+wal-dir
+ +string + +
+(Optional) +
+wal-ttl-seconds
+ +int64 + +
+(Optional) +
+wal-size-limit
+ +string + +
+(Optional) +
+max-total-wal-size
+ +string + +
+(Optional) +
+max-background-jobs
+ +int64 + +
+(Optional) +
+max-manifest-file-size
+ +string + +
+(Optional) +
+create-if-missing
+ +bool + +
+(Optional) +
+max-open-files
+ +int64 + +
+(Optional) +
+enable-statistics
+ +bool + +
+(Optional) +
+stats-dump-period
+ +string + +
+(Optional) +
+compaction-readahead-size
+ +string + +
+(Optional) +
+info-log-max-size
+ +string + +
+(Optional) +
+info-log-roll-time
+ +string + +
+(Optional) +
+info-log-keep-log-file-num
+ +int64 + +
+(Optional) +
+info-log-dir
+ +string + +
+(Optional) +
+max-sub-compactions
+ +int64 + +
+(Optional) +
+writable-file-max-buffer-size
+ +string + +
+(Optional) +
+use-direct-io-for-flush-and-compaction
+ +bool + +
+(Optional) +
+enable-pipelined-write
+ +bool + +
+(Optional) +
+allow-concurrent-memtable-write
+ +bool + +
+(Optional) +
+bytes-per-sync
+ +string + +
+(Optional) +
+wal-bytes-per-sync
+ +string + +
+(Optional) +
+defaultcf
+ + +TiKVCfConfig + + +
+(Optional) +
+

TiKVRaftstoreConfig +

+

+(Appears on: +TiKVConfig) +

+

+

TiKVRaftstoreConfig is the configuration of TiKV raftstore component.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+sync-log
+ +bool + +
+(Optional) +

Set to true for high reliability; this prevents data loss on power failure. +Optional: Defaults to true

+
+prevote
+ +bool + +
+(Optional) +

Optional: Defaults to true

+
+raft-base-tick-interval
+ +string + +
+(Optional) +

raft-base-tick-interval is a base tick interval (ms).

+
+raft-heartbeat-ticks
+ +int64 + +
+(Optional) +
+raft-election-timeout-ticks
+ +int64 + +
+(Optional) +
+raft-entry-max-size
+ +string + +
+(Optional) +

When an entry exceeds this max size, TiKV rejects proposing it. +Optional: Defaults to 8MB

+
+raft-log-gc-tick-interval
+ +string + +
+(Optional) +

Interval to gc unnecessary raft log (ms). +Optional: Defaults to 10s

+
+raft-log-gc-threshold
+ +int64 + +
+(Optional) +

A threshold for garbage-collecting stale raft logs; must be >= 1. +Optional: Defaults to 50

+
+raft-log-gc-count-limit
+ +int64 + +
+(Optional) +

When the entry count exceeds this value, GC is forcibly triggered. +Optional: Defaults to 72000

+
+raft-log-gc-size-limit
+ +string + +
+(Optional) +

When the approximate size of raft log entries exceeds this value, GC is forcibly triggered. +Optional: Defaults to 72MB

+
+raft-entry-cache-life-time
+ +string + +
+(Optional) +

When a peer has not responded for this long, the leader no longer keeps an entry cache for it.

+
+raft-reject-transfer-leader-duration
+ +string + +
+(Optional) +

When a peer is newly added, reject transferring leader to the peer for a while.

+
+split-region-check-tick-interval
+ +string + +
+(Optional) +

Interval (ms) to check whether a region needs to be split. +Optional: Defaults to 10s

+
+region-split-check-diff
+ +string + +
+(Optional) +

When the size change of a region since the last check exceeds this diff, the region is checked again to decide whether it should be split. +Optional: Defaults to 6MB

+
+region-compact-check-interval
+ +string + +
+(Optional) +

Interval (ms) to check whether to start compaction for a region. +Optional: Defaults to 5m

+
+clean-stale-peer-delay
+ +string + +
+(Optional) +

Delay before deleting a stale peer. +Optional: Defaults to 10m

+
+region-compact-check-step
+ +int64 + +
+(Optional) +

Number of regions checked in each round of checking. +Optional: Defaults to 100

+
+region-compact-min-tombstones
+ +int64 + +
+(Optional) +

Minimum number of tombstones to trigger manual compaction. +Optional: Defaults to 10000

+
+region-compact-tombstones-percent
+ +int64 + +
+(Optional) +

Minimum percentage of tombstones to trigger manual compaction; should be between 1 and 100. +Optional: Defaults to 30

+
+pd-heartbeat-tick-interval
+ +string + +
+(Optional) +

Optional: Defaults to 60s

+
+pd-store-heartbeat-tick-interval
+ +string + +
+(Optional) +

Optional: Defaults to 10s

+
+snap-mgr-gc-tick-interval
+ +string + +
+(Optional) +
+snap-gc-timeout
+ +string + +
+(Optional) +
+lock-cf-compact-interval
+ +string + +
+(Optional) +

Optional: Defaults to 10m

+
+lock-cf-compact-bytes-threshold
+ +string + +
+(Optional) +

Optional: Defaults to 256MB

+
+notify-capacity
+ +int64 + +
+(Optional) +
+messages-per-tick
+ +int64 + +
+(Optional) +
+max-peer-down-duration
+ +string + +
+(Optional) +

When a peer has not been active for max-peer-down-duration, it is considered down and is reported to PD. +Optional: Defaults to 5m

+
+max-leader-missing-duration
+ +string + +
+(Optional) +

If the leader of a peer is missing for longer than max-leader-missing-duration, the peer asks PD to confirm whether it is valid in any region. If the peer is stale and not valid in any region, it destroys itself.

+
+abnormal-leader-missing-duration
+ +string + +
+(Optional) +

Similar to max-leader-missing-duration, but instead it logs warnings and tries to alert monitoring systems, if any.

+
+peer-stale-state-check-interval
+ +string + +
+(Optional) +
+leader-transfer-max-log-lag
+ +int64 + +
+(Optional) +
+snap-apply-batch-size
+ +string + +
+(Optional) +
+consistency-check-interval
+ +string + +
+(Optional) +

Interval (ms) to check whether region data is consistent. +Optional: Defaults to 0

+
+report-region-flow-interval
+ +string + +
+(Optional) +
+raft-store-max-leader-lease
+ +string + +
+(Optional) +

The lease provided by a successfully proposed and applied entry.

+
+right-derive-when-split
+ +bool + +
+(Optional) +

The right region derives the origin region ID when a region is split.

+
+allow-remove-leader
+ +bool + +
+(Optional) +
+merge-max-log-gap
+ +int64 + +
+(Optional) +

Max log gap allowed when proposing a merge.

+
+merge-check-tick-interval
+ +string + +
+(Optional) +

Interval to re-propose a merge.

+
+use-delete-range
+ +bool + +
+(Optional) +
+cleanup-import-sst-interval
+ +string + +
+(Optional) +

Optional: Defaults to 10m

+
+apply-max-batch-size
+ +int64 + +
+(Optional) +
+apply-pool-size
+ +int64 + +
+(Optional) +

Optional: Defaults to 2

+
+store-max-batch-size
+ +int64 + +
+(Optional) +
+store-pool-size
+ +int64 + +
+(Optional) +

Optional: Defaults to 2

+
+hibernate-regions
+ +bool + +
+(Optional) +
+
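For illustration, a hedged sketch of how a few of the raftstore options above could be set through a TidbCluster manifest; it assumes the `raftstore` key under `spec.tikv.config` maps to this TiKVRaftstoreConfig, which is not stated in this reference:

```yaml
# Sketch only: assumes spec.tikv.config.raftstore corresponds to TiKVRaftstoreConfig.
spec:
  tikv:
    config:
      raftstore:
        sync-log: true               # documented default: true
        hibernate-regions: true
        raft-log-gc-size-limit: 72MB # documented default: 72MB
```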

TiKVReadPoolConfig +

+

+(Appears on: +TiKVConfig) +

+

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+coprocessor
+ + +TiKVCoprocessorReadPoolConfig + + +
+(Optional) +
+storage
+ + +TiKVStorageReadPoolConfig + + +
+(Optional) +
+

TiKVSecurityConfig +

+

+(Appears on: +TiKVConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ca-path
+ +string + +
+(Optional) +
+cert-path
+ +string + +
+(Optional) +
+key-path
+ +string + +
+(Optional) +
+cert-allowed-cn
+ +[]string + +
+(Optional) +

CertAllowedCN is the Common Name that is allowed.

+
+override-ssl-target
+ +string + +
+(Optional) +
+cipher-file
+ +string + +
+(Optional) +
+

TiKVServerConfig +

+

+(Appears on: +TiKVConfig) +

+

+

TiKVServerConfig is the configuration of TiKV server.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+status-thread-pool-size
+ +string + +
+(Optional) +

Optional: Defaults to 1

+
+grpc-compression-type
+ +string + +
+(Optional) +

Optional: Defaults to none

+
+grpc-concurrency
+ +uint + +
+(Optional) +

Optional: Defaults to 4

+
+grpc-concurrent-stream
+ +uint + +
+(Optional) +

Optional: Defaults to 1024

+
+grpc-memory-pool-quota
+ +string + +
+(Optional) +

Optional: Defaults to 32G

+
+grpc-raft-conn-num
+ +uint + +
+(Optional) +

Optional: Defaults to 10

+
+grpc-stream-initial-window-size
+ +string + +
+(Optional) +

Optional: Defaults to 2MB

+
+grpc-keepalive-time
+ +string + +
+(Optional) +

Optional: Defaults to 10s

+
+grpc-keepalive-timeout
+ +string + +
+(Optional) +

Optional: Defaults to 3s

+
+concurrent-send-snap-limit
+ +uint + +
+(Optional) +

Optional: Defaults to 32

+
+concurrent-recv-snap-limit
+ +uint + +
+(Optional) +

Optional: Defaults to 32

+
+end-point-recursion-limit
+ +uint + +
+(Optional) +

Optional: Defaults to 1000

+
+end-point-stream-channel-size
+ +uint + +
+(Optional) +
+end-point-batch-row-limit
+ +uint + +
+(Optional) +
+end-point-stream-batch-row-limit
+ +uint + +
+(Optional) +
+end-point-enable-batch-if-possible
+ +uint + +
+(Optional) +
+end-point-request-max-handle-duration
+ +string + +
+(Optional) +
+snap-max-write-bytes-per-sec
+ +string + +
+(Optional) +

Optional: Defaults to 100MB

+
+snap-max-total-size
+ +string + +
+(Optional) +
+stats-concurrency
+ +uint + +
+(Optional) +
+heavy-load-threshold
+ +uint + +
+(Optional) +
+heavy-load-wait-duration
+ +string + +
+(Optional) +

Optional: Defaults to 60s

+
+labels
+ +map[string]string + +
+(Optional) +
+
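Similarly, a hedged sketch of tuning the gRPC options above, assuming the `server` key under `spec.tikv.config` maps to this TiKVServerConfig:

```yaml
# Sketch only: assumes spec.tikv.config.server corresponds to TiKVServerConfig.
spec:
  tikv:
    config:
      server:
        grpc-concurrency: 4        # documented default: 4
        grpc-keepalive-time: 10s   # documented default: 10s
        labels:
          zone: zone-a             # placeholder store label
```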

TiKVSpec +

+

+(Appears on: +TidbClusterSpec) +

+

+

TiKVSpec contains details of TiKV members

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ComponentSpec
+ + +ComponentSpec + + +
+

+(Members of ComponentSpec are embedded into this type.) +

+
+ResourceRequirements
+ + +Kubernetes core/v1.ResourceRequirements + + +
+

+(Members of ResourceRequirements are embedded into this type.) +

+
+serviceAccount
+ +string + +
+

Specify a Service Account for tikv

+
+replicas
+ +int32 + +
+

The desired ready replicas

+
+baseImage
+ +string + +
+(Optional) +

TODO: remove optional after defaulting introduced +Base image of the component, image tag is now allowed during validation

+
+privileged
+ +bool + +
+(Optional) +

Whether to create the TiKV container in privileged mode; it is highly discouraged to enable this in a critical environment. +Optional: defaults to false

+
+maxFailoverCount
+ +int32 + +
+(Optional) +

MaxFailoverCount limits the max number of replicas that can be added in failover; 0 means no failover. +Optional: Defaults to 3

+
+storageClassName
+ +string + +
+(Optional) +

The storageClassName of the persistent volume for TiKV data storage. +Defaults to Kubernetes default storage class.

+
+config
+ + +TiKVConfig + + +
+(Optional) +

Config is the Configuration of tikv-servers

+
+
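A minimal TiKV spec using the fields above, consistent with the example manifests added under examples/ in this change (replica count and storage size are illustrative):

```yaml
spec:
  tikv:
    baseImage: pingcap/tikv
    replicas: 3
    maxFailoverCount: 3   # documented default: 3
    requests:
      storage: "1Gi"
    config: {}
```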

TiKVStatus +

+

+(Appears on: +TidbClusterStatus) +

+

+

TiKVStatus is TiKV status

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+synced
+ +bool + +
+
+phase
+ + +MemberPhase + + +
+
+statefulSet
+ + +Kubernetes apps/v1.StatefulSetStatus + + +
+
+stores
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiKVStore + + +
+
+tombstoneStores
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiKVStore + + +
+
+failureStores
+ + +map[string]github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiKVFailureStore + + +
+
+image
+ +string + +
+
+

TiKVStorageConfig +

+

+(Appears on: +TiKVConfig) +

+

+

TiKVStorageConfig is the config of storage

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+max-key-size
+ +int64 + +
+(Optional) +
+scheduler-notify-capacity
+ +int64 + +
+(Optional) +
+scheduler-concurrency
+ +int64 + +
+(Optional) +

Optional: Defaults to 2048000

+
+scheduler-worker-pool-size
+ +int64 + +
+(Optional) +

Optional: Defaults to 4

+
+scheduler-pending-write-threshold
+ +string + +
+(Optional) +

Optional: Defaults to 100MB

+
+block-cache
+ + +TiKVBlockCacheConfig + + +
+(Optional) +
+

TiKVStorageReadPoolConfig +

+

+(Appears on: +TiKVReadPoolConfig) +

+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+high-concurrency
+ +int64 + +
+(Optional) +

Optional: Defaults to 4

+
+normal-concurrency
+ +int64 + +
+(Optional) +

Optional: Defaults to 4

+
+low-concurrency
+ +int64 + +
+(Optional) +

Optional: Defaults to 4

+
+max-tasks-per-worker-high
+ +int64 + +
+(Optional) +

Optional: Defaults to 2000

+
+max-tasks-per-worker-normal
+ +int64 + +
+(Optional) +

Optional: Defaults to 2000

+
+max-tasks-per-worker-low
+ +int64 + +
+(Optional) +

Optional: Defaults to 2000

+
+stack-size
+ +string + +
+(Optional) +

Optional: Defaults to 10MB

+
+

TiKVStore +

+

+(Appears on: +TiKVStatus) +

+

+

TiKVStore is either Up/Down/Offline/Tombstone

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+id
+ +string + +
+

store id is also uint64; for the same reason as pd id, we store the id as a string

+
+podName
+ +string + +
+
+ip
+ +string + +
+
+leaderCount
+ +int32 + +
+
+state
+ +string + +
+
+lastHeartbeatTime
+ + +Kubernetes meta/v1.Time + + +
+
+lastTransitionTime
+ + +Kubernetes meta/v1.Time + + +
+

Last time the health transitioned from one to another.

+
+

TiKVTitanCfConfig +

+

+(Appears on: +TiKVCfConfig) +

+

+

TiKVTitanCfConfig is the Titan config.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+min-blob-size
+ +string + +
+(Optional) +
+blob-file-compression
+ +string + +
+(Optional) +
+blob-cache-size
+ +string + +
+(Optional) +
+min-gc-batch-size
+ +string + +
+(Optional) +
+max-gc-batch-size
+ +string + +
+(Optional) +
+discardable-ratio
+ +float64 + +
+(Optional) +
+sample-ratio
+ +float64 + +
+(Optional) +
+merge-small-file-threshold
+ +string + +
+(Optional) +
+blob-run-mode
+ +string + +
+(Optional) +
+

TiKVTitanDBConfig +

+

+(Appears on: +TiKVDbConfig) +

+

+

TiKVTitanDBConfig is the config of a Titan DB.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+enabled
+ +bool + +
+(Optional) +
+dirname
+ +string + +
+(Optional) +
+disable-gc
+ +bool + +
+(Optional) +
+max-background-gc
+ +int64 + +
+(Optional) +
+purge-obsolete-files-period
+ +string + +
+(Optional) +

The value of this field will be truncated to seconds.

+
+

TidbAutoScalerSpec +

+

+(Appears on: +TidbClusterAutoScalerSpec) +

+

+

TidbAutoScalerSpec describes the spec for tidb auto-scaling

+

+ + + + + + + + + + + + + +
FieldDescription
+BasicAutoScalerSpec
+ + +BasicAutoScalerSpec + + +
+

+(Members of BasicAutoScalerSpec are embedded into this type.) +

+
+

TidbAutoScalerStatus +

+

+(Appears on: +TidbClusterAutoSclaerStatus) +

+

+

TidbAutoScalerStatus describes the auto-scaling status of tidb

+

+ + + + + + + + + + + + + +
FieldDescription
+BasicAutoScalerStatus
+ + +BasicAutoScalerStatus + + +
+

+(Members of BasicAutoScalerStatus are embedded into this type.) +

+
+

TidbClusterAutoScalerSpec +

+

+(Appears on: +TidbClusterAutoScaler) +

+

+

TidbClusterAutoScalerSpec describes the state of the TidbClusterAutoScaler

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+cluster
+ + +TidbClusterRef + + +
+

TidbClusterRef describes the target TidbCluster

+
+metricsUrl
+ +string + +
+(Optional) +

We use Prometheus to fetch the metrics until PD can provide them. +MetricsUrl represents the URL used to fetch the metrics info

+
+monitor
+ + +TidbMonitorRef + + +
+(Optional) +

TidbMonitorRef describes the target TidbMonitor; when MetricsUrl and Monitor are both set, +the Operator will use MetricsUrl

+
+tikv
+ + +TikvAutoScalerSpec + + +
+(Optional) +

TiKV represents the auto-scaling spec for tikv

+
+tidb
+ + +TidbAutoScalerSpec + + +
+(Optional) +

TiDB represents the auto-scaling spec for tidb

+
+
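Putting these fields together, a TidbClusterAutoScaler roughly mirrors the example added under examples/auto-scale in this change; a trimmed sketch:

```yaml
apiVersion: pingcap.com/v1alpha1
kind: TidbClusterAutoScaler
metadata:
  name: auto-scaling-demo
spec:
  cluster:
    name: auto-scaling-demo   # TidbClusterRef: the target TidbCluster
  monitor:
    name: auto-scaling-demo   # TidbMonitorRef; metricsUrl takes precedence when both are set
  tikv:
    minReplicas: 3
    maxReplicas: 4
```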

TidbClusterAutoSclaerStatus +

+

+(Appears on: +TidbClusterAutoScaler) +

+

+

TidbClusterAutoSclaerStatus describes the whole status

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+tikv
+ + +TikvAutoScalerStatus + + +
+(Optional) +

Tikv describes the status for the tikv in the last auto-scaling reconciliation

+
+tidb
+ + +TidbAutoScalerStatus + + +
+(Optional) +

Tidb describes the status for the tidb in the last auto-scaling reconciliation

+
+

TidbClusterRef +

+

+(Appears on: +TidbClusterAutoScalerSpec, +TidbInitializerSpec, +TidbMonitorSpec) +

+

+

TidbClusterRef is a reference to a TidbCluster

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+namespace
+ +string + +
+(Optional) +

Namespace is the namespace where the TidbCluster object is located; +defaults to the same namespace as the TidbMonitor

+
+name
+ +string + +
+

Name is the name of TidbCluster object

+
+
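As the example manifests in this change show, other CRs point at a cluster through this reference; for instance, a TidbMonitor's `spec.clusters` entry:

```yaml
clusters:
  - name: basic        # name of the TidbCluster object
    namespace: demo-ns # optional; defaults to the TidbMonitor's own namespace (placeholder value)
```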

TidbClusterSpec +

+

+(Appears on: +TidbCluster) +

+

+

TidbClusterSpec describes the attributes that a user creates on a tidb cluster

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+pd
+ + +PDSpec + + +
+

PD cluster spec

+
+tidb
+ + +TiDBSpec + + +
+

TiDB cluster spec

+
+tikv
+ + +TiKVSpec + + +
+

TiKV cluster spec

+
+tiflash
+ + +TiFlashSpec + + +
+(Optional) +

TiFlash cluster spec

+
+pump
+ + +PumpSpec + + +
+(Optional) +

Pump cluster spec

+
+helper
+ + +HelperSpec + + +
+(Optional) +

Helper spec

+
+paused
+ +bool + +
+(Optional) +

Indicates that the tidb cluster is paused and will not be processed by +the controller.

+
+version
+ +string + +
+(Optional) +

TODO: remove optional after defaulting logic introduced +TiDB cluster version

+
+schedulerName
+ +string + +
+

SchedulerName of TiDB cluster Pods

+
+pvReclaimPolicy
+ + +Kubernetes core/v1.PersistentVolumeReclaimPolicy + + +
+

Persistent volume reclaim policy applied to the PVs that consumed by TiDB cluster

+
+imagePullPolicy
+ + +Kubernetes core/v1.PullPolicy + + +
+

ImagePullPolicy of TiDB cluster Pods

+
+configUpdateStrategy
+ + +ConfigUpdateStrategy + + +
+

ConfigUpdateStrategy determines how the configuration change is applied to the cluster. +UpdateStrategyInPlace will update the ConfigMap of configuration in-place and an extra rolling-update of the +cluster component is needed to reload the configuration change. +UpdateStrategyRollingUpdate will create a new ConfigMap with the new configuration and rolling-update the +related components to use the new ConfigMap, that is, the new configuration will be applied automatically.

+
+enablePVReclaim
+ +bool + +
+(Optional) +

Whether to enable PVC reclaim for orphan PVCs left by statefulset scale-in. +Optional: Defaults to false

+
+tlsCluster
+ + +TLSCluster + + +
+(Optional) +

Whether to enable TLS connections between TiDB server components. +Optional: Defaults to nil

+
+hostNetwork
+ +bool + +
+(Optional) +

Whether Hostnetwork is enabled for TiDB cluster Pods +Optional: Defaults to false

+
+affinity
+ + +Kubernetes core/v1.Affinity + + +
+(Optional) +

Affinity of TiDB cluster Pods

+
+priorityClassName
+ +string + +
+(Optional) +

PriorityClassName of TiDB cluster Pods +Optional: Defaults to omitted

+
+nodeSelector
+ +map[string]string + +
+(Optional) +

Base node selectors of TiDB cluster Pods, components may add or override selectors upon this respectively

+
+annotations
+ +map[string]string + +
+(Optional) +

Base annotations of TiDB cluster Pods; components may add or override annotations upon this respectively

+
+tolerations
+ + +[]Kubernetes core/v1.Toleration + + +
+(Optional) +

Base tolerations of TiDB cluster Pods, components may add more tolerations upon this respectively

+
+timezone
+ +string + +
+(Optional) +

Time zone of TiDB cluster Pods +Optional: Defaults to UTC

+
+services
+ + +[]Service + + +
+

Services list non-headless services type used in TidbCluster +Deprecated

+
+
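A trimmed TidbCluster spec exercising the cluster-level fields above; component specs are elided, and the configUpdateStrategy values `InPlace`/`RollingUpdate` are assumed from the strategy names in the description above:

```yaml
apiVersion: pingcap.com/v1alpha1
kind: TidbCluster
metadata:
  name: basic
spec:
  version: v3.0.8
  timezone: UTC
  pvReclaimPolicy: Delete
  configUpdateStrategy: RollingUpdate  # or InPlace
  pd: {}    # component specs elided for brevity
  tikv: {}
  tidb: {}
```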

TidbClusterStatus +

+

+(Appears on: +TidbCluster) +

+

+

TidbClusterStatus represents the current status of a tidb cluster.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+clusterID
+ +string + +
+
+pd
+ + +PDStatus + + +
+
+tikv
+ + +TiKVStatus + + +
+
+tidb
+ + +TiDBStatus + + +
+
+Pump
+ + +PumpStatus + + +
+
+tiflash
+ + +TiFlashStatus + + +
+
+

TidbInitializerSpec +

+

+(Appears on: +TidbInitializer) +

+

+

TidbInitializerSpec encodes the desired state of the tidb initializer Job

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+image
+ +string + +
+
+cluster
+ + +TidbClusterRef + + +
+
+imagePullPolicy
+ + +Kubernetes core/v1.PullPolicy + + +
+(Optional) +
+permitHost
+ +string + +
+(Optional) +

permitHost is the only host allowed to connect to TiDB.

+
+initSql
+ +string + +
+(Optional) +

InitSql is the SQL statements executed after the TiDB cluster is bootstrapped.

+
+initSqlConfigMap
+ +string + +
+(Optional) +

InitSqlConfigMapName references a ConfigMap that provides the init-sql; it takes higher precedence than initSql if set

+
+passwordSecret
+ +string + +
+(Optional) +
+resources
+ + +Kubernetes core/v1.ResourceRequirements + + +
+(Optional) +
+timezone
+ +string + +
+(Optional) +

Time zone of TiDB initializer Pods

+
+

TidbInitializerStatus +

+

+(Appears on: +TidbInitializer) +

+

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+JobStatus
+ + +Kubernetes batch/v1.JobStatus + + +
+

+(Members of JobStatus are embedded into this type.) +

+
+phase
+ + +InitializePhase + + +
+

Phase is a user readable state inferred from the underlying Job status and TidbCluster status

+
+

TidbMonitorRef +

+

+(Appears on: +TidbClusterAutoScalerSpec) +

+

+

TidbMonitorRef is a reference to a TidbMonitor

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+namespace
+ +string + +
+(Optional) +

Namespace is the namespace where the TidbMonitor object is located; +defaults to the same namespace as the TidbClusterAutoScaler

+
+name
+ +string + +
+

Name is the name of TidbMonitor object

+
+

TidbMonitorSpec +

+

+(Appears on: +TidbMonitor) +

+

+

TidbMonitorSpec encodes the desired state of the tidb monitoring component

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+clusters
+ + +[]TidbClusterRef + + +
+
+prometheus
+ + +PrometheusSpec + + +
+
+grafana
+ + +GrafanaSpec + + +
+(Optional) +
+reloader
+ + +ReloaderSpec + + +
+
+initializer
+ + +InitializerSpec + + +
+
+imagePullPolicy
+ + +Kubernetes core/v1.PullPolicy + + +
+
+persistent
+ +bool + +
+(Optional) +
+storageClassName
+ +string + +
+(Optional) +
+storage
+ +string + +
+(Optional) +
+nodeSelector
+ +map[string]string + +
+(Optional) +
+annotations
+ +map[string]string + +
+(Optional) +
+tolerations
+ + +[]Kubernetes core/v1.Toleration + + +
+(Optional) +
+kubePrometheusURL
+ +string + +
+(Optional) +

kubePrometheusURL is where tidb-monitoring gets the common metrics of kube-prometheus. +Ref: https://github.com/coreos/kube-prometheus

+
+alertmanagerURL
+ +string + +
+(Optional) +

alertmanagerURL is where tidb-monitoring pushes alerts to. +Ref: https://prometheus.io/docs/alerting/alertmanager/

+
+
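These fields correspond to the TidbMonitor example added under examples/basic in this change; a minimal form (grafana is optional and omitted here):

```yaml
apiVersion: pingcap.com/v1alpha1
kind: TidbMonitor
metadata:
  name: basic
spec:
  clusters:
    - name: basic
  prometheus:
    baseImage: prom/prometheus
    version: v2.11.1
  initializer:
    baseImage: pingcap/tidb-monitor-initializer
    version: v3.0.5
  reloader:
    baseImage: pingcap/tidb-monitor-reloader
    version: v1.0.1
  imagePullPolicy: IfNotPresent
```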

TidbMonitorStatus +

+

+(Appears on: +TidbMonitor) +

+

+

TODO: sync status

+

+

TikvAutoScalerSpec +

+

+(Appears on: +TidbClusterAutoScalerSpec) +

+

+

TikvAutoScalerSpec describes the spec for tikv auto-scaling

+

+ + + + + + + + + + + + + +
FieldDescription
+BasicAutoScalerSpec
+ + +BasicAutoScalerSpec + + +
+

+(Members of BasicAutoScalerSpec are embedded into this type.) +

+
+

TikvAutoScalerStatus +

+

+(Appears on: +TidbClusterAutoSclaerStatus) +

+

+

TikvAutoScalerStatus describes the auto-scaling status of tikv

+

+ + + + + + + + + + + + + +
FieldDescription
+BasicAutoScalerStatus
+ + +BasicAutoScalerStatus + + +
+

+(Members of BasicAutoScalerStatus are embedded into this type.) +

+
+

TxnLocalLatches +

+

+(Appears on: +TiDBConfig) +

+

+

TxnLocalLatches is the TxnLocalLatches section of the config.

+

+ + + + + + + + + + + + + + + + + +
FieldDescription
+enabled
+ +bool + +
+(Optional) +
+capacity
+ +uint + +
+(Optional) +
+
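A hedged sketch of setting these through the TidbCluster CR; the `txn-local-latches` key name is assumed from TiDB's configuration file and is not stated in this reference:

```yaml
# Sketch only: the key name and the capacity value are assumptions, not taken from this reference.
spec:
  tidb:
    config:
      txn-local-latches:
        enabled: true
        capacity: 2048000
```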

UnjoinedMember +

+

+(Appears on: +PDStatus) +

+

+

UnjoinedMember is the information of a PD member that has not joined the cluster

+

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+podName
+ +string + +
+
+pvcUID
+ +k8s.io/apimachinery/pkg/types.UID + +
+
+createdAt
+ + +Kubernetes meta/v1.Time + + +
+
+

User +

+

+

User is the configuration of users.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+password
+ +string + +
+(Optional) +
+profile
+ +string + +
+(Optional) +
+quota
+ +string + +
+(Optional) +
+networks
+ + +Networks + + +
+(Optional) +
+
+

+Generated with gen-crd-api-reference-docs +

diff --git a/docs/api-references/template/members.tpl b/docs/api-references/template/members.tpl new file mode 100644 index 0000000000..9f08d1aa5b --- /dev/null +++ b/docs/api-references/template/members.tpl @@ -0,0 +1,48 @@ +{{ define "members" }} + +{{ range .Members }} +{{ if not (hiddenMember .)}} + + + {{ fieldName . }}
+ + {{ if linkForType .Type }} + + {{ typeDisplayName .Type }} + + {{ else }} + {{ typeDisplayName .Type }} + {{ end }} + + + + {{ if fieldEmbedded . }} +

+ (Members of {{ fieldName . }} are embedded into this type.) +

+ {{ end}} + + {{ if isOptionalMember .}} + (Optional) + {{ end }} + + {{ safe (renderComments .CommentLines) }} + + {{ if and (eq (.Type.Name.Name) "ObjectMeta") }} + Refer to the Kubernetes API documentation for the fields of the + metadata field. + {{ end }} + + {{ if or (eq (fieldName .) "spec") }} +
+
+ + {{ template "members" .Type }} +
+ {{ end }} + + +{{ end }} +{{ end }} + +{{ end }} diff --git a/docs/api-references/template/pkg.tpl b/docs/api-references/template/pkg.tpl new file mode 100644 index 0000000000..4e7fe158c5 --- /dev/null +++ b/docs/api-references/template/pkg.tpl @@ -0,0 +1,55 @@ +{{ define "packages" }} + +{{ with .packages}} +--- +title: TiDB Operator API Document +summary: Reference of TiDB Operator API +category: how-to +--- + +

API Document

+

Packages:

+ +{{ end}} + +{{ range .packages }} +

+ {{- packageDisplayName . -}} +

+ + {{ with (index .GoPackages 0 )}} + {{ with .DocComments }} +

+ {{ safe (renderComments .) }} +

+ {{ end }} + {{ end }} + + Resource Types: +
    + {{- range (visibleTypes (sortedTypes .Types)) -}} + {{ if isExportedType . -}} +
  • + {{ typeDisplayName . }} +
  • + {{- end }} + {{- end -}} +
+ + {{ range (visibleTypes (sortedTypes .Types))}} + {{ template "type" . }} + {{ end }} +
+{{ end }} + +

+ Generated with gen-crd-api-reference-docs +

+ +{{ end }} diff --git a/docs/api-references/template/type.tpl b/docs/api-references/template/type.tpl new file mode 100644 index 0000000000..e28b088abc --- /dev/null +++ b/docs/api-references/template/type.tpl @@ -0,0 +1,58 @@ +{{ define "type" }} + +

+ {{- .Name.Name }} + {{ if eq .Kind "Alias" }}({{.Underlying}} alias)

{{ end -}} +

+{{ with (typeReferences .) }} +

+ (Appears on: + {{- $prev := "" -}} + {{- range . -}} + {{- if $prev -}}, {{ end -}} + {{ $prev = . }} + {{ typeDisplayName . }} + {{- end -}} + ) +

+{{ end }} + + +

+ {{ safe (renderComments .CommentLines) }} +

+ +{{ if .Members }} + + + + + + + + + {{ if isExportedType . }} + + + + + + + + + {{ end }} + {{ template "members" .}} + +
FieldDescription
+ apiVersion
+ string
+ + {{apiGroup .}} + +
+ kind
+ string +
{{.Name.Name}}
+{{ end }} + +{{ end }} diff --git a/docs/aws-eks-tutorial.md b/docs/aws-eks-tutorial.md deleted file mode 100644 index fef7883cb4..0000000000 --- a/docs/aws-eks-tutorial.md +++ /dev/null @@ -1,3 +0,0 @@ -# Deploy TiDB, a distributed MySQL compatible database, on Kubernetes via AWS EKS - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/maintain/backup-and-restore/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/maintain/backup-and-restore/). diff --git a/docs/backup-restore.md b/docs/backup-restore.md deleted file mode 100644 index a45c907ec6..0000000000 --- a/docs/backup-restore.md +++ /dev/null @@ -1,3 +0,0 @@ -# Backup and Restore a TiDB Cluster - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/maintain/backup-and-restore/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/maintain/backup-and-restore/). diff --git a/docs/cli-manual.md b/docs/cli-manual.md deleted file mode 100644 index 5970fca5bb..0000000000 --- a/docs/cli-manual.md +++ /dev/null @@ -1,3 +0,0 @@ -# The TiDB Kubernetes Control(tkctl) User Manual - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/reference/tools/tkctl/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/reference/tools/tkctl/). diff --git a/docs/google-kubernetes-tutorial.md b/docs/google-kubernetes-tutorial.md deleted file mode 100644 index d8ec0936fc..0000000000 --- a/docs/google-kubernetes-tutorial.md +++ /dev/null @@ -1,4 +0,0 @@ -# Deploy TiDB, a distributed MySQL compatible database, to Kubernetes on Google Cloud - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/get-started/deploy-tidb-from-kubernetes-gke/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/get-started/deploy-tidb-from-kubernetes-gke/). - diff --git a/docs/minikube-tutorial.md b/docs/minikube-tutorial.md deleted file mode 100644 index ad83e13dbe..0000000000 --- a/docs/minikube-tutorial.md +++ /dev/null @@ -1,3 +0,0 @@ -# Deploy TiDB in the minikube cluster - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/get-started/deploy-tidb-from-kubernetes-minikube/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/get-started/deploy-tidb-from-kubernetes-minikube/). diff --git a/docs/operation-guide.md b/docs/operation-guide.md deleted file mode 100644 index 4e6c9ac682..0000000000 --- a/docs/operation-guide.md +++ /dev/null @@ -1,3 +0,0 @@ -# TiDB Cluster Operation Guide - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/tidb-operator-overview/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/tidb-operator-overview/). diff --git a/docs/references/tidb-backup-configuration.md b/docs/references/tidb-backup-configuration.md deleted file mode 100644 index 29a582219b..0000000000 --- a/docs/references/tidb-backup-configuration.md +++ /dev/null @@ -1,3 +0,0 @@ -# TiDB Backup Configuration Reference - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/reference/configuration/backup/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/reference/configuration/backup/). diff --git a/docs/release-note-guide.md b/docs/release-note-guide.md index 81abf45466..a00fb35021 100644 --- a/docs/release-note-guide.md +++ b/docs/release-note-guide.md @@ -6,6 +6,9 @@ When you write a release note for your pull request, make sure that your languag - ACTION REQUIRED: Add the `timezone` support for [all charts] + Then, add label `release-note-action-required` onto the PR. 
This is required + by [the tool we use to generate change log](generate-changelog.md). + 2. Every note starts with the "do" form of a verb. For example: - Support backup to S3 with [Backup & Restore (BR)](https://github.com/pingcap/br) diff --git a/docs/setup.md b/docs/setup.md deleted file mode 100644 index 447daaf457..0000000000 --- a/docs/setup.md +++ /dev/null @@ -1,3 +0,0 @@ -# TiDB Operator Setup - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/deploy/tidb-operator/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/deploy/tidb-operator/). diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md deleted file mode 100644 index 795fa932f2..0000000000 --- a/docs/troubleshooting.md +++ /dev/null @@ -1,3 +0,0 @@ -# Troubleshooting - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/troubleshoot/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/troubleshoot/). diff --git a/docs/user-guide.md b/docs/user-guide.md deleted file mode 100644 index 1e4632e7b9..0000000000 --- a/docs/user-guide.md +++ /dev/null @@ -1,3 +0,0 @@ -# TiDB Operator User Guide - -This document has been moved to [https://pingcap.com/docs/v3.0/tidb-in-kubernetes/tidb-operator-overview/](https://pingcap.com/docs/v3.0/tidb-in-kubernetes/tidb-operator-overview/). diff --git a/examples/advanced-statefulset/tidb-cluster-scaled.yaml b/examples/advanced-statefulset/tidb-cluster-scaled.yaml new file mode 100644 index 0000000000..c7b12fbb30 --- /dev/null +++ b/examples/advanced-statefulset/tidb-cluster-scaled.yaml @@ -0,0 +1,28 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + annotations: + tikv.tidb.pingcap.com/delete-slots: '[1]' + name: asts +spec: + version: v3.0.8 + timezone: UTC + pvReclaimPolicy: Delete + pd: + baseImage: pingcap/pd + replicas: 3 + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + replicas: 3 + requests: + storage: "1Gi" + config: {} + tidb: + baseImage: pingcap/tidb + replicas: 2 + service: + type: ClusterIP + config: {} diff --git a/examples/advanced-statefulset/tidb-cluster.yaml b/examples/advanced-statefulset/tidb-cluster.yaml new file mode 100644 index 0000000000..ea8aaa9755 --- /dev/null +++ b/examples/advanced-statefulset/tidb-cluster.yaml @@ -0,0 +1,26 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: asts +spec: + version: v3.0.8 + timezone: UTC + pvReclaimPolicy: Delete + pd: + baseImage: pingcap/pd + replicas: 3 + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + replicas: 4 + requests: + storage: "1Gi" + config: {} + tidb: + baseImage: pingcap/tidb + replicas: 2 + service: + type: ClusterIP + config: {} diff --git a/examples/auto-scale/README.md b/examples/auto-scale/README.md new file mode 100644 index 0000000000..5bb7277d31 --- /dev/null +++ b/examples/auto-scale/README.md @@ -0,0 +1,50 @@ +# Deploying TidbCluster with Auto-scaling + +> **Note:** +> +> This setup is for test or demo purpose only and **IS NOT** applicable for critical environment. Refer to the [Documents](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/prerequisites/) for production setup. + + +The following steps will create a TiDB cluster with monitoring and auto-scaler, the monitoring data is not persisted by default. + +**Prerequisites**: +- Has TiDB operator `v1.1.0-beta.2` or higher version installed. 
[Doc](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/tidb-operator/) +- Has default `StorageClass` configured, and there are enough PVs (by default, 6 PVs are required) of that storageClass: + + This could be verified by the following command: + + ```bash + > kubectl get storageclass + ``` + + The output is similar to this: + + ```bash + NAME PROVISIONER AGE + standard (default) kubernetes.io/gce-pd 1d + gold kubernetes.io/gce-pd 1d + ``` + + Alternatively, you could specify the storageClass explicitly by modifying `tidb-cluster.yaml`. + + +## Enabling Auto-scaling + +> **Note:** +> +> The Auto-scaling feature is still in alpha, you should enable this feature in TiDB Operator by setting values.yaml: + ```yaml +features: + AutoScaling=true +``` + +Auto-scale the cluster based on CPU load +```bash +> kubectl -n apply -f ./ +``` + +## Destroy + +```bash +> kubectl -n delete -f ./ +``` diff --git a/examples/auto-scale/tidb-cluster-auto-scaler.yaml b/examples/auto-scale/tidb-cluster-auto-scaler.yaml new file mode 100644 index 0000000000..7727b8e0f2 --- /dev/null +++ b/examples/auto-scale/tidb-cluster-auto-scaler.yaml @@ -0,0 +1,31 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbClusterAutoScaler +metadata: + name: auto-scaling-demo +spec: + cluster: + name: auto-scaling-demo + monitor: + name: auto-scaling-demo + tikv: + minReplicas: 3 + maxReplicas: 4 + metricsTimeDuration: "1m" + metrics: + - type: "Resource" + resource: + name: "cpu" + target: + type: "Utilization" + averageUtilization: 80 + tidb: + minReplicas: 2 + maxReplicas: 3 + metricsTimeDuration: "1m" + metrics: + - type: "Resource" + resource: + name: "cpu" + target: + type: "Utilization" + averageUtilization: 80 diff --git a/examples/auto-scale/tidb-cluster.yaml b/examples/auto-scale/tidb-cluster.yaml new file mode 100644 index 0000000000..9c3c94f86b --- /dev/null +++ b/examples/auto-scale/tidb-cluster.yaml @@ -0,0 +1,29 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: auto-scaling-demo +spec: + version: v3.0.8 + timezone: UTC + pvReclaimPolicy: Delete + pd: + baseImage: pingcap/pd + replicas: 3 + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + replicas: 3 + requests: + cpu: "1" + storage: "1Gi" + config: {} + tidb: + baseImage: pingcap/tidb + replicas: 2 + service: + type: ClusterIP + config: {} + requests: + cpu: "1" diff --git a/examples/auto-scale/tidb-monitor.yaml b/examples/auto-scale/tidb-monitor.yaml new file mode 100644 index 0000000000..c1c99bc95d --- /dev/null +++ b/examples/auto-scale/tidb-monitor.yaml @@ -0,0 +1,20 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbMonitor +metadata: + name: auto-scaling-demo +spec: + clusters: + - name: auto-scaling-demo + prometheus: + baseImage: prom/prometheus + version: v2.11.1 + grafana: + baseImage: grafana/grafana + version: 6.0.1 + initializer: + baseImage: pingcap/tidb-monitor-initializer + version: v3.0.5 + reloader: + baseImage: pingcap/tidb-monitor-reloader + version: v1.0.1 + imagePullPolicy: IfNotPresent diff --git a/examples/basic/README.md b/examples/basic/README.md new file mode 100644 index 0000000000..9933d7e7c9 --- /dev/null +++ b/examples/basic/README.md @@ -0,0 +1,73 @@ +# A Basic TiDB cluster with monitoring + +> **Note:** +> +> This setup is for test or demo purpose only and **IS NOT** applicable for critical environment. Refer to the [Documents](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/prerequisites/) for production setup. 
+ +The following steps will create a TiDB cluster with monitoring; the monitoring data is not persisted by default. + +**Prerequisites**: +- Has TiDB operator `v1.1.0-beta.1` or higher version installed. [Doc](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/tidb-operator/) +- Has default `StorageClass` configured, and there are enough PVs (by default, 6 PVs are required) of that storageClass: + + This could be verified by the following command: + + ```bash + > kubectl get storageclass + ``` + + The output is similar to this: + + ```bash + NAME PROVISIONER AGE + standard (default) kubernetes.io/gce-pd 1d + gold kubernetes.io/gce-pd 1d + ``` + + Alternatively, you could specify the storageClass explicitly by modifying `tidb-cluster.yaml`. + +## Install + +The following commands are assumed to be executed in this directory. + +Install the cluster: + +```bash +> kubectl -n <namespace> apply -f ./ +``` + +Wait for the cluster Pods to be ready: + +```bash +watch kubectl -n <namespace> get pod +``` + +## Explore + +Explore the TiDB SQL interface: + +```bash +> kubectl -n <namespace> port-forward svc/basic-tidb 4000:4000 &>/tmp/pf-tidb.log & +> mysql -h 127.0.0.1 -P 4000 -u root +``` + +Explore the monitoring dashboards: + +```bash +> kubectl -n <namespace> port-forward svc/basic-grafana 3000:3000 &>/tmp/pf-grafana.log & +``` + +Browse [localhost:3000](http://localhost:3000). + +## Destroy + +```bash +> kubectl -n <namespace> delete -f ./ +``` + +The PVCs used by the TiDB cluster will not be deleted in the above process; therefore, the PVs will not be released either. You can delete the PVCs and release the PVs by the following command: + +```bash +> kubectl -n <namespace> delete pvc -l app.kubernetes.io/instance=basic,app.kubernetes.io/managed-by=tidb-operator +``` + diff --git a/examples/basic/tidb-cluster.yaml b/examples/basic/tidb-cluster.yaml new file mode 100644 index 0000000000..ae7279deb7 --- /dev/null +++ b/examples/basic/tidb-cluster.yaml @@ -0,0 +1,26 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: basic +spec: + version: v3.0.8 + timezone: UTC + pvReclaimPolicy: Delete + pd: + baseImage: pingcap/pd + replicas: 3 + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + replicas: 3 + requests: + storage: "1Gi" + config: {} + tidb: + baseImage: pingcap/tidb + replicas: 2 + service: + type: ClusterIP + config: {} diff --git a/examples/basic/tidb-monitor.yaml b/examples/basic/tidb-monitor.yaml new file mode 100644 index 0000000000..d314dc1f3e --- /dev/null +++ b/examples/basic/tidb-monitor.yaml @@ -0,0 +1,20 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbMonitor +metadata: + name: basic +spec: + clusters: + - name: basic + prometheus: + baseImage: prom/prometheus + version: v2.11.1 + grafana: + baseImage: grafana/grafana + version: 6.0.1 + initializer: + baseImage: pingcap/tidb-monitor-initializer + version: v3.0.5 + reloader: + baseImage: pingcap/tidb-monitor-reloader + version: v1.0.1 + imagePullPolicy: IfNotPresent diff --git a/examples/initialize/README.md b/examples/initialize/README.md new file mode 100644 index 0000000000..6e4df651ad --- /dev/null +++ b/examples/initialize/README.md @@ -0,0 +1,67 @@ +# Creating TidbCluster with Initialization + +> **Note:** +> +> This setup is for test or demo purpose only and **IS NOT** applicable for critical environment. Refer to the [Documents](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/prerequisites/) for production setup. + + +The following steps will create a TiDB cluster with Initialization.
+ +**Prerequisites**: +- Has TiDB operator `v1.1.0-beta.1` or higher version installed. [Doc](https://pingcap.com/docs/stable/tidb-in-kubernetes/deploy/tidb-operator/) +- Has default `StorageClass` configured, and there are enough PVs (by default, 6 PVs are required) of that storageClass: + + This could be verified by the following command: + + ```bash + > kubectl get storageclass + ``` + + The output is similar to this: + + ```bash + NAME PROVISIONER AGE + standard (default) kubernetes.io/gce-pd 1d + gold kubernetes.io/gce-pd 1d + ``` + + Alternatively, you could specify the storageClass explicitly by modifying `tidb-cluster.yaml`. + + +## Initialize + + +> **Note:** +> +> The initialization should be done once the TiDB cluster is created + +The following commands are assumed to be executed in this directory. + +You can create the root user and set its password by creating a secret and linking it to the Initializer: + +```bash +> kubectl create secret generic tidb-secret --from-literal=root=<root-password> --namespace=<namespace> +``` + +You can also create other users and set their passwords: +```bash +> kubectl create secret generic tidb-secret --from-literal=root=<root-password> --from-literal=developer=<developer-password> --namespace=<namespace> +``` + +Initialize the cluster to create the users and create the database named `hello`: + +```bash +> kubectl -n <namespace> apply -f ./ +``` + +Wait for the initialize job to be done: +```bash +$ kubectl get pod -n <namespace> | grep initialize-demo-tidb-initializer +initialize-demo-tidb-initializer-whzn7 0/1 Completed 0 57s +``` + +## Destroy + +```bash +> kubectl -n <namespace> delete -f ./ +``` diff --git a/examples/initialize/tidb-cluster.yaml b/examples/initialize/tidb-cluster.yaml new file mode 100644 index 0000000000..1ec543ea72 --- /dev/null +++ b/examples/initialize/tidb-cluster.yaml @@ -0,0 +1,26 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: initialize-demo +spec: + version: v3.0.8 + timezone: UTC + pvReclaimPolicy: Delete + pd: + baseImage: pingcap/pd + replicas: 1 + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + replicas: 1 + requests: + storage: "1Gi" + config: {} + tidb: + baseImage: pingcap/tidb + replicas: 1 + service: + type: ClusterIP + config: {} diff --git a/examples/initialize/tidb-initializer.yaml b/examples/initialize/tidb-initializer.yaml new file mode 100644 index 0000000000..9067aff97b --- /dev/null +++ b/examples/initialize/tidb-initializer.yaml @@ -0,0 +1,21 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbInitializer +metadata: + name: initialize-demo +spec: + image: tnir/mysqlclient + imagePullPolicy: IfNotPresent + cluster: + name: initialize-demo + initSql: "create database hello;" + # initSqlConfigMap: tidb-initsql + passwordSecret: "tidb-secret" + # permitHost: 172.6.5.8 + # resources: + # limits: + # cpu: 1000m + # memory: 500Mi + # requests: + # cpu: 100m + # memory: 50Mi + # timezone: "Asia/Shanghai" diff --git a/examples/selfsigned-tls/selfsigned-ca.yaml b/examples/selfsigned-tls/selfsigned-ca.yaml new file mode 100644 index 0000000000..806a78be55 --- /dev/null +++ b/examples/selfsigned-tls/selfsigned-ca.yaml @@ -0,0 +1,11 @@ +apiVersion: cert-manager.io/v1alpha2 +kind: Certificate +metadata: + name: selfsigned-ca-cert +spec: + secretName: selfsigned-ca-cert + commonName: "certmanager" + isCA: true + issuerRef: + name: selfsigned-issuer + kind: Issuer diff --git a/examples/selfsigned-tls/selfsigned-cert-issuer.yaml b/examples/selfsigned-tls/selfsigned-cert-issuer.yaml new file mode 100644 index 0000000000..934b53124d --- /dev/null +++
b/examples/selfsigned-tls/selfsigned-cert-issuer.yaml @@ -0,0 +1,7 @@ +apiVersion: cert-manager.io/v1alpha2 +kind: Issuer +metadata: + name: selfsigned-cert-issuer +spec: + ca: + secretName: selfsigned-ca-cert diff --git a/examples/selfsigned-tls/selfsigned-issuer.yaml b/examples/selfsigned-tls/selfsigned-issuer.yaml new file mode 100644 index 0000000000..7f06abf08a --- /dev/null +++ b/examples/selfsigned-tls/selfsigned-issuer.yaml @@ -0,0 +1,6 @@ +apiVersion: cert-manager.io/v1alpha2 +kind: Issuer +metadata: + name: selfsigned-issuer +spec: + selfSigned: {} diff --git a/examples/selfsigned-tls/tidb-client-cert.yaml b/examples/selfsigned-tls/tidb-client-cert.yaml new file mode 100644 index 0000000000..df740c27ed --- /dev/null +++ b/examples/selfsigned-tls/tidb-client-cert.yaml @@ -0,0 +1,21 @@ +apiVersion: cert-manager.io/v1alpha2 +kind: Certificate +metadata: + name: tidb-client-cert +spec: + secretName: tls-tidb-client-secret # -tidb-client-secret + subject: + organizationalUnits: + - "TiDB Operator" + organization: + - "PingCAP" + duration: "8760h" # 364 days + # If you want verify server cert Common Name (e.g. --ssl-verify-server-cert + # flag in MySQL CLI), you must configure the HostName you used to connect the + # server here. + commonName: "tls-tidb-client" + usages: + - "client auth" + issuerRef: + name: selfsigned-cert-issuer + kind: Issuer diff --git a/examples/selfsigned-tls/tidb-cluster.yaml b/examples/selfsigned-tls/tidb-cluster.yaml new file mode 100644 index 0000000000..aa93ea2274 --- /dev/null +++ b/examples/selfsigned-tls/tidb-cluster.yaml @@ -0,0 +1,28 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: tls +spec: + version: v3.0.8 + timezone: UTC + pvReclaimPolicy: Delete + pd: + baseImage: pingcap/pd + replicas: 1 + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + replicas: 1 + requests: + storage: "1Gi" + config: {} + tidb: + baseImage: pingcap/tidb + replicas: 1 + service: + type: ClusterIP + config: {} + tlsClient: + enabled: true diff --git a/examples/selfsigned-tls/tidb-server-cert.yaml b/examples/selfsigned-tls/tidb-server-cert.yaml new file mode 100644 index 0000000000..6580dc5091 --- /dev/null +++ b/examples/selfsigned-tls/tidb-server-cert.yaml @@ -0,0 +1,22 @@ +apiVersion: cert-manager.io/v1alpha2 +kind: Certificate +metadata: + name: tidb-server-cert +spec: + secretName: tls-tidb-server-secret # -tidb-server-secret + subject: + organizationalUnits: + - "TiDB Operator" + organization: + - "PingCAP" + duration: "8760h" # 364 days + # If you want verify server cert Common Name (e.g. --ssl-verify-server-cert + # flag in MySQL CLI), you must configure the HostName you used to connect the + # server here. 
+ commonName: "tls-tidb-server" + usages: + - "client auth" + - "server auth" + issuerRef: + name: selfsigned-cert-issuer + kind: Issuer diff --git a/go.mod b/go.mod index e1cfcae76e..0968f9aa4b 100644 --- a/go.mod +++ b/go.mod @@ -7,8 +7,10 @@ module github.com/pingcap/tidb-operator go 1.13 require ( + github.com/Azure/go-autorest/autorest/mocks v0.3.0 // indirect github.com/BurntSushi/toml v0.3.1 github.com/MakeNowJust/heredoc v0.0.0-20171113091838-e9091a26100e // indirect + github.com/Masterminds/semver v1.4.2 github.com/Microsoft/go-winio v0.4.12 // indirect github.com/NYTimes/gziphandler v1.1.1 // indirect github.com/ant31/crd-validation v0.0.0-20180702145049-30f8a35d0ac2 @@ -35,6 +37,7 @@ require ( github.com/gophercloud/gophercloud v0.3.0 // indirect github.com/gregjones/httpcache v0.0.0-20190212212710-3befbb6ad0cc // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect + github.com/grpc-ecosystem/grpc-gateway v1.13.0 // indirect github.com/imdario/mergo v0.3.7 // indirect github.com/juju/errors v0.0.0-20180806074554-22422dad46e1 github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect @@ -47,7 +50,7 @@ require ( github.com/openshift/generic-admission-server v1.14.0 github.com/opentracing/opentracing-go v1.1.0 // indirect github.com/pierrec/lz4 v2.0.5+incompatible // indirect - github.com/pingcap/advanced-statefulset v0.3.1 + github.com/pingcap/advanced-statefulset v0.3.2 github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8 // indirect github.com/pingcap/errors v0.11.0 github.com/pingcap/kvproto v0.0.0-20191217072959-393e6c0fd4b7 diff --git a/go.sum b/go.sum index 8e1ada0a58..383bfe3b6d 100644 --- a/go.sum +++ b/go.sum @@ -32,6 +32,8 @@ github.com/Azure/go-autorest/autorest/date v0.1.0/go.mod h1:plvfp3oPSKwf2DNjlBjW github.com/Azure/go-autorest/autorest/mocks v0.1.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= github.com/Azure/go-autorest/autorest/mocks v0.2.0 h1:Ww5g4zThfD/6cLb4z6xxgeyDa7QDkizMkJKe0ysZXp0= github.com/Azure/go-autorest/autorest/mocks v0.2.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= +github.com/Azure/go-autorest/autorest/mocks v0.3.0 h1:qJumjCaCudz+OcqE9/XtEPfvtOjOmKaui4EOpFI6zZc= +github.com/Azure/go-autorest/autorest/mocks v0.3.0/go.mod h1:a8FDP3DYzQ4RYfVAxAN3SVSiiO77gL2j2ronKKP0syM= github.com/Azure/go-autorest/autorest/to v0.2.0/go.mod h1:GunWKJp1AEqgMaGLV+iocmRAJWqST1wQYhyyjXJ3SJc= github.com/Azure/go-autorest/autorest/validation v0.1.0/go.mod h1:Ha3z/SqBeaalWQvokg3NZAlQTalVMtOIAs1aGK7G6u8= github.com/Azure/go-autorest/logger v0.1.0 h1:ruG4BSDXONFRrZZJ2GUXDiUyVpayPmb1GnWeHDdaNKY= @@ -43,11 +45,13 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/GoogleCloudPlatform/cloudsql-proxy v0.0.0-20190605020000-c4ba1fdf4d36/go.mod h1:aJ4qN3TfrelA6NZ6AXsXRfmEVaYin3EDbSPJrKS8OXo= +github.com/GoogleCloudPlatform/k8s-cloud-provider v0.0.0-20190822182118-27a4ced34534 h1:N7lSsF+R7wSulUADi36SInSQA3RvfO/XclHQfedr0qk= github.com/GoogleCloudPlatform/k8s-cloud-provider v0.0.0-20190822182118-27a4ced34534/go.mod h1:iroGtC8B3tQiqtds1l+mgk/BBOrxbqjH+eUfFQYRc14= github.com/JeffAshton/win_pdh v0.0.0-20161109143554-76bb4ee9f0ab/go.mod h1:3VYc5hodBMJ5+l/7J4xAyMeuM2PNuepvHlGs8yilUCA= github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd/go.mod 
h1:64YHyfSL2R96J44Nlwm39UHepQbyR5q10x7iYa1ks2E= github.com/MakeNowJust/heredoc v0.0.0-20171113091838-e9091a26100e h1:eb0Pzkt15Bm7f2FFYv7sjY7NPFi3cPkS3tv1CcrFBWA= github.com/MakeNowJust/heredoc v0.0.0-20171113091838-e9091a26100e/go.mod h1:64YHyfSL2R96J44Nlwm39UHepQbyR5q10x7iYa1ks2E= +github.com/Masterminds/semver v1.4.2 h1:WBLTQ37jOCzSLtXNdoo8bNM8876KhNqOKvrlGITgsTc= github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= github.com/Microsoft/go-winio v0.4.12 h1:xAfWHN1IrQ0NJ9TBC0KBZoqLjzDTr1ML+4MywiUOryc= @@ -67,6 +71,7 @@ github.com/Rican7/retry v0.1.0/go.mod h1:FgOROf8P5bebcC1DS0PdOQiqGUridaZvikzUmkF github.com/ajg/form v0.0.0-20160822230020-523a5da1a92f/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= github.com/ant31/crd-validation v0.0.0-20180702145049-30f8a35d0ac2 h1:CDDf61yprxfS7bmBPyhH8pxaobD2VbO3d7laAxJbZos= github.com/ant31/crd-validation v0.0.0-20180702145049-30f8a35d0ac2/go.mod h1:X0noFIik9YqfhGYBLEHg8LJKEwy7QIitLQuFMpKLcPk= +github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= @@ -458,6 +463,8 @@ github.com/grpc-ecosystem/grpc-gateway v1.3.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpg github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.2 h1:S+ef0492XaIknb8LMjcwgW2i3cNTzDYMmDrOThOJNWc= github.com/grpc-ecosystem/grpc-gateway v1.9.2/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.13.0 h1:sBDQoHXrOlfPobnKw69FIKa1wg9qsLLvvQ/Y19WtFgI= +github.com/grpc-ecosystem/grpc-gateway v1.13.0/go.mod h1:8XEsbTttt/W+VvjtQhLACqCisSPWTxCZ7sBRjU6iH9c= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/golang-lru v0.0.0-20180201235237-0fb14efe8c47/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -644,8 +651,8 @@ github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+v github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pingcap/advanced-statefulset v0.3.1 h1:LxfAdpY2MV/b0MUlASYWjcPfUR161Xly1rA7oaIi684= -github.com/pingcap/advanced-statefulset v0.3.1/go.mod h1:rg2p1v6AGsKhvEZi6Sm0YNYJCmdXdZZhQ6Sviei7Ivs= +github.com/pingcap/advanced-statefulset v0.3.2 h1:cdnmWNaldoAyAWL/614Nr3hydnAzJEhSDMdIB6votZU= +github.com/pingcap/advanced-statefulset v0.3.2/go.mod h1:rg2p1v6AGsKhvEZi6Sm0YNYJCmdXdZZhQ6Sviei7Ivs= github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8 h1:USx2/E1bX46VG32FIw034Au6seQ2fY9NEILmNh/UlQg= github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ= github.com/pingcap/errors v0.11.0 
h1:DCJQB8jrHbQ1VVlMFIrbj2ApScNNotVmkSNplu2yUt4= @@ -679,6 +686,7 @@ github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uY github.com/robfig/cron v1.1.0 h1:jk4/Hud3TTdcrJgUOBgsqrZBarcxl6ADIjSC2iniwLY= github.com/robfig/cron v1.1.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc= github.com/rogpeppe/go-internal v1.0.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.0.1-alpha.3/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= @@ -862,6 +870,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297 h1:k7pJ2yAPLPgbskkFdhRCsA77k2fySZ1zf2zCjvQCiIM= golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191002035440-2ec189313ef0 h1:2mqDk8w/o6UmeUCu5Qiq2y7iMf6anbx+YA8d1JFoFrs= +golang.org/x/net v0.0.0-20191002035440-2ec189313ef0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -984,6 +994,8 @@ google.golang.org/genproto v0.0.0-20190508193815-b515fa19cec8/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= google.golang.org/genproto v0.0.0-20190620144150-6af8c5fc6601 h1:9VBRTdmgQxbs6HE0sUnMrSWNePppAJU07NYvX5dIB04= google.golang.org/genproto v0.0.0-20190620144150-6af8c5fc6601/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= +google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c h1:hrpEMCZ2O7DR5gC1n2AJGVhrwiEjOi35+jxtIuZpTMo= +google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= @@ -991,6 +1003,8 @@ google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ij google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0 h1:AzbTB6ux+okLTzP8Ru1Xs41C303zdcfEht7MQnYJt5A= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.24.0 h1:vb/1TCsVn3DcJlQ0Gs1yB1pKI6Do2/QNwxdKqmc/b0s= +google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1001,6 +1015,7 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/gcfg.v1 v1.2.0 h1:0HIbH907iBTAntm+88IJV2qmJALDAh8sPekI9Vc1fm0= gopkg.in/gcfg.v1 v1.2.0/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo= gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df/go.mod h1:LRQQ+SO6ZHR7tOkpBDuZnXENFzX8qRjMDMyPD6BRkCw= @@ -1018,11 +1033,13 @@ gopkg.in/square/go-jose.v2 v2.2.2 h1:orlkJ3myw8CN1nVQHBFfloD+L3egixIa4FvUP6RosSA gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/warnings.v0 v0.1.1 h1:XM28wIgFzaBmeZ5dNHIpWLQpt/9DGKxk+rCg/22nnYE= gopkg.in/warnings.v0 v0.1.1/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.0.0/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gotest.tools v2.1.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= @@ -1083,6 +1100,7 @@ k8s.io/kubectl v0.0.0-20190918164019-21692a0861df/go.mod h1:AjffgL1ZYSrbpRJHER9v k8s.io/kubelet v0.0.0-20190918162654-250a1838aa2c/go.mod h1:LGhpyzd/3AkWcFcQJ3yO1UxMnJ6urMkCYfCp4iVxhjs= k8s.io/kubernetes v1.16.0 h1:WPaqle2JWogVzLxhN6IK67u62IHKKrtYF7MS4FVR4/E= k8s.io/kubernetes v1.16.0/go.mod h1:nlP2zevWKRGKuaaVbKIwozU0Rjg9leVDXkL4YTtjmVs= +k8s.io/legacy-cloud-providers v0.0.0-20190918163543-cfa506e53441 h1:JkEasocl8SM6+H65kaEUjtLAOFYzwaQOVTDdy5DLOXk= k8s.io/legacy-cloud-providers v0.0.0-20190918163543-cfa506e53441/go.mod h1:Phw/j+7dcoTPXRkv9Nyi3RJuA6SVSoHlc7M5K1pHizM= k8s.io/metrics v0.0.0-20190918162108-227c654b2546/go.mod h1:XUFuIsGbIqaUga6Ivs02cCzxNjY4RPRvYnW0KhmnpQY= k8s.io/repo-infra v0.0.0-20181204233714-00fe14e3d1a3/go.mod h1:+G1xBfZDfVFsm1Tj/HNCvg4QqWx8rJ2Fxpqr1rqp/gQ= diff --git a/hack/check-EOF.sh b/hack/check-EOF.sh index 20417e109d..09336ee18a 100755 --- a/hack/check-EOF.sh +++ b/hack/check-EOF.sh @@ -33,6 +33,7 @@ FILELIST=($(find . -type f -not \( -path './vendor/*' \ -o -path './.idea/*' \ -o -path './.DS_Store' \ -o -path './*/.DS_Store' \ + -o -path './data' \ \))) NUM=0 diff --git a/hack/create-cert.sh b/hack/create-cert.sh new file mode 100755 index 0000000000..b0dabab060 --- /dev/null +++ b/hack/create-cert.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash + +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +usage() { + cat <> ${tmpdir}/csr.conf +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name +[req_distinguished_name] +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +extendedKeyUsage = serverAuth +subjectAltName = @alt_names +[alt_names] +DNS.1 = ${service} +DNS.2 = ${service}.${namespace} +DNS.3 = ${service}.${namespace}.svc +DNS.4 = *.${service} +DNS.5 = *.${service}.${namespace} +DNS.5 = *.${service}.${namespace}.svc +IP.1 = 127.0.0.1 +EOF + +openssl genrsa -out ${tmpdir}/server-key.pem 2048 +openssl req -new -key ${tmpdir}/server-key.pem -subj "/CN=${service}.${namespace}.svc" -out ${tmpdir}/server.csr -config ${tmpdir}/csr.conf + + # clean-up any previously created CSR for our service. Ignore errors if not present. +kubectl delete csr ${csrName} 2>/dev/null || true + + # create server cert/key CSR and send to k8s API +cat <&2 + exit 1 +fi + + echo ${serverCert} | openssl base64 -d -A -out ${tmpdir}/server-cert.pem + + # create the secret with CA cert and server cert/key +kubectl create secret tls ${secret} \ + --key=${tmpdir}/server-key.pem \ + --cert=${tmpdir}/server-cert.pem \ + --dry-run -o yaml | + kubectl -n ${namespace} apply -f - diff --git a/hack/e2e-examples.sh b/hack/e2e-examples.sh new file mode 100755 index 0000000000..895d93ce66 --- /dev/null +++ b/hack/e2e-examples.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# E2E entrypoint script for examples. +# + +ROOT=$(unset CDPATH && cd $(dirname "${BASH_SOURCE[0]}")/.. && pwd) +cd $ROOT + +source "${ROOT}/hack/lib.sh" + +hack::ensure_kind + +echo "info: create a Kubernetes cluster" +$KIND_BIN create cluster + +echo "info: start tidb-operator" +hack/local-up-operator.sh + +echo "info: testing examples" +export PATH=$PATH:$OUTPUT_BIN +hack::ensure_kubectl + +cnt=0 +for t in $(find tests/examples/ -regextype sed -regex '.*/[0-9]\{3\}-.*\.sh'); do + echo "info: testing $t" + $t + if [ $? -eq 0 ]; then + echo "info: test $t passed" + else + echo "error: test $t failed" + ((cnt++)) + fi +done +if [ $cnt -gt 0 ]; then + echo "fatal: $cnt tests failed" + exit 1 +fi diff --git a/hack/e2e-openshift.sh b/hack/e2e-openshift.sh new file mode 100755 index 0000000000..7e5e819d31 --- /dev/null +++ b/hack/e2e-openshift.sh @@ -0,0 +1,131 @@ +#!/usr/bin/env bash + +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# E2E entrypoint script for OpenShift. +# + +set -o errexit +set -o nounset +set -o pipefail + +ROOT=$(unset CDPATH && cd $(dirname "${BASH_SOURCE[0]}")/.. && pwd) +cd $ROOT + +PULL_SECRET_FILE=${PULL_SECRET_FILE:-} + +if [ ! -e "$PULL_SECRET_FILE" ]; then + echo "error: pull secret file '$PULL_SECRET_FILE' does not exist" + exit 1 +fi + +vmx_cnt=$(grep -cw vmx /proc/cpuinfo) +if [ "$vmx_cnt" -gt 0 ]; then + echo "info: nested virtualization enabled (vmx cnt: $vmx_cnt)" +else + echo "error: nested virtualization not enabled, please refer to https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances" + exit 1 +fi + +echo "info: install required software packages" +sudo yum install -y jq git make golang +sudo yum install -y yum-utils +sudo yum-config-manager \ + --add-repo https://download.docker.com/linux/centos/docker-ce.repo +sudo yum install -y --nobest docker-ce docker-ce-cli containerd.io +if ! systemctl is-active --quiet docker; then + sudo systemctl start docker +fi +echo "info: printing docker information" +sudo docker info +sudo chmod o+rw /var/run/docker.sock + +CRC_HOME=$HOME/.crc +echo "info: mounting disk onto $CRC_HOME" +if ! mountpoint $CRC_HOME &>/dev/null; then + sudo mkfs.ext4 -F /dev/disk/by-id/google-local-ssd-0 + sudo rm -rf $CRC_HOME + mkdir $CRC_HOME + sudo mount /dev/disk/by-id/google-local-ssd-0 $CRC_HOME + sudo chown -R $(id -u):$(id -g) $CRC_HOME +fi + +echo "info: downloading latest crc" +cd $HOME +CRC_VERSION=$(curl --retry 10 -L -s 'https://mirror.openshift.com/pub/openshift-v4/clients/crc/latest/release-info.json' | jq -r '.version.crcVersion') +if !
test -e crc-linux-amd64.tar.xz; then + curl --retry 10 -LO https://mirror.openshift.com/pub/openshift-v4/clients/crc/$CRC_VERSION/crc-linux-amd64.tar.xz + tar -xvf crc-linux-amd64.tar.xz +fi +export PATH=$HOME/crc-linux-$CRC_VERSION-amd64:$PATH + +crc version + +crcStatus=$(crc status 2>/dev/null | awk '/CRC VM:/ {print $3}') || true +if [[ "$crcStatus" == "Running" ]]; then + echo "info: OpenShift cluster is running" + crc status +else + echo "info: starting OpenShift cluster" + crc setup + crc config set cpus 6 + crc config set memory 24576 + crc start --pull-secret-file $PULL_SECRET_FILE +fi + +echo "info: login" +eval $(crc oc-env) +KUBEADMIN_PASSWORD=$(cat $HOME/.crc/cache/crc_libvirt_*/kubeadmin-password) +oc login -u kubeadmin -p "$KUBEADMIN_PASSWORD" https://api.crc.testing:6443 --insecure-skip-tls-verify + +echo "info: building images" +cd $HOME/tidb-operator +./hack/run-in-container.sh bash -c 'make docker e2e-docker +images=( + tidb-operator:latest + tidb-backup-manager:latest + tidb-operator-e2e:latest +) +for image in ${images[@]}; do + docker save localhost:5000/pingcap/$image -o $image.tar.gz +done +' + +echo "info: pushing images" +OC_PROJECT=openshift +oc extract secret/router-ca --keys=tls.crt -n openshift-ingress-operator +sudo mkdir /etc/docker/certs.d/default-route-openshift-image-registry.apps-crc.testing/ -p +sudo mv tls.crt /etc/docker/certs.d/default-route-openshift-image-registry.apps-crc.testing/ +docker login -u kubeadmin --password-stdin default-route-openshift-image-registry.apps-crc.testing <<< "$(oc whoami -t)" + +images=( + tidb-operator:latest + tidb-backup-manager:latest + tidb-operator-e2e:latest +) +for image in ${images[@]}; do + sudo chown -R $(id -u):$(id -g) $image.tar.gz + docker load -i $image.tar.gz + docker tag localhost:5000/pingcap/$image image-registry.openshift-image-registry.svc:5000/$OC_PROJECT/$image + docker tag localhost:5000/pingcap/$image default-route-openshift-image-registry.apps-crc.testing/$OC_PROJECT/$image + docker push default-route-openshift-image-registry.apps-crc.testing/$OC_PROJECT/$image +done + +export PROVIDER=openshift +export TIDB_OPERATOR_IMAGE=image-registry.openshift-image-registry.svc:5000/$OC_PROJECT/tidb-operator:latest +export TIDB_BACKUP_MANAGER_IMAGE=image-registry.openshift-image-registry.svc:5000/$OC_PROJECT/tidb-backup-manager:latest +export E2E_IMAGE=image-registry.openshift-image-registry.svc:5000/$OC_PROJECT/tidb-operator-e2e:latest +# 'Restarter' test starts 1 replica of pd and tikv and can pass in single-node OpenShift cluster.
+./hack/run-e2e.sh --ginkgo.focus 'Restarter' diff --git a/hack/e2e.sh b/hack/e2e.sh index df5ace923b..3b1442d9b0 100755 --- a/hack/e2e.sh +++ b/hack/e2e.sh @@ -46,22 +46,39 @@ Usage: hack/e2e.sh [-h] -- [extra test args] Environments: - DOCKER_REGISTRY image docker registry - IMAGE_TAG image tag - SKIP_BUILD skip building binaries - SKIP_IMAGE_BUILD skip build and push images - SKIP_UP skip starting the cluster - SKIP_DOWN skip shutting down the cluster - REUSE_CLUSTER reuse existing cluster if found - KUBE_VERSION the version of Kubernetes to test against - KUBE_WORKERS the number of worker nodes (excludes master nodes), defaults: 3 - DOCKER_IO_MIRROR configure mirror for docker.io - GCR_IO_MIRROR configure mirror for gcr.io - QUAY_IO_MIRROR configure mirror for quay.io - KIND_DATA_HOSTPATH (for kind) the host path of data directory for kind cluster, defaults: none - GINKGO_NODES ginkgo nodes to run specs, defaults: 1 - GINKGO_PARALLEL if set to `y`, will run specs in parallel, the number of nodes will be the number of cpus - GINKGO_NO_COLOR if set to `y`, suppress color output in default reporter + PROVIDER Kubernetes provider, e.g. kind, gke, eks, defaults: kind + DOCKER_REPO docker image repo + IMAGE_TAG image tag + CLUSTER the name of e2e cluster, defaults: tidb-operator + KUBECONFIG path to the kubeconfig file, defaults: ~/.kube/config + SKIP_BUILD skip building binaries + SKIP_IMAGE_BUILD skip build and push images + SKIP_IMAGE_LOAD skip loading images + SKIP_UP skip starting the cluster + SKIP_DOWN skip shutting down the cluster + SKIP_TEST skip running the test + KUBE_VERSION the version of Kubernetes to test against + KUBE_WORKERS the number of worker nodes (excludes master nodes), defaults: 3 + DOCKER_IO_MIRROR configure mirror for docker.io + GCR_IO_MIRROR configure mirror for gcr.io + QUAY_IO_MIRROR configure mirror for quay.io + KIND_DATA_HOSTPATH (kind only) the host path of data directory for kind cluster, defaults: none + GCP_PROJECT (gke only) the GCP project to run in + GCP_CREDENTIALS (gke only) the GCP service account to use + GCP_REGION (gke only) the GCP region, if specified a regional cluster is created + GCP_ZONE (gke only) the GCP zone, if specified a zonal cluster is created + GCP_SSH_PRIVATE_KEY (gke only) the path to the private ssh key + GCP_SSH_PUBLIC_KEY (gke only) the path to the public ssh key + GCP_MACHINE_TYPE (gke only) the machine type of instance, defaults: n1-standard-4 + AWS_ACCESS_KEY_ID (eks only) the aws access key id + AWS_SECRET_ACCESS_KEY (eks only) the aws secret access key + AWS_REGION (eks only) the aws region + AWS_ZONE (eks only) the aws zone + GINKGO_NODES ginkgo nodes to run specs, defaults: 1 + GINKGO_PARALLEL if set to `y`, will run specs in parallel, the number of nodes will be the number of cpus + GINKGO_NO_COLOR if set to `y`, suppress color output in default reporter + RUNNER_SUITE_NAME the suite name of runner + SKIP_GINKGO if set to `y`, skip ginkgo Examples: @@ -83,12 +100,60 @@ Examples: 3) reuse the cluster and don't tear down it after the testing - REUSE_CLUSTER=y SKIP_DOWN=y ./hack/e2e.sh -- + # for the first time, skip the down phase + SKIP_DOWN=y ./hack/e2e.sh -- + # then skip both the up/down phase in subsequent tests + SKIP_UP=y SKIP_DOWN=y ./hack/e2e.sh -- 4) use registry mirrors DOCKER_IO_MIRROR=https://dockerhub.azk8s.cn QUAY_IO_MIRROR=https://quay.azk8s.cn GCR_IO_MIRROR=https://gcr.azk8s.cn ./hack/e2e.sh -- +5) run e2e with gke provider locally + + You need to prepare a GCP service account with the following permissions: + 
+ - Compute Network Admin + - Kubernetes Engine Admin + - Service Account User + - Storage Admin + - Compute Instance Admin (v1) + + You can create an ssh keypair with ssh-keygen at ~/.ssh/google_compute_engine + or specify an existing ssh keypair with the following environments: + + export GCP_SSH_PRIVATE_KEY= + export GCP_SSH_PUBLIC_KEY= + + Then run with the following additional GCP-specific environments: + + export GCP_PROJECT= + export GCP_CREDENTIALS= + export GCP_ZONE=us-central1-b + + PROVIDER=gke ./hack/e2e.sh -- + + If you run outside of the dev container started by + ./hack/run-in-container.sh, the Google Cloud SDK must be installed on your + machine. + +6) run e2e with eks provider locally + + You need to configure your aws credential and region or set them via the + following environments: + + export AWS_ACCESS_KEY_ID= + export AWS_SECRET_ACCESS_KEY= + export AWS_REGION= + + then run e2e with eks provider: + + PROVIDER=eks ./hack/e2e.sh -- + + If you run outside of the dev container started by + ./hack/run-in-container.sh, the AWS CLI must be installed on your + machine. + EOF } @@ -106,38 +171,59 @@ if [ "${1:-}" == "--" ]; then shift fi -hack::ensure_kind -hack::ensure_kubectl -hack::ensure_helm - -DOCKER_REGISTRY=${DOCKER_REGISTRY:-localhost:5000} +PROVIDER=${PROVIDER:-kind} +DOCKER_REPO=${DOCKER_REPO:-localhost:5000/pingcap} IMAGE_TAG=${IMAGE_TAG:-latest} CLUSTER=${CLUSTER:-tidb-operator} KUBECONFIG=${KUBECONFIG:-~/.kube/config} -KUBECONTEXT=kind-$CLUSTER SKIP_BUILD=${SKIP_BUILD:-} SKIP_IMAGE_BUILD=${SKIP_IMAGE_BUILD:-} +SKIP_IMAGE_LOAD=${SKIP_IMAGE_LOAD:-} SKIP_UP=${SKIP_UP:-} SKIP_DOWN=${SKIP_DOWN:-} +SKIP_TEST=${SKIP_TEST:-} REUSE_CLUSTER=${REUSE_CLUSTER:-} KIND_DATA_HOSTPATH=${KIND_DATA_HOSTPATH:-none} +GCP_PROJECT=${GCP_PROJECT:-} +GCP_CREDENTIALS=${GCP_CREDENTIALS:-} +GCP_REGION=${GCP_REGION:-} +GCP_ZONE=${GCP_ZONE:-} +GCP_SSH_PRIVATE_KEY=${GCP_SSH_PRIVATE_KEY:-} +GCP_SSH_PUBLIC_KEY=${GCP_SSH_PUBLIC_KEY:-} +GCP_MACHINE_TYPE=${GCP_MACHINE_TYPE:-n1-standard-4} +AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} +AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} +AWS_REGION=${AWS_REGION:-} +AWS_ZONE=${AWS_ZONE:-} KUBE_VERSION=${KUBE_VERSION:-v1.12.10} KUBE_WORKERS=${KUBE_WORKERS:-3} DOCKER_IO_MIRROR=${DOCKER_IO_MIRROR:-} GCR_IO_MIRROR=${GCR_IO_MIRROR:-} QUAY_IO_MIRROR=${QUAY_IO_MIRROR:-} +SKIP_GINKGO=${SKIP_GINKGO:-} +RUNNER_SUITE_NAME=${RUNNER_SUITE_NAME:-} -echo "DOCKER_REGISTRY: $DOCKER_REGISTRY" +echo "PROVIDER: $PROVIDER" +echo "DOCKER_REPO: $DOCKER_REPO" echo "IMAGE_TAG: $IMAGE_TAG" echo "CLUSTER: $CLUSTER" echo "KUBECONFIG: $KUBECONFIG" -echo "KUBECONTEXT: $KUBECONTEXT" echo "SKIP_BUILD: $SKIP_BUILD" echo "SKIP_IMAGE_BUILD: $SKIP_IMAGE_BUILD" echo "SKIP_UP: $SKIP_UP" echo "SKIP_DOWN: $SKIP_DOWN" echo "KIND_DATA_HOSTPATH: $KIND_DATA_HOSTPATH" +echo "GCP_PROJECT: $GCP_PROJECT" +echo "GCP_CREDENTIALS: $GCP_CREDENTIALS" +echo "GCP_REGION: $GCP_REGION" +echo "GCP_ZONE: $GCP_ZONE" +# We shouldn't print aws credential environments.
+# echo "AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID" +# echo "AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY" +echo "AWS_REGION: $AWS_REGION" +echo "AWS_ZONE: $AWS_ZONE" echo "KUBE_VERSION: $KUBE_VERSION" +echo "KUBE_WORKERS: $KUBE_WORKERS" echo "DOCKER_IO_MIRROR: $DOCKER_IO_MIRROR" echo "GCR_IO_MIRROR: $GCR_IO_MIRROR" echo "QUAY_IO_MIRROR: $QUAY_IO_MIRROR" @@ -150,7 +236,8 @@ kind_node_images["v1.13.12"]="kindest/node:v1.13.12@sha256:5e8ae1a4e39f3d151d420 kind_node_images["v1.14.10"]="kindest/node:v1.14.10@sha256:81ae5a3237c779efc4dda43cc81c696f88a194abcc4f8fa34f86cf674aa14977" kind_node_images["v1.15.7"]="kindest/node:v1.15.7@sha256:e2df133f80ef633c53c0200114fce2ed5e1f6947477dbc83261a6a921169488d" kind_node_images["v1.16.4"]="kindest/node:v1.16.4@sha256:b91a2c2317a000f3a783489dfb755064177dbc3a0b2f4147d50f04825d016f55" -kind_node_images["v1.17.0"]="kindest/node:v1.17.0@sha256:9512edae126da271b66b990b6fff768fbb7cd786c7d39e86bdf55906352fdf62" +kind_node_images["v1.17.2"]="kindest/node:v1.17.2@sha256:59df31fc61d1da5f46e8a61ef612fa53d3f9140f82419d1ef1a6b9656c6b737c" +kind_node_images["v1.18.0"]="kindest/node:v1.18.0@sha256:0e20578828edd939d25eb98496a685c76c98d54084932f76069f886ec315d694" function e2e::image_build() { if [ -n "$SKIP_BUILD" ]; then @@ -161,23 +248,8 @@ function e2e::image_build() { echo "info: skip building and pushing images" return fi - DOCKER_REGISTRY=$DOCKER_REGISTRY IMAGE_TAG=$IMAGE_TAG make docker - DOCKER_REGISTRY=$DOCKER_REGISTRY IMAGE_TAG=$IMAGE_TAG make e2e-docker -} - -function e2e::image_load() { - local names=( - pingcap/tidb-operator - pingcap/tidb-operator-e2e - ) - for n in ${names[@]}; do - $KIND_BIN load docker-image --name $CLUSTER $DOCKER_REGISTRY/$n:$IMAGE_TAG - done -} - -function e2e::cluster_exists() { - local name="$1" - $KIND_BIN get clusters | grep $CLUSTER &>/dev/null + DOCKER_REPO=$DOCKER_REPO IMAGE_TAG=$IMAGE_TAG make docker + DOCKER_REPO=$DOCKER_REPO IMAGE_TAG=$IMAGE_TAG make e2e-docker } function e2e::__restart_docker() { @@ -201,14 +273,6 @@ function e2e::__restart_docker() { echo "info: done restarting docker" } -# e2e::__cluster_is_alive checks if the cluster is alive or not -function e2e::__cluster_is_alive() { - local ret=0 - echo "info: checking the cluster version" - $KUBECTL_BIN --context $KUBECONTEXT version --short || ret=$? 
- return $ret -} - function e2e::__configure_docker_mirror_for_dind() { echo "info: configure docker.io mirror '$DOCKER_IO_MIRROR' for DinD" cat < /etc/docker/daemon.json.tmp @@ -225,29 +289,8 @@ EOF fi } -function e2e::up() { - if [ -n "$SKIP_UP" ]; then - echo "info: skip starting a new cluster" - return - fi - if [ -n "$DOCKER_IO_MIRROR" -a -n "${DOCKER_IN_DOCKER_ENABLED:-}" ]; then - e2e::__configure_docker_mirror_for_dind - fi - if e2e::cluster_exists $CLUSTER; then - if [ -n "$REUSE_CLUSTER" ]; then - if e2e::__cluster_is_alive; then - echo "info: REUSE_CLUSTER is enabled and the cluster is alive, reusing it" - return - else - echo "info: REUSE_CLUSTER is enabled but the cluster is not alive, trying to recreate it" - fi - fi - echo "info: deleting the cluster '$CLUSTER'" - $KIND_BIN delete cluster --name $CLUSTER - fi - echo "info: starting a new cluster" - tmpfile=$(mktemp) - trap "test -f $tmpfile && rm $tmpfile" RETURN +function e2e::create_kindconfig() { + local tmpfile=${1} cat < $tmpfile kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 @@ -315,137 +358,181 @@ EOF EOF fi } +} + +hack::ensure_kind +hack::ensure_kubectl +hack::ensure_helm + +e2e::image_build + +if [ -n "$DOCKER_IO_MIRROR" -a -n "${DOCKER_IN_DOCKER_ENABLED:-}" ]; then + e2e::__configure_docker_mirror_for_dind +fi + +kubetest2_args=( + $PROVIDER +) + +if [ -n "$RUNNER_SUITE_NAME" ]; then + kubetest2_args+=( + --suite-name "$RUNNER_SUITE_NAME" + ) +fi + +if [ -z "$SKIP_UP" ]; then + kubetest2_args+=(--up) +fi + +if [ -z "$SKIP_DOWN" ]; then + kubetest2_args+=(--down) +fi + +if [ -z "$SKIP_TEST" ]; then + kubetest2_args+=(--test exec) +fi + +if [ "$PROVIDER" == "kind" ]; then + tmpfile=$(mktemp) + trap "test -f $tmpfile && rm $tmpfile" EXIT + e2e::create_kindconfig $tmpfile echo "info: print the contents of kindconfig" cat $tmpfile - echo "info: end of the contents of kindconfig" - echo "info: creating the cluster '$CLUSTER'" - local image="" + image="" for v in ${!kind_node_images[*]}; do - if [[ "$KUBE_VERSION" == "$v" ]]; then + if [[ "$KUBE_VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ && "$KUBE_VERSION" == "$v" ]]; then + image=${kind_node_images[$v]} + echo "info: image for $KUBE_VERSION: $image" + elif [[ "$KUBE_VERSION" =~ ^v[0-9]+\.[0-9]+$ && "$KUBE_VERSION" == "${v%.*}" ]]; then image=${kind_node_images[$v]} echo "info: image for $KUBE_VERSION: $image" - break fi done if [ -z "$image" ]; then echo "error: no image for $KUBE_VERSION, exit" exit 1 fi - # Retry on error. Sometimes, kind will fail with the following error: - # - # OCI runtime create failed: container_linux.go:346: starting container process caused "process_linux.go:319: getting the final child's pid from pipe caused \"EOF\"": unknown - # - # TODO this error should be related to docker or linux kernel, find the root cause. - hack::wait_for_success 120 5 "$KIND_BIN create cluster --config $KUBECONFIG --name $CLUSTER --image $image --config $tmpfile -v 4" - # make it able to schedule pods on control-plane, then less resources we required - # This is disabled because when hostNetwork is used, pd requires 2379/2780 - # which may conflict with etcd on control-plane. 
- #echo "info: remove 'node-role.kubernetes.io/master' taint from $CLUSTER-control-plane" - #kubectl taint nodes $CLUSTER-control-plane node-role.kubernetes.io/master- -} - -function e2e::__wait_for_ds() { - local ns="$1" - local name="$2" - local retries="${3:-300}" - echo "info: waiting for pods of daemonset $ns/$name are ready (retries: $retries, interval: 1s)" - for ((i = 0; i < retries; i++)) { - read a b <<<$($KUBECTL_BIN --context $KUBECONTEXT -n $ns get ds/$name -ojsonpath='{.status.desiredNumberScheduled} {.status.numberReady}{"\n"}') - if [[ "$a" -gt 0 && "$a" -eq "$b" ]]; then - echo "info: all pods of daemonset $ns/$name are ready (desired: $a, ready: $b)" - return 0 - fi - echo "info: pods of daemonset $ns/$name (desired: $a, ready: $b)" - sleep 1 - } - echo "info: timed out waiting for pods of daemonset $ns/$name are ready" - return 1 -} - -function e2e::__wait_for_deploy() { - local ns="$1" - local name="$2" - local retries="${3:-300}" - echo "info: waiting for pods of deployment $ns/$name are ready (retries: $retries, interval: 1s)" - for ((i = 0; i < retries; i++)) { - read a b <<<$($KUBECTL_BIN --context $KUBECONTEXT -n $ns get deploy/$name -ojsonpath='{.spec.replicas} {.status.readyReplicas}{"\n"}') - if [[ "$a" -gt 0 && "$a" -eq "$b" ]]; then - echo "info: all pods of deployment $ns/$name are ready (desired: $a, ready: $b)" - return 0 - fi - echo "info: pods of deployment $ns/$name (desired: $a, ready: $b)" - sleep 1 - } - echo "info: timed out waiting for pods of deployment $ns/$name are ready" - return 1 -} - -function e2e::setup_local_pvs() { - echo "info: preparing disks" - for n in $($KIND_BIN get nodes --name=$CLUSTER); do - docker exec -i $n bash <<'EOF' -test -d /mnt/disks || mkdir -p /mnt/disks -df -h /mnt/disks -if mountpoint /mnt/disks &>/dev/null; then - echo "info: /mnt/disks is a mountpoint" -else - echo "info: /mnt/disks is not a mountpoint, creating local volumes on the rootfs" -fi -cd /mnt/disks -for ((i = 1; i <= 32; i++)) { - if [ ! -d vol$i ]; then - mkdir vol$i + kubetest2_args+=(--image-name $image) + kubetest2_args+=( + # add some retires because kind may fail to start the cluster when the + # load is high + --up-retries 3 + --cluster-name "$CLUSTER" + --config "$tmpfile" + --verbosity 4 + ) +elif [ "$PROVIDER" == "gke" ]; then + if [ -z "$GCP_PROJECT" ]; then + echo "error: GCP_PROJECT is required" + exit 1 fi - if ! mountpoint vol$i &>/dev/null; then - mount --bind vol$i vol$i + if [ -z "$GCP_CREDENTIALS" ]; then + echo "error: GCP_CREDENTIALS is required" + exit 1 fi -} + if [ -z "$GCP_REGION" -a -z "$GCP_ZONE" ]; then + echo "error: either GCP_REGION or GCP_ZONE must be specified" + exit 1 + elif [ -n "$GCP_REGION" -a -n "$GCP_ZONE" ]; then + echo "error: GCP_REGION or GCP_ZONE cannot be both set" + exit 1 + fi + echo "info: preparing ssh keypairs for GCP" + if [ ! -d ~/.ssh ]; then + mkdir ~/.ssh + fi + if [ ! -e ~/.ssh/google_compute_engine -a -n "$GCP_SSH_PRIVATE_KEY" ]; then + echo "Copying $GCP_SSH_PRIVATE_KEY to ~/.ssh/google_compute_engine" >&2 + cp $GCP_SSH_PRIVATE_KEY ~/.ssh/google_compute_engine + chmod 0600 ~/.ssh/google_compute_engine + fi + if [ ! -e ~/.ssh/google_compute_engine.pub -a -n "$GCP_SSH_PUBLIC_KEY" ]; then + echo "Copying $GCP_SSH_PUBLIC_KEY to ~/.ssh/google_compute_engine.pub" >&2 + cp $GCP_SSH_PUBLIC_KEY ~/.ssh/google_compute_engine.pub + chmod 0600 ~/.ssh/google_compute_engine.pub + fi + ! 
read -r -d '' nodePoolsJSON <&1 | awk '{print $2}') + [[ "$tmpv" == "$v" ]] + return + fi + return 1 +} + +function hack::__ensure_kubetest2() { + local n="$1" + if hack::__verify_kubetest2 $n $KUBETEST2_VERSION; then + return 0 + fi + local tmpfile=$(mktemp) + trap "test -f $tmpfile && rm $tmpfile" RETURN + echo "info: downloading $n $KUBETEST2_VERSION" + curl --retry 10 -L -o - https://github.com/cofyc/kubetest2/releases/download/$KUBETEST2_VERSION/$n-$OS-$ARCH.gz | gunzip > $tmpfile + mv $tmpfile $OUTPUT_BIN/$n + chmod +x $OUTPUT_BIN/$n +} + +function hack::ensure_kubetest2() { + hack::__ensure_kubetest2 kubetest2 + hack::__ensure_kubetest2 kubetest2-gke + hack::__ensure_kubetest2 kubetest2-kind + hack::__ensure_kubetest2 kubetest2-eks +} + +function hack::verify_aws_k8s_tester() { + if test -x $AWS_K8S_TESTER_BIN; then + [[ "$($AWS_K8S_TESTER_BIN version | jq '."release-version"' -r)" == "$AWS_K8S_TESTER_VERSION" ]] + return + fi + return 1 +} + +function hack::ensure_aws_k8s_tester() { + if hack::verify_aws_k8s_tester; then + return + fi + local DOWNLOAD_URL=https://github.com/aws/aws-k8s-tester/releases/download + local tmpfile=$(mktemp) + trap "test -f $tmpfile && rm $tmpfile" RETURN + curl --retry 10 -L -o $tmpfile https://github.com/aws/aws-k8s-tester/releases/download/$AWS_K8S_TESTER_VERSION/aws-k8s-tester-$AWS_K8S_TESTER_VERSION-$OS-$ARCH + mv $tmpfile $AWS_K8S_TESTER_BIN + chmod +x $AWS_K8S_TESTER_BIN +} + +function hack::verify_gen_crd_api_references_docs() { + if test -x "$DOCS_BIN"; then + # TODO check version when the binary version is available. + return + fi + return 1 +} + +function hack::ensure_gen_crd_api_references_docs() { + if hack::verify_gen_crd_api_references_docs; then + return 0 + fi + echo "Installing gen_crd_api_references_docs v$DOCS_VERSION..." + tmpdir=$(mktemp -d) + trap "test -d $tmpdir && rm -r $tmpdir" RETURN + curl --retry 10 -L -o ${tmpdir}/docs-bin.tar.gz https://github.com/ahmetb/gen-crd-api-reference-docs/releases/download/v${DOCS_VERSION}/gen-crd-api-reference-docs_${OS}_${ARCH}.tar.gz + tar -zvxf ${tmpdir}/docs-bin.tar.gz -C ${tmpdir} + mv ${tmpdir}/gen-crd-api-reference-docs ${DOCS_BIN} + chmod +x ${DOCS_BIN} +} diff --git a/hack/local-up-operator.sh b/hack/local-up-operator.sh new file mode 100755 index 0000000000..2d8bdac03a --- /dev/null +++ b/hack/local-up-operator.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash + +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This command runs tidb-operator in Kubernetes. +# + +set -o errexit +set -o nounset +set -o pipefail + +ROOT=$(unset CDPATH && cd $(dirname "${BASH_SOURCE[0]}")/.. && pwd) +cd $ROOT + +source "${ROOT}/hack/lib.sh" + +function usage() { + cat <<'EOF' +This command runs tidb-operator in Kubernetes. + +Usage: hack/local-up-operator.sh [-hi] + + -h show this message and exit + -i install dependencies only + +Environments: + + PROVIDER Kubernetes provider. Defaults: kind. + CLUSTER the name of e2e cluster. Defaults to kind for kind provider.
+ KUBECONFIG path to the kubeconfig file, defaults: ~/.kube/config + KUBECONTEXT context in kubeconfig file, defaults to current context + NAMESPACE Kubernetes namespace in which we run our tidb-operator. + DOCKER_REGISTRY image docker registry + IMAGE_TAG image tag + SKIP_IMAGE_BUILD skip build and push images + +EOF +} + +installOnly=false +while getopts "h?i" opt; do + case "$opt" in + h|\?) + usage + exit 0 + ;; + i) + installOnly=true + ;; + esac +done + +PROVIDER=${PROVIDER:-kind} +CLUSTER=${CLUSTER:-} +KUBECONFIG=${KUBECONFIG:-~/.kube/config} +KUBECONTEXT=${KUBECONTEXT:-} +NAMESPACE=${NAMESPACE:-pingcap} +DOCKER_REGISTRY=${DOCKER_REGISTRY:-localhost:5000} +IMAGE_TAG=${IMAGE_TAG:-latest} +SKIP_IMAGE_BUILD=${SKIP_IMAGE_BUILD:-} + +hack::ensure_kubectl +hack::ensure_kind +hack::ensure_helm + +if [[ "$installOnly" == "true" ]]; then + exit 0 +fi + +function hack::create_namespace() { + local ns="$1" + $KUBECTL_BIN create namespace $ns + for ((i=0; i < 30; i++)); do + local phase=$(kubectl get ns $ns -ojsonpath='{.status.phase}') + if [ "$phase" == "Active" ]; then + return 0 + fi + sleep 1 + done + return 1 +} + +function hack::wait_for_deploy() { + local ns="$1" + local name="$2" + local retries="${3:-300}" + echo "info: waiting for pods of deployment $ns/$name are ready (retries: $retries, interval: 1s)" + for ((i = 0; i < retries; i++)) { + read a b <<<$($KUBECTL_BIN --context $KUBECONTEXT -n $ns get deploy/$name -ojsonpath='{.spec.replicas} {.status.readyReplicas}{"\n"}') + if [[ "$a" -gt 0 && "$a" -eq "$b" ]]; then + echo "info: all pods of deployment $ns/$name are ready (desired: $a, ready: $b)" + return 0 + fi + echo "info: pods of deployment $ns/$name (desired: $a, ready: $b)" + sleep 1 + } + echo "info: timed out waiting for pods of deployment $ns/$name are ready" + return 1 +} + +function hack::cluster_exists() { + local c="$1" + for n in $($KIND_BIN get clusters); do + if [ "$n" == "$c" ]; then + return 0 + fi + done + return 1 +} + +echo "info: checking clusters" + +if [ "$PROVIDER" == "kind" ]; then + if [ -z "$CLUSTER" ]; then + CLUSTER=kind + fi + if ! hack::cluster_exists "$CLUSTER"; then + echo "error: kind cluster '$CLUSTER' not found, please create it or specify the right cluster name with CLUSTER environment" + exit 1 + fi +else + echo "error: only kind PROVIDER is supported" + exit 1 +fi + +if [ -z "$KUBECONTEXT" ]; then + KUBECONTEXT=$(kubectl config current-context) + echo "info: KUBECONTEXT is not set, current context $KUBECONTEXT is used" +fi + +if [ -z "$SKIP_IMAGE_BUILD" ]; then + echo "info: building docker images" + DOCKER_REGISTRY=$DOCKER_REGISTRY IMAGE_TAG=$IMAGE_TAG make docker +else + echo "info: skip building docker images" +fi + +echo "info: loading images into cluster" +images=( + $DOCKER_REGISTRY/pingcap/tidb-operator:${IMAGE_TAG} +) +for n in ${images[@]}; do + echo "info: loading image $n" + $KIND_BIN load docker-image --name $CLUSTER $n +done + +echo "info: uninstall tidb-operator" +$KUBECTL_BIN -n "$NAMESPACE" delete deploy -l app.kubernetes.io/name=tidb-operator +$KUBECTL_BIN -n "$NAMESPACE" delete pods -l app.kubernetes.io/name=tidb-operator + +echo "info: create namespace '$NAMESPACE' if absent" +if !
$KUBECTL_BIN get ns "$NAMESPACE" &>/dev/null; then + hack::create_namespace "$NAMESPACE" +fi + +echo "info: installing crds" +$KUBECTL_BIN apply -f manifests/crd.yaml + +echo "info: deploying tidb-operator" +helm_args=( + template + --name tidb-operator-dev + --namespace "$NAMESPACE" + --set operatorImage=$DOCKER_REGISTRY/pingcap/tidb-operator:${IMAGE_TAG} +) + +$HELM_BIN ${helm_args[@]} ./charts/tidb-operator/ | kubectl -n "$NAMESPACE" apply -f - + +deploys=( + tidb-controller-manager + tidb-scheduler +) +for deploy in ${deploys[@]}; do + echo "info: waiting for $NAMESPACE/$deploy to be ready" + hack::wait_for_deploy "$NAMESPACE" "$deploy" +done diff --git a/hack/prepare-e2e.sh b/hack/prepare-e2e.sh index fb32d6c531..1f35640779 100755 --- a/hack/prepare-e2e.sh +++ b/hack/prepare-e2e.sh @@ -30,3 +30,4 @@ source "${ROOT}/hack/lib.sh" hack::ensure_kind hack::ensure_kubectl hack::ensure_helm +hack::ensure_kubetest2 diff --git a/hack/run-e2e.sh b/hack/run-e2e.sh index e8470676ad..97961d7a58 100755 --- a/hack/run-e2e.sh +++ b/hack/run-e2e.sh @@ -22,33 +22,277 @@ cd $ROOT source $ROOT/hack/lib.sh -hack::ensure_kubectl -hack::ensure_helm - +PROVIDER=${PROVIDER:-} +CLUSTER=${CLUSTER:-} +GCP_PROJECT=${GCP_PROJECT:-} +GCP_REGION=${GCP_REGION:-} +GCP_ZONE=${GCP_ZONE:-} +GCP_CREDENTIALS=${GCP_CREDENTIALS:-} +GCP_SDK=${GCP_SDK:-/google-cloud-sdk} +KUBE_SSH_USER=${KUBE_SSH_USER:-vagrant} +IMAGE_TAG=${IMAGE_TAG:-} +SKIP_IMAGE_LOAD=${SKIP_IMAGE_LOAD:-} TIDB_OPERATOR_IMAGE=${TIDB_OPERATOR_IMAGE:-localhost:5000/pingcap/tidb-operator:latest} +TIDB_BACKUP_MANAGER_IMAGE=${TIDB_BACKUP_MANAGER_IMAGE:-localhost:5000/pingcap/tidb-backup-manager:latest} E2E_IMAGE=${E2E_IMAGE:-localhost:5000/pingcap/tidb-operator-e2e:latest} KUBECONFIG=${KUBECONFIG:-$HOME/.kube/config} KUBECONTEXT=${KUBECONTEXT:-} REPORT_DIR=${REPORT_DIR:-} REPORT_PREFIX=${REPORT_PREFIX:-} +GINKGO_NODES=${GINKGO_NODES:-} +GINKGO_PARALLEL=${GINKGO_PARALLEL:-n} # set to 'y' to run tests in parallel +# If 'y', Ginkgo's reporter will not print out in color when tests are run +# in parallel +GINKGO_NO_COLOR=${GINKGO_NO_COLOR:-n} +GINKGO_STREAM=${GINKGO_STREAM:-y} +SKIP_GINKGO=${SKIP_GINKGO:-} if [ -z "$KUBECONFIG" ]; then echo "error: KUBECONFIG is required" exit 1 fi +echo "KUBE_SSH_USER: $KUBE_SSH_USER" echo "TIDB_OPERATOR_IMAGE: $TIDB_OPERATOR_IMAGE" +echo "TIDB_BACKUP_MANAGER_IMAGE: $TIDB_BACKUP_MANAGER_IMAGE" echo "E2E_IMAGE: $E2E_IMAGE" echo "KUBECONFIG: $KUBECONFIG" echo "KUBECONTEXT: $KUBECONTEXT" echo "REPORT_DIR: $REPORT_DIR" echo "REPORT_PREFIX: $REPORT_PREFIX" +echo "GINKGO_NODES: $GINKGO_NODES" +echo "GINKGO_PARALLEL: $GINKGO_PARALLEL" +echo "GINKGO_NO_COLOR: $GINKGO_NO_COLOR" +echo "GINKGO_STREAM: $GINKGO_STREAM" -GINKGO_PARALLEL=${GINKGO_PARALLEL:-n} # set to 'y' to run tests in parallel -# If 'y', Ginkgo's reporter will not print out in color when tests are run -# in parallel -GINKGO_NO_COLOR=${GINKGO_NO_COLOR:-n} -GINKGO_STREAM=${GINKGO_STREAM:-y} +function e2e::__wait_for_ds() { + local ns="$1" + local name="$2" + local retries="${3:-300}" + echo "info: waiting for pods of daemonset $ns/$name are ready (retries: $retries, interval: 1s)" + for ((i = 0; i < retries; i++)) { + read a b <<<$($KUBECTL_BIN --context $KUBECONTEXT -n $ns get ds/$name -ojsonpath='{.status.desiredNumberScheduled} {.status.numberReady}{"\n"}') + if [[ "$a" -gt 0 && "$a" -eq "$b" ]]; then + echo "info: all pods of daemonset $ns/$name are ready (desired: $a, ready: $b)" + return 0 + fi + echo "info: pods of daemonset $ns/$name (desired: $a, ready: $b)" + sleep 1 + } 
+ echo "info: timed out waiting for pods of daemonset $ns/$name are ready" + return 1 +} + +function e2e::__wait_for_deploy() { + local ns="$1" + local name="$2" + local retries="${3:-300}" + echo "info: waiting for pods of deployment $ns/$name are ready (retries: $retries, interval: 1s)" + for ((i = 0; i < retries; i++)) { + read a b <<<$($KUBECTL_BIN --context $KUBECONTEXT -n $ns get deploy/$name -ojsonpath='{.spec.replicas} {.status.readyReplicas}{"\n"}') + if [[ "$a" -gt 0 && "$a" -eq "$b" ]]; then + echo "info: all pods of deployment $ns/$name are ready (desired: $a, ready: $b)" + return 0 + fi + echo "info: pods of deployment $ns/$name (desired: $a, ready: $b)" + sleep 1 + } + echo "info: timed out waiting for pods of deployment $ns/$name are ready" + return 1 +} + +function e2e::setup_local_pvs() { + echo "info: preparing local disks" + if [ "$PROVIDER" == "kind" ]; then + for n in $($KIND_BIN get nodes --name=$CLUSTER); do + docker exec -i $n bash <<'EOF' +test -d /mnt/disks || mkdir -p /mnt/disks +df -h /mnt/disks +if mountpoint /mnt/disks &>/dev/null; then + echo "info: /mnt/disks is a mountpoint" +else + echo "info: /mnt/disks is not a mountpoint, creating local volumes on the rootfs" +fi +cd /mnt/disks +for ((i = 1; i <= 32; i++)) { + if [ ! -d vol$i ]; then + mkdir vol$i + fi + if ! mountpoint vol$i &>/dev/null; then + mount --bind vol$i vol$i + fi +} +EOF + done + elif [ "$PROVIDER" == "gke" ]; then + echo "info: provider is $PROVIDER, skipped" + elif [ "$PROVIDER" == "eks" ]; then + echo "info: provider is $PROVIDER, skipped" + elif [ "$PROVIDER" == "openshift" ]; then + CRC_IP=$(crc ip) + ssh -i ~/.crc/machines/crc/id_rsa -o StrictHostKeyChecking=no core@$CRC_IP <<'EOF' +sudo bash -c ' +test -d /mnt/disks || mkdir -p /mnt/disks +df -h /mnt/disks +if mountpoint /mnt/disks &>/dev/null; then + echo "info: /mnt/disks is a mountpoint" +else + echo "info: /mnt/disks is not a mountpoint, creating local volumes on the rootfs" +fi +cd /mnt/disks +for ((i = 1; i <= 32; i++)) { + if [ ! -d vol$i ]; then + mkdir vol$i + fi + if ! 
mountpoint vol$i &>/dev/null; then + mount --bind vol$i vol$i + fi +} +' +EOF + fi + echo "info: installing local-volume-provisioner" + $KUBECTL_BIN --context $KUBECONTEXT apply -f ${ROOT}/manifests/local-dind/local-volume-provisioner.yaml + e2e::__wait_for_ds kube-system local-volume-provisioner +} + +function e2e::__ecr_url() { + local account_id=$(aws sts get-caller-identity --output text | awk '{print $1}') + local region=$(aws configure get region) + echo "${account_id}.dkr.ecr.${region}.amazonaws.com" +} + +function e2e::get_kube_version() { + $KUBECTL_BIN --context $KUBECONTEXT version --short | awk '/Server Version:/ {print $3}' +} + +function e2e::setup_helm_server() { + $KUBECTL_BIN --context $KUBECONTEXT apply -f ${ROOT}/manifests/tiller-rbac.yaml + if hack::version_ge $(e2e::get_kube_version) "v1.16.0"; then + # workaround for https://github.com/helm/helm/issues/6374 + # TODO remove this when we can upgrade to helm 2.15+, see https://github.com/helm/helm/pull/6462 + $HELM_BIN init --service-account tiller --output yaml \ + | sed 's@apiVersion: extensions/v1beta1@apiVersion: apps/v1@' \ + | sed 's@ replicas: 1@ replicas: 1\n selector: {"matchLabels": {"app": "helm", "name": "tiller"}}@' \ + | $KUBECTL_BIN --context $KUBECONTEXT apply -f - + echo "info: wait for tiller to be ready" + e2e::__wait_for_deploy kube-system tiller-deploy + else + $HELM_BIN init --service-account=tiller --wait + fi + $HELM_BIN version +} + +# Used by non-kind providers to tag image with its id. This can force our e2e +# process to pull correct image even if IfNotPresent is used in an existing +# environment, e.g. testing in the same cluster. +function e2e::image_id_tag() { + docker image inspect -f '{{.Id}}' "$1" | cut -d ':' -f 2 | head -c 10 +} + +function e2e::image_load() { + local images=( + $TIDB_OPERATOR_IMAGE + $TIDB_BACKUP_MANAGER_IMAGE + $E2E_IMAGE + ) + if [ "$PROVIDER" == "kind" ]; then + local nodes=$($KIND_BIN get nodes --name $CLUSTER | grep -v 'control-plane$') + echo "info: load images ${images[@]}" + for n in ${images[@]}; do + $KIND_BIN load docker-image --name $CLUSTER $n --nodes $(hack::join ',' ${nodes[@]}) + done + elif [ "$PROVIDER" == "gke" ]; then + unset DOCKER_CONFIG # We don't need this and it may be read-only and fail the command to fail + gcloud auth configure-docker + GCP_TIDB_OPERATOR_IMAGE=gcr.io/$GCP_PROJECT/tidb-operator:$CLUSTER-$(e2e::image_id_tag $TIDB_OPERATOR_IMAGE) + GCP_TIDB_BACKUP_MANAGER_IMAGE=gcr.io/$GCP_PROJECT/tidb-backup-image:$CLUSTER-$(e2e::image_id_tag $TIDB_BACKUP_MANAGER_IMAGE) + GCP_E2E_IMAGE=gcr.io/$GCP_PROJECT/tidb-operator-e2e:$CLUSTER-$(e2e::image_id_tag $E2E_IMAGE) + docker tag $TIDB_OPERATOR_IMAGE $GCP_TIDB_OPERATOR_IMAGE + docker tag $E2E_IMAGE $GCP_E2E_IMAGE + docker tag $TIDB_BACKUP_MANAGER_IMAGE $GCP_TIDB_BACKUP_MANAGER_IMAGE + echo "info: pushing $GCP_TIDB_OPERATOR_IMAGE" + docker push $GCP_TIDB_OPERATOR_IMAGE + echo "info: pushing $GCP_E2E_IMAGE" + docker push $GCP_E2E_IMAGE + echo "info: pushing $GCP_TIDB_BACKUP_MANAGER_IMAGE" + docker push $GCP_TIDB_BACKUP_MANAGER_IMAGE + TIDB_OPERATOR_IMAGE=$GCP_TIDB_OPERATOR_IMAGE + E2E_IMAGE=$GCP_E2E_IMAGE + TIDB_BACKUP_MANAGER_IMAGE=$GCP_TIDB_BACKUP_MANAGER_IMAGE + elif [ "$PROVIDER" == "eks" ]; then + for repoName in e2e/tidb-operator e2e/tidb-operator-e2e e2e/tidb-backup-manager; do + local ret=0 + aws ecr describe-repositories --repository-names $repoName || ret=$? 
+ if [ $ret -ne 0 ]; then + echo "info: creating repository $repoName" + aws ecr create-repository --repository-name $repoName + fi + done + local ecrURL=$(e2e::__ecr_url) + echo "info: logging in $ecrURL" + aws ecr get-login-password | docker login --username AWS --password-stdin $ecrURL + AWS_TIDB_OPERATOR_IMAGE=$ecrURL/e2e/tidb-operator:$CLUSTER-$(e2e::image_id_tag $TIDB_OPERATOR_IMAGE) + AWS_TIDB_BACKUP_MANAGER_IMAGE=$ecrURL/e2e/tidb-backup-manager:$CLUSTER-$(e2e::image_id_tag $TIDB_BACKUP_MANAGER_IMAGE) + AWS_E2E_IMAGE=$ecrURL/e2e/tidb-operator-e2e:$CLUSTER-$(e2e::image_id_tag $E2E_IMAGE) + docker tag $TIDB_OPERATOR_IMAGE $AWS_TIDB_OPERATOR_IMAGE + docker tag $TIDB_BACKUP_MANAGER_IMAGE $AWS_TIDB_BACKUP_MANAGER_IMAGE + docker tag $E2E_IMAGE $AWS_E2E_IMAGE + echo "info: pushing $AWS_TIDB_OPERATOR_IMAGE" + docker push $AWS_TIDB_OPERATOR_IMAGE + echo "info: pushing $AWS_TIDB_BACKUP_MANAGER_IMAGE" + docker push $AWS_TIDB_BACKUP_MANAGER_IMAGE + echo "info: pushing $AWS_E2E_IMAGE" + docker push $AWS_E2E_IMAGE + TIDB_BACKUP_MANAGER_IMAGE=$AWS_TIDB_BACKUP_MANAGER_IMAGE + TIDB_OPERATOR_IMAGE=$AWS_TIDB_OPERATOR_IMAGE + E2E_IMAGE=$AWS_E2E_IMAGE + else + echo "info: unsupported provider '$PROVIDER', skip loading images" + fi +} + +hack::ensure_kubectl +hack::ensure_helm + +if [ "$PROVIDER" == "gke" ]; then + if [ -n "$GCP_CREDENTIALS" ]; then + gcloud auth activate-service-account --key-file "$GCP_CREDENTIALS" + fi + if [ -n "$GCP_REGION" ]; then + gcloud config set compute/region "$GCP_REGION" + fi + if [ -n "$GCP_ZONE" ]; then + gcloud config set compute/zone "$GCP_ZONE" + fi + gcloud container clusters get-credentials "$CLUSTER" +elif [ "$PROVIDER" == "eks" ]; then + aws eks update-kubeconfig --name "$CLUSTER" +fi + +if [ -z "$KUBECONTEXT" ]; then + echo "info: KUBECONTEXT is not set, current context is used" + KUBECONTEXT=$($KUBECTL_BIN config current-context 2>/dev/null) || true + if [ -z "$KUBECONTEXT" ]; then + echo "error: current context cannot be detected" + exit 1 + fi + echo "info: current kubeconfig context is '$KUBECONTEXT'" +fi + +if [ -z "$SKIP_IMAGE_LOAD" ]; then + e2e::image_load +fi + +e2e::setup_local_pvs +e2e::setup_helm_server + +if [ -n "$SKIP_GINKGO" ]; then + echo "info: skipping ginkgo" + exit 0 +fi + +echo "info: start to run e2e process" ginkgo_args=() @@ -66,33 +310,25 @@ if [[ "${GINKGO_STREAM}" == "y" ]]; then ginkgo_args+=("--stream") fi -echo "info: start to run e2e process" e2e_args=( /usr/local/bin/ginkgo ${ginkgo_args[@]:-} /usr/local/bin/e2e.test -- - --provider=skeleton --clean-start=true --delete-namespace-on-failure=false - --repo-root=$ROOT + --repo-root="$ROOT" # tidb-operator e2e flags --operator-tag=e2e - --operator-image=${TIDB_OPERATOR_IMAGE} - --e2e-image=${E2E_IMAGE} + --operator-image="${TIDB_OPERATOR_IMAGE}" + --backup-image="${TIDB_BACKUP_MANAGER_IMAGE}" + --e2e-image="${E2E_IMAGE}" # two tidb versions can be configuraed: , --tidb-versions=v3.0.7,v3.0.8 --chart-dir=/charts -v=4 ) -if [ -n "$REPORT_DIR" ]; then - e2e_args+=( - --report-dir="${REPORT_DIR}" - --report-prefix="${REPORT_PREFIX}" - ) -fi - e2e_args+=(${@:-}) docker_args=( @@ -106,9 +342,53 @@ docker_args=( -v $KUBECONFIG:/etc/kubernetes/admin.conf:ro --env KUBECONFIG=/etc/kubernetes/admin.conf --env KUBECONTEXT=$KUBECONTEXT + --env KUBE_SSH_USER=$KUBE_SSH_USER ) +if [ "$PROVIDER" == "eks" ]; then + e2e_args+=( + --provider=aws + --gce-zone="${AWS_ZONE}" # reuse gce-zone to configure aws zone + ) + docker_args+=( + # aws credential is required to get token for EKS + -v 
$HOME/.aws:/root/.aws + # ~/.ssh/kube_aws_rsa must be mounted into e2e container to run ssh + -v $HOME/.ssh/kube_aws_rsa:/root/.ssh/kube_aws_rsa + ) +elif [ "$PROVIDER" == "gke" ]; then + e2e_args+=( + --provider="${PROVIDER}" + --gce-project="${GCP_PROJECT}" + --gce-region="${GCP_REGION}" + --gce-zone="${GCP_ZONE}" + ) + docker_args+=( + -v ${GCP_CREDENTIALS}:${GCP_CREDENTIALS} + --env GOOGLE_APPLICATION_CREDENTIALS=${GCP_CREDENTIALS} + ) + # google-cloud-sdk is very large, we didn't pack it into our e2e image. + # instead, we use the sdk installed in CI image. + if [ ! -e "${GCP_SDK}/bin/gcloud" ]; then + echo "error: ${GCP_SDK} is not google cloud sdk, please install it here or specify correct path via GCP_SDK env" + exit 1 + fi + docker_args+=( + -v ${GCP_SDK}:/google-cloud-sdk + # ~/.ssh/google_compute_engine must be mounted into e2e container to run ssh + -v $HOME/.ssh/google_compute_engine:/root/.ssh/google_compute_engine + ) +else + e2e_args+=( + --provider="${PROVIDER}" + ) +fi + if [ -n "$REPORT_DIR" ]; then + e2e_args+=( + --report-dir="${REPORT_DIR}" + --report-prefix="${REPORT_PREFIX}" + ) docker_args+=( -v $REPORT_DIR:$REPORT_DIR ) diff --git a/hack/run-in-container.sh b/hack/run-in-container.sh index d99204bbb9..317751f8e3 100755 --- a/hack/run-in-container.sh +++ b/hack/run-in-container.sh @@ -78,15 +78,20 @@ fi args=(bash) if [ $# -gt 0 ]; then - args=($@) + args=("$@") fi docker_args=( - -it --rm + --rm -h $NAME --name $NAME ) +if [ -t 1 ]; then + # Allocate a pseudo-TTY when the STDIN is a terminal + docker_args+=(-it) +fi + # required by dind docker_args+=( --privileged @@ -139,5 +144,5 @@ docker run ${docker_args[@]} \ -v $ROOT:/go/src/github.com/pingcap/tidb-operator \ -w /go/src/github.com/pingcap/tidb-operator \ --entrypoint /usr/local/bin/runner.sh \ - gcr.io/k8s-testimages/kubekins-e2e:v20191108-9467d02-master \ + gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master \ "${args[@]}" diff --git a/hack/update-crd-groups.sh b/hack/update-crd-groups.sh index 1b1a4df4db..b55f35302e 100755 --- a/hack/update-crd-groups.sh +++ b/hack/update-crd-groups.sh @@ -39,3 +39,15 @@ to-crdgen generate backupschedule >> $crd_target to-crdgen generate tidbmonitor >> $crd_target to-crdgen generate tidbinitializer >> $crd_target to-crdgen generate tidbclusterautoscaler >> $crd_target + + + +hack::ensure_gen_crd_api_references_docs + +DOCS_PATH="$ROOT/docs/api-references" + +${DOCS_BIN} \ +-config "$DOCS_PATH/config.json" \ +-template-dir "$DOCS_PATH/template" \ +-api-dir "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" \ +-out-file "$DOCS_PATH/docs.md" diff --git a/hack/verify-crd-groups.sh b/hack/verify-crd-groups.sh index ca6ed42fb3..5d6cd11654 100755 --- a/hack/verify-crd-groups.sh +++ b/hack/verify-crd-groups.sh @@ -24,7 +24,12 @@ target="manifests/crd.yaml" verify_tmp=$(mktemp) trap "rm -f $verify_tmp" EXIT +targetDocs="$ROOT/docs/api-references/docs.md" +verifyDocs_tmp=$(mktemp) +trap "rm -f $verifyDocs_tmp" EXIT + cp "$target" "${verify_tmp}" +cp "$targetDocs" "${verifyDocs_tmp}" hack/update-crd-groups.sh @@ -36,3 +41,12 @@ if [[ -n "${diff}" ]]; then echo "Run ./hack/update-crd-groups.sh" >&2 exit 1 fi + +echo "diffing $targetDocs with $verifyDocs_tmp" >&2 +diff=$(diff "$targetDocs" "$verifyDocs_tmp") || true +if [[ -n "${diff}" ]]; then + echo "${diff}" >&2 + echo >&2 + echo "Run ./hack/update-crd-groups.sh" >&2 + exit 1 +fi diff --git a/images/backup-manager/Dockerfile b/images/backup-manager/Dockerfile deleted file mode 100644 index 32f0940baa..0000000000 
--- a/images/backup-manager/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM pingcap/tidb-enterprise-tools:latest - -ARG VERSION=v1.48.0 -RUN apk update && apk add ca-certificates - -RUN wget -nv https://github.com/ncw/rclone/releases/download/${VERSION}/rclone-${VERSION}-linux-amd64.zip \ - && unzip rclone-${VERSION}-linux-amd64.zip \ - && mv rclone-${VERSION}-linux-amd64/rclone /usr/local/bin \ - && chmod 755 /usr/local/bin/rclone \ - && rm -rf rclone-${VERSION}-linux-amd64.zip rclone-${VERSION}-linux-amd64 - -COPY bin/tidb-backup-manager /tidb-backup-manager -COPY entrypoint.sh /entrypoint.sh - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/images/tidb-backup-manager/Dockerfile b/images/tidb-backup-manager/Dockerfile new file mode 100644 index 0000000000..6b29976a56 --- /dev/null +++ b/images/tidb-backup-manager/Dockerfile @@ -0,0 +1,48 @@ +FROM pingcap/tidb-enterprise-tools:latest +ARG VERSION=v1.51.0 +ARG SHUSH_VERSION=v1.4.0 +ARG TOOLKIT_VERSION=v3.0.12 +ARG TOOLKIT_V31=v3.1.0-rc +ARG TOOLKIT_V40=v4.0.0-rc +RUN apk update && apk add ca-certificates + +RUN wget -nv https://github.com/ncw/rclone/releases/download/${VERSION}/rclone-${VERSION}-linux-amd64.zip \ + && unzip rclone-${VERSION}-linux-amd64.zip \ + && mv rclone-${VERSION}-linux-amd64/rclone /usr/local/bin \ + && chmod 755 /usr/local/bin/rclone \ + && rm -rf rclone-${VERSION}-linux-amd64.zip rclone-${VERSION}-linux-amd64 + +RUN wget -nv https://github.com/realestate-com-au/shush/releases/download/${SHUSH_VERSION}/shush_linux_amd64 \ + && mv shush_linux_amd64 /usr/local/bin/shush \ + && chmod 755 /usr/local/bin/shush + +RUN \ + wget -nv https://download.pingcap.org/tidb-toolkit-${TOOLKIT_VERSION}-linux-amd64.tar.gz \ + && tar -xzf tidb-toolkit-${TOOLKIT_VERSION}-linux-amd64.tar.gz \ + && mv tidb-toolkit-${TOOLKIT_VERSION}-linux-amd64/bin/tidb-lightning /tidb-lightning \ + && mv tidb-toolkit-${TOOLKIT_VERSION}-linux-amd64/bin/tidb-lightning-ctl /tidb-lightning-ctl \ + && chmod 755 /tidb-lightning /tidb-lightning-ctl \ + && rm -rf tidb-toolkit-${TOOLKIT_VERSION}-linux-amd64.tar.gz \ + && rm -rf tidb-toolkit-${TOOLKIT_VERSION}-linux-amd64 + +RUN \ + wget -nv https://download.pingcap.org/tidb-toolkit-${TOOLKIT_V31}-linux-amd64.tar.gz \ + && tar -xzf tidb-toolkit-${TOOLKIT_V31}-linux-amd64.tar.gz \ + && mv tidb-toolkit-${TOOLKIT_V31}-linux-amd64/bin/br /usr/local/bin/br31 \ + && chmod 755 /usr/local/bin/br31 \ + && rm -rf tidb-toolkit-${TOOLKIT_V31}-linux-amd64.tar.gz \ + && rm -rf tidb-toolkit-${TOOLKIT_V31}-linux-amd64 + +RUN \ + wget -nv https://download.pingcap.org/tidb-toolkit-${TOOLKIT_V40}-linux-amd64.tar.gz \ + && tar -xzf tidb-toolkit-${TOOLKIT_V40}-linux-amd64.tar.gz \ + && mv tidb-toolkit-${TOOLKIT_V40}-linux-amd64/bin/br /usr/local/bin/br40 \ + && chmod 755 /usr/local/bin/br40 \ + && rm -rf tidb-toolkit-${TOOLKIT_V40}-linux-amd64.tar.gz \ + && rm -rf tidb-toolkit-${TOOLKIT_V40}-linux-amd64 + +COPY bin/tidb-backup-manager /tidb-backup-manager +COPY entrypoint.sh /entrypoint.sh + + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/images/backup-manager/entrypoint.sh b/images/tidb-backup-manager/entrypoint.sh similarity index 83% rename from images/backup-manager/entrypoint.sh rename to images/tidb-backup-manager/entrypoint.sh index 85c889147d..fc11dc02f2 100755 --- a/images/backup-manager/entrypoint.sh +++ b/images/tidb-backup-manager/entrypoint.sh @@ -19,7 +19,7 @@ echo "Create rclone.conf file." 
cat <<EOF > /tmp/rclone.conf [s3] type = s3 -env_auth = false +env_auth = true provider = ${S3_PROVIDER} access_key_id = ${AWS_ACCESS_KEY_ID} secret_access_key = ${AWS_SECRET_ACCESS_KEY:-$AWS_SECRET_KEY} @@ -51,33 +51,40 @@ else fi BACKUP_BIN=/tidb-backup-manager +if [[ -n "${AWS_DEFAULT_REGION}" ]]; then + EXEC_COMMAND="exec" +else + EXEC_COMMAND="/usr/local/bin/shush exec --" +fi + +cat /tmp/rclone.conf # exec command case "$1" in backup) shift 1 echo "$BACKUP_BIN backup $@" - exec $BACKUP_BIN backup "$@" + $EXEC_COMMAND $BACKUP_BIN backup "$@" ;; export) shift 1 echo "$BACKUP_BIN export $@" - exec $BACKUP_BIN export "$@" + $EXEC_COMMAND $BACKUP_BIN export "$@" ;; restore) shift 1 echo "$BACKUP_BIN restore $@" - exec $BACKUP_BIN restore "$@" + $EXEC_COMMAND $BACKUP_BIN restore "$@" ;; import) shift 1 echo "$BACKUP_BIN import $@" - exec $BACKUP_BIN import "$@" + $EXEC_COMMAND $BACKUP_BIN import "$@" ;; clean) shift 1 echo "$BACKUP_BIN clean $@" - exec $BACKUP_BIN clean "$@" + $EXEC_COMMAND $BACKUP_BIN clean "$@" ;; *) echo "Usage: $0 {backup|restore|clean}" diff --git a/manifests/backup/backup-aws-s3-br.yaml b/manifests/backup/backup-aws-s3-br.yaml new file mode 100644 index 0000000000..a750e660c1 --- /dev/null +++ b/manifests/backup/backup-aws-s3-br.yaml @@ -0,0 +1,35 @@ +--- +apiVersion: pingcap.com/v1alpha1 +kind: Backup +metadata: + name: demo1-backup-s3 + namespace: test1 + # annotations: + # iam.amazonaws.com/role: "arn:aws:iam::123456789:role" +spec: + # backupType: full + # useKMS: false + # serviceAccount: myServiceAccount + br: + cluster: myCluster + # clusterNamespace: + # logLevel: info + # statusAddr: + # concurrency: 4 + # rateLimit: 0 + # timeAgo: