diff --git a/charts/tidb-cluster/templates/config/_grafana-config.tpl b/charts/tidb-cluster/templates/config/_grafana-config.tpl index e55922427b..1e0f4ecd4f 100644 --- a/charts/tidb-cluster/templates/config/_grafana-config.tpl +++ b/charts/tidb-cluster/templates/config/_grafana-config.tpl @@ -36,7 +36,11 @@ ;http_port = 3000 # The public facing domain name used to access grafana from a browser -;domain = localhost +{{- if .Values.monitor.grafana.serverDomain }} +domain = {{ .Values.monitor.grafana.serverDomain }} +{{- else }} +domain = localhost +{{- end }} # Redirect to correct domain if host header does not match domain # Prevents DNS rebinding attacks @@ -44,7 +48,11 @@ # The full public facing url you use in browser, used for redirects and emails # If you use reverse proxy and sub path specify full url (with sub path) -root_url = {{ .Values.grafanaUrl }} +{{- if .Values.monitor.grafana.serverRootUrl }} +root_url = {{ .Values.monitor.grafana.serverRootUrl }} +{{- else }} +root_url = %(protocol)s://%(domain)s:%(http_port)s/ +{{- end }} # Log web requests ;router_logging = false diff --git a/charts/tidb-cluster/templates/monitor-configmap.yaml b/charts/tidb-cluster/templates/monitor-configmap.yaml index f68ea43e2c..36e7b73040 100644 --- a/charts/tidb-cluster/templates/monitor-configmap.yaml +++ b/charts/tidb-cluster/templates/monitor-configmap.yaml @@ -15,7 +15,8 @@ data: alert-rules-config: |- {{ tuple "config/_alert-rules-config.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} - +{{- if .Values.monitor.grafana.create }} grafana-config: |- {{ tuple "config/_grafana-config.tpl" . 
| include "helm-toolkit.utils.template" | indent 4 }} {{- end }} +{{- end }} diff --git a/charts/tidb-cluster/templates/monitor-deployment.yaml b/charts/tidb-cluster/templates/monitor-deployment.yaml index 9c47daf454..7d6bc21c07 100644 --- a/charts/tidb-cluster/templates/monitor-deployment.yaml +++ b/charts/tidb-cluster/templates/monitor-deployment.yaml @@ -54,8 +54,8 @@ spec: - /bin/sh - -c - | - mkdir -p /data/prometheus /data/grafana - chmod 777 /data/prometheus /data/grafana + mkdir -p /data/prometheus {{- if .Values.monitor.grafana.create }} /data/grafana {{- end }} + chmod 777 /data/prometheus {{- if .Values.monitor.grafana.create }} /data/grafana {{- end }} securityContext: runAsUser: 0 volumeMounts: @@ -89,6 +89,7 @@ spec: readOnly: true - name: monitor-data mountPath: /data + {{- if .Values.monitor.grafana.create }} - name: grafana image: {{ .Values.monitor.grafana.image }} imagePullPolicy: {{ .Values.monitor.grafana.imagePullPolicy | default "IfNotPresent" }} @@ -101,6 +102,18 @@ spec: containerPort: 3000 protocol: TCP env: + # The following two env vars (GF_SERVER_ROOT_URL and GF_SERVER_DOMAIN) should be set in the configuration file, + # but the grafana container startup script chowns the configuration directory, + # which fails because the configmap is mounted as a read-only volume in the container, + # so they are temporarily set here as env vars; each one is emitted only when its own value is set + {{- if .Values.monitor.grafana.serverRootUrl }} + - name: GF_SERVER_ROOT_URL + value: {{ .Values.monitor.grafana.serverRootUrl | quote }} + {{- end }} + {{- if .Values.monitor.grafana.serverDomain }} + - name: GF_SERVER_DOMAIN + value: {{ .Values.monitor.grafana.serverDomain | quote }} + {{- end }} - name: GF_PATHS_DATA value: /data/grafana - name: GF_SECURITY_ADMIN_USER @@ -116,8 +129,13 @@ spec: - name: TZ value: {{ .Values.timezone | default "UTC" }} volumeMounts: + # configmap is always mounted as read only volume in container + # grafana startup script will fail with read only configuration directory + # - name: 
grafana-config + # mountPath: /etc/grafana - name: monitor-data mountPath: /data + {{- end }} volumes: - name: monitor-data {{- if .Values.monitor.persistent }} @@ -134,12 +152,14 @@ spec: path: prometheus.yml - key: alert-rules-config path: alert.rules + {{- if .Values.monitor.grafana.create }} - name: grafana-config configMap: name: {{ .Values.clusterName }}-monitor items: - key: grafana-config path: grafana.ini + {{- end }} {{- if .Values.monitor.tolerations }} tolerations: {{ toYaml .Values.monitor.tolerations | indent 6 }} diff --git a/charts/tidb-cluster/templates/monitor-job.yaml b/charts/tidb-cluster/templates/monitor-job.yaml index 56d6c73c90..c2651c2c1f 100644 --- a/charts/tidb-cluster/templates/monitor-job.yaml +++ b/charts/tidb-cluster/templates/monitor-job.yaml @@ -1,4 +1,4 @@ -{{- if .Values.monitor.create }} +{{- if and .Values.monitor.create .Values.monitor.grafana.create }} apiVersion: batch/v1 kind: Job metadata: diff --git a/charts/tidb-cluster/templates/monitor-secret.yaml b/charts/tidb-cluster/templates/monitor-secret.yaml index 5eda4ca58a..c75faaa4d8 100644 --- a/charts/tidb-cluster/templates/monitor-secret.yaml +++ b/charts/tidb-cluster/templates/monitor-secret.yaml @@ -1,4 +1,4 @@ -{{- if .Values.monitor.create }} +{{- if and .Values.monitor.create .Values.monitor.grafana.create }} apiVersion: v1 kind: Secret metadata: diff --git a/charts/tidb-cluster/templates/monitor-service.yaml b/charts/tidb-cluster/templates/monitor-service.yaml index a7c8c05253..516c8c853a 100644 --- a/charts/tidb-cluster/templates/monitor-service.yaml +++ b/charts/tidb-cluster/templates/monitor-service.yaml @@ -1,4 +1,5 @@ {{- if .Values.monitor.create }} +{{- if .Values.monitor.grafana.create }} apiVersion: v1 kind: Service metadata: @@ -20,6 +21,7 @@ spec: app.kubernetes.io/name: {{ template "chart.name" . 
}} app.kubernetes.io/instance: {{ .Release.Name }} app.kubernetes.io/component: monitor +{{- end }} --- apiVersion: v1 kind: Service diff --git a/charts/tidb-cluster/values.yaml b/charts/tidb-cluster/values.yaml index b5cf2c8601..9c2c588eaa 100644 --- a/charts/tidb-cluster/values.yaml +++ b/charts/tidb-cluster/values.yaml @@ -163,7 +163,8 @@ monitor: image: pingcap/tidb-dashboard-installer:v2.0.0 imagePullPolicy: IfNotPresent grafana: - image: grafana/grafana:4.6.3 + create: true + image: grafana/grafana:4.6.5 imagePullPolicy: IfNotPresent logLevel: info resources: {} @@ -177,7 +178,10 @@ monitor: password: admin service: type: NodePort - grafanaUrl: http://localhost:3000 + # if grafana is running behind a reverse proxy under a sub path such as http://foo.bar/grafana, + # configure `serverDomain` and `serverRootUrl` as follows + # serverDomain: foo.bar + # serverRootUrl: "%(protocol)s://%(domain)s/grafana/" prometheus: image: prom/prometheus:v2.2.1 imagePullPolicy: IfNotPresent diff --git a/tests/e2e/create.go b/tests/e2e/create.go index d2279b80ed..0ef1c6e4e2 100644 --- a/tests/e2e/create.go +++ b/tests/e2e/create.go @@ -15,7 +15,11 @@ package e2e import ( "database/sql" + "encoding/json" "fmt" + "io/ioutil" + "net/http" + "net/url" "strconv" "strings" "time" @@ -32,9 +36,25 @@ import ( ) const ( + username = "admin" password = "admin" ) +type Result struct { // Result is one entry of the "result" array in the query_range response decoded by checkGrafanaData + Metric struct { + Job string `json:"job"` + } `json:"metric"` + Values []interface{} `json:"values"` +} + +type Response struct { // Response is the JSON body that checkGrafanaData decodes from the query_range call + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result []Result `json:"result"` + } `json:"data"` +} + func testCreate(ns, clusterName string) { By(fmt.Sprintf("When create the TiDB cluster: %s/%s", ns, clusterName)) instanceName := getInstanceName(ns, clusterName) @@ -112,6 +132,11 @@ func allMembersRunning(ns, clusterName string) (bool, error) { return false, nil } + running, err = monitorMemberRunning(tc) + if err != nil || !running { + return false, nil + } + 
return true, nil } @@ -331,6 +356,72 @@ func tidbMemberRunning(tc *v1alpha1.TidbCluster) (bool, error) { return true, nil } +// monitorMemberRunning reports whether the monitor deployment has a ready replica and grafana returns data. +// Every failure is logged and reported as (false, nil); the returned error is always nil. +func monitorMemberRunning(tc *v1alpha1.TidbCluster) (bool, error) { + ns := tc.GetNamespace() + tcName := tc.GetName() + deployName := fmt.Sprintf("%s-monitor", tcName) + deploy, err := kubeCli.AppsV1beta1().Deployments(ns).Get(deployName, metav1.GetOptions{}) + if err != nil { + // pass the error as an argument: its text must not be interpreted as a format string + logf("can't get deployment %s/%s: %v", ns, deployName, err) + return false, nil + } + if deploy.Status.ReadyReplicas < 1 { + logf("monitor ready replicas %d < 1", deploy.Status.ReadyReplicas) + return false, nil + } + if err := checkGrafanaData(tc); err != nil { + logf("can't get grafana data: %v", err) + return false, nil + } + return true, nil +} + +// checkGrafanaData runs a one-minute query_range for tikv_pd_heartbeat_tick_total through the grafana datasource proxy. +// It returns an error unless the response is HTTP 200 with status "success" and at least one result. +func checkGrafanaData(tc *v1alpha1.TidbCluster) error { + ns := tc.GetNamespace() + tcName := tc.GetName() + svcName := fmt.Sprintf("%s-grafana", tcName) + end := time.Now() + start := end.Add(-time.Minute) + values := url.Values{} + values.Set("query", `sum(tikv_pd_heartbeat_tick_total{type="leader"}) by (job)`) + values.Set("start", strconv.FormatInt(start.Unix(), 10)) + values.Set("end", strconv.FormatInt(end.Unix(), 10)) + values.Set("step", "30") + u := fmt.Sprintf("http://%s.%s.svc.cluster.local:3000/api/datasources/proxy/1/api/v1/query_range?%s", svcName, ns, values.Encode()) + req, err := http.NewRequest(http.MethodGet, u, nil) + if err != nil { + return err + } + req.SetBasicAuth(username, password) + // bound the request so an unresponsive grafana cannot block the e2e run forever + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected status code %d from grafana", resp.StatusCode) + } + buf, err := ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + data := &Response{} + if err := json.Unmarshal(buf, data); err != nil { + return err + } + if data.Status != "success" || len(data.Data.Result) < 1 { + return fmt.Errorf("invalid response: status: %s, result: %v", data.Status, data.Data.Result) + } + return nil +} + func reclaimPolicySynced(tc *v1alpha1.TidbCluster) (bool, error) { ns := tc.GetNamespace() instanceName := 
tc.GetLabels()[label.InstanceLabelKey]