diff --git a/config/helm/aws-node-termination-handler/README.md b/config/helm/aws-node-termination-handler/README.md index eaf49187..186109e5 100644 --- a/config/helm/aws-node-termination-handler/README.md +++ b/config/helm/aws-node-termination-handler/README.md @@ -80,11 +80,6 @@ Parameter | Description | Default `enableProbesServer` | If true, start an http server exposing `/healthz` endpoint for probes. | `false` `probesServerPort` | Replaces the default HTTP port for exposing probes endpoint. | `8080` `probesServerEndpoint` | Replaces the default endpoint for exposing probes endpoint. | `/healthz` -`podMonitor.create` | If `true`, create a PodMonitor | `false` -`podMonitor.interval` | Prometheus scrape interval | `30s` -`podMonitor.sampleLimit` | Number of scraped samples accepted | `5000` -`podMonitor.labels` | Additional PodMonitor metadata labels | `{}` -`podMonitor.namespace` | Override podMonitor Helm release namespace | `{{ .Release.Namespace }}` `emitKubernetesEvents` | If `true`, Kubernetes events will be emitted when interruption events are received and when actions are taken on Kubernetes nodes. In IMDS Processor mode a default set of annotations with all the node metadata gathered from IMDS will be attached to each event. More information [here](https://github.com/aws/aws-node-termination-handler/blob/main/docs/kubernetes_events.md) | `false` `kubernetesExtraEventsAnnotations` | A comma-separated list of `key=value` extra annotations to attach to all emitted Kubernetes events. 
Example: `first=annotation,sample.annotation/number=two"` | None @@ -100,6 +95,11 @@ Parameter | Description | Default `workers` | The maximum amount of parallel event processors | `10` `replicas` | The number of replicas in the NTH deployment when using queue-processor mode (NOTE: increasing replicas may cause duplicate webhooks since NTH pods are stateless) | `1` `podDisruptionBudget` | Limit the disruption for controller pods, requires at least 2 controller replicas | `{}` +`serviceMonitor.create` | If `true`, create a ServiceMonitor (this requires enableSqsTerminationDraining and enablePrometheusServer to be set) | `false` +`serviceMonitor.interval` | Prometheus scrape interval | `30s` +`serviceMonitor.sampleLimit` | Number of scraped samples accepted | `5000` +`serviceMonitor.labels` | Additional ServiceMonitor metadata labels | `{}` +`serviceMonitor.namespace` | Override ServiceMonitor Helm release namespace | `{{ .Release.Namespace }}` ### AWS Node Termination Handler - IMDS Mode Configuration @@ -110,6 +110,11 @@ Parameter | Description | Default `enableRebalanceDraining` | If true, drain nodes when the rebalance recommendation notice is received | `false` `enableRebalanceMonitoring` | If true, cordon nodes when the rebalance recommendation notice is received. If you'd like to drain the node in addition to cordoning, then also set `enableRebalanceDraining`. | `false` `useHostNetwork` | If `true`, enables `hostNetwork` for the Linux DaemonSet. 
NOTE: setting this to `false` may cause issues accessing IMDSv2 if your account is not configured with an IP hop count of 2 | `true` +`podMonitor.create` | If `true`, create a PodMonitor (this requires enableSqsTerminationDraining to not be set and enablePrometheusServer to be set) | `false` +`podMonitor.interval` | Prometheus scrape interval | `30s` +`podMonitor.sampleLimit` | Number of scraped samples accepted | `5000` +`podMonitor.labels` | Additional PodMonitor metadata labels | `{}` +`podMonitor.namespace` | Override PodMonitor Helm release namespace | `{{ .Release.Namespace }}` ### Kubernetes Configuration diff --git a/config/helm/aws-node-termination-handler/templates/podmonitor.yaml b/config/helm/aws-node-termination-handler/templates/podmonitor.yaml index 47ae26fb..1c497d68 100644 --- a/config/helm/aws-node-termination-handler/templates/podmonitor.yaml +++ b/config/helm/aws-node-termination-handler/templates/podmonitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.podMonitor.create }} +{{- if and (not .Values.enableSqsTerminationDraining) (and .Values.enablePrometheusServer .Values.podMonitor.create) -}} apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: @@ -17,13 +17,17 @@ spec: jobLabel: {{ include "aws-node-termination-handler.name" . }} namespaceSelector: matchNames: - - {{ .Release.Namespace }} + - {{ .Release.Namespace }} podMetricsEndpoints: - - interval: {{ .Values.podMonitor.interval }} - path: /metrics - port: http-metrics - sampleLimit: {{ .Values.podMonitor.sampleLimit }} + - port: http-metrics + path: /metrics + {{- with .Values.podMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.podMonitor.sampleLimit }} + sampleLimit: {{ . }} + {{- end }} selector: matchLabels: {{- include "aws-node-termination-handler.selectorLabels" . 
| nindent 6 }} -{{- end }} +{{- end -}} diff --git a/config/helm/aws-node-termination-handler/templates/service.yaml b/config/helm/aws-node-termination-handler/templates/service.yaml new file mode 100644 index 00000000..5534b0bb --- /dev/null +++ b/config/helm/aws-node-termination-handler/templates/service.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.enableSqsTerminationDraining .Values.enablePrometheusServer -}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "aws-node-termination-handler.fullname" . }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} +spec: + type: ClusterIP + selector: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 4 }} + ports: + - name: http-metrics + port: {{ .Values.prometheusServerPort }} + targetPort: http-metrics + protocol: TCP +{{- end -}} diff --git a/config/helm/aws-node-termination-handler/templates/servicemonitor.yaml b/config/helm/aws-node-termination-handler/templates/servicemonitor.yaml new file mode 100644 index 00000000..52ff799d --- /dev/null +++ b/config/helm/aws-node-termination-handler/templates/servicemonitor.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.enableSqsTerminationDraining (and .Values.enablePrometheusServer .Values.serviceMonitor.create) -}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "aws-node-termination-handler.fullname" . }} + {{- if .Values.serviceMonitor.namespace }} + namespace: {{ .Values.serviceMonitor.namespace }} + {{- else }} + namespace: {{ .Release.Namespace }} + {{- end }} + labels: + {{- include "aws-node-termination-handler.labels" . | nindent 4 }} + {{- with .Values.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ include "aws-node-termination-handler.name" . }} + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + endpoints: + - port: http-metrics + path: /metrics + {{- with .Values.serviceMonitor.interval }} + interval: {{ . 
}} + {{- end }} + {{- with .Values.serviceMonitor.sampleLimit }} + sampleLimit: {{ . }} + {{- end }} + selector: + matchLabels: + {{- include "aws-node-termination-handler.selectorLabels" . | nindent 6 }} +{{- end -}} diff --git a/config/helm/aws-node-termination-handler/values.yaml b/config/helm/aws-node-termination-handler/values.yaml index c97eef36..60035b1f 100644 --- a/config/helm/aws-node-termination-handler/values.yaml +++ b/config/helm/aws-node-termination-handler/values.yaml @@ -192,16 +192,33 @@ dnsPolicy: "" podMonitor: # Specifies whether PodMonitor should be created + # this needs enableSqsTerminationDraining: false + # and enablePrometheusServer: true create: false - # The Prometheus scrape interval + # Specifies whether the PodMonitor should be created in a different namespace than + # the Helm release + namespace: + # Additional labels to add to the metadata + labels: {} + # The Prometheus scrape interval interval: 30s # The number of scraped samples that will be accepted sampleLimit: 5000 - # Additional labels to add to the metadata - labels: {} - # Specifies whether a pod monitor should be created in a different namespace than + +serviceMonitor: + # Specifies whether ServiceMonitor should be created + # this needs enableSqsTerminationDraining: true + # and enablePrometheusServer: true + create: false + # Specifies whether the ServiceMonitor should be created in a different namespace than # the Helm release - # namespace: monitoring + namespace: + # Additional labels to add to the metadata + labels: {} + # The Prometheus scrape interval + interval: 30s + # The number of scraped samples that will be accepted + sampleLimit: 5000 # K8s DaemonSet update strategy. updateStrategy: