Skip to content

Commit

Permalink
agentless: support updating health checks on consul clients during an…
Browse files Browse the repository at this point in the history
… upgrade to agentless (#1690)

We will need to handle the case when a user has many applications on the service mesh and needs to take time to re-inject all those applications. If that is the case, there may be situations where the application changes its health status, but because it still hasn’t been re-started (re-injected), its health status needs to be synced into Consul clients rather than servers the way we used to do before. To do this, we will add a new annotation to all pods pointing to the current consul-k8s version: consul.hashicorp.com/consul-k8s-version: 1.0.0. If this annotation is not there or if the version is less than the version that supports “agentless”, we will continue to sync health checks to Consul clients for that particular pod
  • Loading branch information
ishustava authored Nov 10, 2022
1 parent 3106d36 commit 6dc218d
Show file tree
Hide file tree
Showing 14 changed files with 713 additions and 76 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ IMPROVEMENTS:
* Control Plane
* Update minimum go version for project to 1.19 [[GH-1633](https://github.com/hashicorp/consul-k8s/pull/1633)]
* Remove unneeded `agent:read` ACL permissions from mesh gateway policy. [[GH-1255](https://github.com/hashicorp/consul-k8s/pull/1255)]
* Support updating health checks on consul clients during an upgrade to agentless. [[GH-1690](https://github.com/hashicorp/consul-k8s/pull/1690)]
* Helm:
* Remove deprecated annotation `service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"` in the `server-service` template. [[GH-1619](https://github.com/hashicorp/consul-k8s/pull/1619)]
* Support `minAvailable` on connect injector `PodDisruptionBudget`. [[GH-1557](https://github.com/hashicorp/consul-k8s/pull/1557)]
Expand Down
58 changes: 23 additions & 35 deletions charts/consul/templates/connect-inject-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ spec:
- name: sidecar-injector
image: "{{ default .Values.global.imageK8S .Values.connectInject.image }}"
ports:
- containerPort: 8080
name: webhook-server
protocol: TCP
- containerPort: 8080
name: webhook-server
protocol: TCP
env:
- name: NAMESPACE
valueFrom:
Expand Down Expand Up @@ -239,24 +239,12 @@ spec:
{{- end }}
{{- end }}
{{- if .Values.global.consulSidecarContainer }}
{{- $consulSidecarResources := .Values.global.consulSidecarContainer.resources }}
{{- if not (kindIs "invalid" $consulSidecarResources.limits.memory) }}
-default-consul-sidecar-memory-limit={{ $consulSidecarResources.limits.memory }} \
{{- end }}
{{- if not (kindIs "invalid" $consulSidecarResources.requests.memory) }}
-default-consul-sidecar-memory-request={{ $consulSidecarResources.requests.memory }} \
{{- end }}
{{- if not (kindIs "invalid" $consulSidecarResources.limits.cpu) }}
-default-consul-sidecar-cpu-limit={{ $consulSidecarResources.limits.cpu }} \
{{- end }}
{{- if not (kindIs "invalid" $consulSidecarResources.requests.cpu) }}
-default-consul-sidecar-cpu-request={{ $consulSidecarResources.requests.cpu }} \
{{- end }}
{{- end }}
{{- if .Values.global.cloud.enabled }}
-tls-server-name=server.{{ .Values.global.datacenter}}.{{ .Values.global.domain}} \
{{- end }}
{{- if and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt }}
-enable-auto-encrypt \
{{- end }}
startupProbe:
httpGet:
path: /readyz/ready
Expand Down Expand Up @@ -286,38 +274,38 @@ spec:
timeoutSeconds: 5
volumeMounts:
{{- if not (and .Values.global.secretsBackend.vault.enabled .Values.global.secretsBackend.vault.connectInject.tlsCert.secretName) }}
- name: certs
mountPath: /etc/connect-injector/certs
readOnly: true
- name: certs
mountPath: /etc/connect-injector/certs
readOnly: true
{{- end }}
{{- if and .Values.global.tls.enabled (not (and .Values.externalServers.enabled .Values.externalServers.useSystemRoots))}}
- name: consul-ca-cert
mountPath: /consul/tls/ca
readOnly: true
- name: consul-ca-cert
mountPath: /consul/tls/ca
readOnly: true
{{- end }}
{{- with .Values.connectInject.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
{{- if not (and .Values.global.secretsBackend.vault.enabled .Values.global.secretsBackend.vault.connectInject.tlsCert.secretName) }}
- name: certs
secret:
defaultMode: 420
secretName: {{ template "consul.fullname" . }}-connect-inject-webhook-cert
- name: certs
secret:
defaultMode: 420
secretName: {{ template "consul.fullname" . }}-connect-inject-webhook-cert
{{- end }}
{{- if .Values.global.tls.enabled }}
{{- if not (and .Values.externalServers.enabled .Values.externalServers.useSystemRoots) }}
- name: consul-ca-cert
secret:
- name: consul-ca-cert
secret:
{{- if .Values.global.tls.caCert.secretName }}
secretName: {{ .Values.global.tls.caCert.secretName }}
secretName: {{ .Values.global.tls.caCert.secretName }}
{{- else }}
secretName: {{ template "consul.fullname" . }}-ca-cert
secretName: {{ template "consul.fullname" . }}-ca-cert
{{- end }}
items:
- key: {{ default "tls.crt" .Values.global.tls.caCert.secretKey }}
path: tls.crt
items:
- key: {{ default "tls.crt" .Values.global.tls.caCert.secretKey }}
path: tls.crt
{{- end }}
{{- end }}
{{- if .Values.connectInject.priorityClassName }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ const (
// to explicitly perform the peering operation again.
AnnotationPeeringVersion = "consul.hashicorp.com/peering-version"

// AnnotationConsulK8sVersion is the current version of this binary.
AnnotationConsulK8sVersion = "consul.hashicorp.com/connect-k8s-version"

// LabelServiceIgnore is a label that can be added to a service to prevent it from being
// registered with Consul.
LabelServiceIgnore = "consul.hashicorp.com/service-ignore"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package endpoints

import (
"fmt"

"github.com/hashicorp/consul-k8s/control-plane/connect-inject/constants"
"github.com/hashicorp/consul-k8s/control-plane/consul"
"github.com/hashicorp/consul-server-connection-manager/discovery"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/go-version"
corev1 "k8s.io/api/core/v1"
)

const minSupportedConsulDataplaneVersion = "v1.0.0-beta1"

// isConsulDataplaneSupported returns true if the consul-k8s version on the pod supports
// consul-dataplane architecture of Consul.
func isConsulDataplaneSupported(pod corev1.Pod) bool {
if anno, ok := pod.Annotations[constants.AnnotationConsulK8sVersion]; ok {
consulK8sVersion, err := version.NewVersion(anno)
if err != nil {
return false
}
consulDPSupportedVersion, err := version.NewVersion(minSupportedConsulDataplaneVersion)
if err != nil {
return false
}
if !consulK8sVersion.LessThan(consulDPSupportedVersion) {
return true
}
}
return false
}

func (r *Controller) consulClientCfgForNodeAgent(serverClient *api.Client, pod corev1.Pod, state discovery.State) (*api.Config, error) {
ccCfg := &api.Config{
Scheme: r.ConsulClientConfig.APIClientConfig.Scheme,
}

consulClientHttpPort := 8500
if ccCfg.Scheme == "https" {
consulClientHttpPort = 8501
ccCfg.TLSConfig.CAFile = r.ConsulClientConfig.APIClientConfig.TLSConfig.CAFile
}
if r.consulClientHttpPort != 0 {
consulClientHttpPort = r.consulClientHttpPort
}
ccCfg.Address = fmt.Sprintf("%s:%d", pod.Status.HostIP, consulClientHttpPort)

ccCfg.Token = state.Token

// Check if auto-encrypt is enabled. If it is, we need to retrieve and set a different CA for the Consul client.
if r.EnableAutoEncrypt {
// Get Connect CA.
caRoots, _, err := serverClient.Agent().ConnectCARoots(nil)
if err != nil {
return nil, err
}
if caRoots == nil {
return nil, fmt.Errorf("ca root list is nil")
}
if caRoots.Roots == nil {
return nil, fmt.Errorf("ca roots is nil")
}
if len(caRoots.Roots) == 0 {
return nil, fmt.Errorf("the list of root CAs is empty")
}

for _, root := range caRoots.Roots {
if root.Active {
ccCfg.TLSConfig.CAFile = ""
ccCfg.TLSConfig.CAPem = []byte(root.RootCertPEM)
break
}
}
}
if r.EnableConsulNamespaces {
ccCfg.Namespace = r.consulNamespace(pod.Namespace)
}
return ccCfg, nil
}

func (r *Controller) updateHealthCheckOnConsulClient(consulClientCfg *api.Config, pod corev1.Pod, endpoints corev1.Endpoints, status string) error {
consulClient, err := consul.NewClient(consulClientCfg, r.ConsulClientConfig.APITimeout)
if err != nil {
return err
}
filter := fmt.Sprintf(`Name == "Kubernetes Health Check" and ServiceID == %q`, serviceID(pod, endpoints))
checks, err := consulClient.Agent().ChecksWithFilter(filter)
if err != nil {
return err
}
if len(checks) > 1 {
return fmt.Errorf("more than one Kubernetes health check found")
}
if len(checks) == 0 {
r.Log.Info("detected no health checks to update", "name", endpoints.Name, "ns", endpoints.Namespace, "service-id", serviceID(pod, endpoints))
return nil
}
for checkID := range checks {
output := "Kubernetes health checks passing"
if status == api.HealthCritical {
output = fmt.Sprintf(`Pod "%s/%s" is not ready`, pod.Namespace, pod.Name)
}
r.Log.Info("updating health check status", "name", endpoints.Name, "ns", endpoints.Namespace, "status", status)
err = consulClient.Agent().UpdateTTL(checkID, output, status)
if err != nil {
return err
}
}
return nil
}
Loading

0 comments on commit 6dc218d

Please sign in to comment.