Skip to content

Commit

Permalink
Backport of fix: lifecycle enabled iptables mismatch into release/1.2…
Browse files Browse the repository at this point in the history
….x (#2845)

fix: lifecycle enabled iptables mismatch (#2842)

Co-authored-by: Dan Stough <dan.stough@hashicorp.com>
  • Loading branch information
hc-github-team-consul-core and DanStough authored Feb 13, 2024
1 parent 6fd6a95 commit 12f3652
Show file tree
Hide file tree
Showing 10 changed files with 455 additions and 47 deletions.
3 changes: 3 additions & 0 deletions .changelog/2743.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
control-plane: Changed the container ordering in connect-inject to insert consul-dataplane container first if lifecycle is enabled. Container ordering is unchanged if lifecycle is disabled.
```
13 changes: 7 additions & 6 deletions acceptance/framework/connhelper/connect_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@ import (
"time"

terratestK8s "github.com/gruntwork-io/terratest/modules/k8s"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/hashicorp/consul-k8s/acceptance/framework/config"
"github.com/hashicorp/consul-k8s/acceptance/framework/consul"
"github.com/hashicorp/consul-k8s/acceptance/framework/environment"
"github.com/hashicorp/consul-k8s/acceptance/framework/helpers"
"github.com/hashicorp/consul-k8s/acceptance/framework/k8s"
"github.com/hashicorp/consul-k8s/acceptance/framework/logger"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
Expand Down Expand Up @@ -357,7 +358,7 @@ func (c *ConnectHelper) TestConnectionFailureWhenUnhealthy(t *testing.T) {
opts := c.KubectlOptsForApp(t)

logger.Log(t, "testing k8s -> consul health checks sync by making the static-server unhealthy")
k8s.RunKubectl(t, opts, "exec", "deploy/"+StaticServerName, "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, opts, "exec", "deploy/"+StaticServerName, "-c", "static-server", "--", "touch", "/tmp/unhealthy")

// The readiness probe should take a moment to be reflected in Consul,
// CheckStaticServerConnection will retry until Consul marks the service
Expand All @@ -382,7 +383,7 @@ func (c *ConnectHelper) TestConnectionFailureWhenUnhealthy(t *testing.T) {
}

// Return the static-server to a "healthy state".
k8s.RunKubectl(t, opts, "exec", "deploy/"+StaticServerName, "--", "rm", "/tmp/unhealthy")
k8s.RunKubectl(t, opts, "exec", "deploy/"+StaticServerName, "-c", "static-server", "--", "rm", "/tmp/unhealthy")
}

// helmValues uses the Secure and AutoEncrypt fields to set values for the Helm
Expand Down
2 changes: 1 addition & 1 deletion acceptance/tests/connect/connect_external_servers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func TestConnectInject_ExternalServers(t *testing.T) {
// Test that kubernetes readiness status is synced to Consul.
// Create the file so that the readiness probe of the static-server pod fails.
logger.Log(t, "testing k8s -> consul health checks sync by making the static-server unhealthy")
k8s.RunKubectl(t, ctx.KubectlOptions(t), "exec", "deploy/"+connhelper.StaticServerName, "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, ctx.KubectlOptions(t), "exec", "deploy/"+connhelper.StaticServerName, "-c", "static-server", "--", "touch", "/tmp/unhealthy")

// The readiness probe should take a moment to be reflected in Consul, CheckStaticServerConnection will retry
// until Consul marks the service instance unavailable for mesh traffic, causing the connection to fail.
Expand Down
2 changes: 1 addition & 1 deletion acceptance/tests/connect/connect_inject_namespaces_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ func TestConnectInjectNamespaces(t *testing.T) {
// Test that kubernetes readiness status is synced to Consul.
// Create the file so that the readiness probe of the static-server pod fails.
logger.Log(t, "testing k8s -> consul health checks sync by making the static-server unhealthy")
k8s.RunKubectl(t, staticServerOpts, "exec", "deploy/"+connhelper.StaticServerName, "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, staticServerOpts, "exec", "deploy/"+connhelper.StaticServerName, "-c", "static-server", "--", "touch", "/tmp/unhealthy")

// The readiness probe should take a moment to be reflected in Consul, CheckStaticServerConnection will retry
// until Consul marks the service instance unavailable for mesh traffic, causing the connection to fail.
Expand Down
4 changes: 2 additions & 2 deletions acceptance/tests/connect/connect_inject_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,9 +328,9 @@ func TestConnectInject_MultiportServices(t *testing.T) {
// and check inbound connections to the multi port pods' services.
// Create the files so that the readiness probes of the multi port pod fails.
logger.Log(t, "testing k8s -> consul health checks sync by making the multiport unhealthy")
k8s.RunKubectl(t, ctx.KubectlOptions(t), "exec", "deploy/"+multiport, "--", "touch", "/tmp/unhealthy-multiport")
k8s.RunKubectl(t, ctx.KubectlOptions(t), "exec", "deploy/"+multiport, "-c", "multiport", "--", "touch", "/tmp/unhealthy-multiport")
logger.Log(t, "testing k8s -> consul health checks sync by making the multiport-admin unhealthy")
k8s.RunKubectl(t, ctx.KubectlOptions(t), "exec", "deploy/"+multiport, "--", "touch", "/tmp/unhealthy-multiport-admin")
k8s.RunKubectl(t, ctx.KubectlOptions(t), "exec", "deploy/"+multiport, "-c", "multiport-admin", "--", "touch", "/tmp/unhealthy-multiport-admin")

// The readiness probe should take a moment to be reflected in Consul, CheckStaticServerConnection will retry
// until Consul marks the service instance unavailable for mesh traffic, causing the connection to fail.
Expand Down
8 changes: 4 additions & 4 deletions acceptance/tests/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ func TestComponentMetrics(t *testing.T) {
k8s.DeployKustomize(t, ctx.KubectlOptions(t), cfg.NoCleanupOnFailure, cfg.NoCleanup, cfg.DebugDirectory, "../fixtures/bases/static-client")

// Server Metrics
metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "--", "curl", "--silent", "--show-error", fmt.Sprintf("http://%s:8500/v1/agent/metrics?format=prometheus", fmt.Sprintf("%s-consul-server.%s.svc", releaseName, ns)))
metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "-c", "static-client", "--", "curl", "--silent", "--show-error", fmt.Sprintf("http://%s:8500/v1/agent/metrics?format=prometheus", fmt.Sprintf("%s-consul-server.%s.svc", releaseName, ns)))
require.NoError(t, err)
require.Contains(t, metricsOutput, `consul_acl_ResolveToken{quantile="0.5"}`)

// Client Metrics
metricsOutput, err = k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "--", "sh", "-c", "curl --silent --show-error http://$HOST_IP:8500/v1/agent/metrics?format=prometheus")
metricsOutput, err = k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "-c", "static-client", "--", "sh", "-c", "curl --silent --show-error http://$HOST_IP:8500/v1/agent/metrics?format=prometheus")
require.NoError(t, err)
require.Contains(t, metricsOutput, `consul_acl_ResolveToken{quantile="0.5"}`)

Expand Down Expand Up @@ -133,7 +133,7 @@ func TestAppMetrics(t *testing.T) {
// Retry because sometimes the merged metrics server takes a couple hundred milliseconds
// to start.
retry.RunWith(&retry.Counter{Count: 20, Wait: 2 * time.Second}, t, func(r *retry.R) {
metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "--", "curl", "--silent", "--show-error", fmt.Sprintf("http://%s:20200/metrics", podIP))
metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "-c", "static-client", "--", "curl", "--silent", "--show-error", fmt.Sprintf("http://%s:20200/metrics", podIP))
require.NoError(r, err)
// This assertion represents the metrics from the envoy sidecar.
require.Contains(r, metricsOutput, `envoy_cluster_assignment_stale{local_cluster="server",consul_source_service="server"`)
Expand All @@ -147,7 +147,7 @@ func assertGatewayMetricsEnabled(t *testing.T, ctx environment.TestContext, ns,
require.NoError(t, err)
for _, pod := range pods.Items {
podIP := pod.Status.PodIP
metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "--", "curl", "--silent", "--show-error", fmt.Sprintf("http://%s:20200/metrics", podIP))
metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "-c", "static-client", "--", "curl", "--silent", "--show-error", fmt.Sprintf("http://%s:20200/metrics", podIP))
require.NoError(t, err)
require.Contains(t, metricsOutput, metricsAssertion)
}
Expand Down
8 changes: 4 additions & 4 deletions acceptance/tests/partitions/partitions_connect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,8 @@ func TestPartitions_Connect(t *testing.T) {
// Test that kubernetes readiness status is synced to Consul.
// Create the file so that the readiness probe of the static-server pod fails.
logger.Log(t, "testing k8s -> consul health checks sync by making the static-server unhealthy")
k8s.RunKubectl(t, defaultPartitionClusterStaticServerOpts, "exec", "deploy/"+staticServerName, "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, secondaryPartitionClusterStaticServerOpts, "exec", "deploy/"+staticServerName, "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, defaultPartitionClusterStaticServerOpts, "exec", "deploy/"+staticServerName, "-c", "static-server", "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, secondaryPartitionClusterStaticServerOpts, "exec", "deploy/"+staticServerName, "-c", "static-server", "--", "touch", "/tmp/unhealthy")

// The readiness probe should take a moment to be reflected in Consul, CheckStaticServerConnection will retry
// until Consul marks the service instance unavailable for mesh traffic, causing the connection to fail.
Expand Down Expand Up @@ -578,8 +578,8 @@ func TestPartitions_Connect(t *testing.T) {
// Test that kubernetes readiness status is synced to Consul.
// Create the file so that the readiness probe of the static-server pod fails.
logger.Log(t, "testing k8s -> consul health checks sync by making the static-server unhealthy")
k8s.RunKubectl(t, defaultPartitionClusterStaticServerOpts, "exec", "deploy/"+staticServerName, "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, secondaryPartitionClusterStaticServerOpts, "exec", "deploy/"+staticServerName, "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, defaultPartitionClusterStaticServerOpts, "exec", "deploy/"+staticServerName, "-c", "static-server", "--", "touch", "/tmp/unhealthy")
k8s.RunKubectl(t, secondaryPartitionClusterStaticServerOpts, "exec", "deploy/"+staticServerName, "-c", "static-server", "--", "touch", "/tmp/unhealthy")

// The readiness probe should take a moment to be reflected in Consul, CheckStaticServerConnection will retry
// until Consul marks the service instance unavailable for mesh traffic, causing the connection to fail.
Expand Down
63 changes: 46 additions & 17 deletions control-plane/connect-inject/webhook/mesh_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@ import (

mapset "github.com/deckarep/golang-set"
"github.com/go-logr/logr"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/common"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/constants"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/lifecycle"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/metrics"
"github.com/hashicorp/consul-k8s/control-plane/consul"
"github.com/hashicorp/consul-k8s/control-plane/namespaces"
"github.com/hashicorp/consul-k8s/control-plane/version"
"gomodules.xyz/jsonpatch/v2"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand All @@ -30,6 +23,14 @@ import (
"k8s.io/client-go/kubernetes"
_ "k8s.io/client-go/plugin/pkg/client/auth"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

"github.com/hashicorp/consul-k8s/control-plane/connect-inject/common"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/constants"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/lifecycle"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/metrics"
"github.com/hashicorp/consul-k8s/control-plane/consul"
"github.com/hashicorp/consul-k8s/control-plane/namespaces"
"github.com/hashicorp/consul-k8s/control-plane/version"
)

const (
Expand Down Expand Up @@ -297,7 +298,10 @@ func (w *MeshWebhook) Handle(ctx context.Context, req admission.Request) admissi
// port.
annotatedSvcNames := w.annotatedServiceNames(pod)
multiPort := len(annotatedSvcNames) > 1

lifecycleEnabled, ok := w.LifecycleConfig.EnableProxyLifecycle(pod)
if ok != nil {
w.Log.Error(err, "unable to get lifecycle enabled status")
}
// For single port pods, add the single init container and envoy sidecar.
if !multiPort {
// Add the init container that registers the service and sets up the Envoy configuration.
Expand All @@ -314,8 +318,14 @@ func (w *MeshWebhook) Handle(ctx context.Context, req admission.Request) admissi
w.Log.Error(err, "error configuring injection sidecar container", "request name", req.Name)
return admission.Errored(http.StatusInternalServerError, fmt.Errorf("error configuring injection sidecar container: %s", err))
}
// TODO: invert to start the Envoy sidecar before the application container
pod.Spec.Containers = append(pod.Spec.Containers, envoySidecar)
//Append the Envoy sidecar before the application container only if lifecycle enabled.

if lifecycleEnabled && ok == nil {
pod.Spec.Containers = append([]corev1.Container{envoySidecar}, pod.Spec.Containers...)
} else {
pod.Spec.Containers = append(pod.Spec.Containers, envoySidecar)
}

} else {
// For multi port pods, check for unsupported cases, mount all relevant service account tokens, and mount an init
// container and envoy sidecar per port. Tproxy, metrics, and metrics merging are not supported for multi port pods.
Expand All @@ -330,6 +340,10 @@ func (w *MeshWebhook) Handle(ctx context.Context, req admission.Request) admissi
w.Log.Error(err, "checking unsupported cases for multi port pods")
return admission.Errored(http.StatusInternalServerError, err)
}

//List of sidecar containers for each service. Build as a list to preserve correct ordering in relation
//to services.
sidecarContainers := []corev1.Container{}
for i, svc := range annotatedSvcNames {
w.Log.Info(fmt.Sprintf("service: %s", svc))
if w.AuthMethod != "" {
Expand Down Expand Up @@ -385,9 +399,20 @@ func (w *MeshWebhook) Handle(ctx context.Context, req admission.Request) admissi
w.Log.Error(err, "error configuring injection sidecar container", "request name", req.Name)
return admission.Errored(http.StatusInternalServerError, fmt.Errorf("error configuring injection sidecar container: %s", err))
}
// TODO: invert to start the Envoy sidecar container before the
// application container
pod.Spec.Containers = append(pod.Spec.Containers, envoySidecar)
// If Lifecycle is enabled, add to the list of sidecar containers to be added
// to pod containers at the end in order to preserve relative ordering.
if lifecycleEnabled {
sidecarContainers = append(sidecarContainers, envoySidecar)
} else {
pod.Spec.Containers = append(pod.Spec.Containers, envoySidecar)

}

}

//Add sidecar containers first if lifecycle enabled.
if lifecycleEnabled {
pod.Spec.Containers = append(sidecarContainers, pod.Spec.Containers...)
}
}

Expand Down Expand Up @@ -512,20 +537,24 @@ func (w *MeshWebhook) overwriteProbes(ns corev1.Namespace, pod *corev1.Pod) erro
}

if tproxyEnabled && overwriteProbes {
for i, container := range pod.Spec.Containers {
// We don't use the loop index because this needs to line up w.withiptablesConfigJSON,
// which is performed before the sidecar is injected.
idx := 0
for _, container := range pod.Spec.Containers {
// skip the "envoy-sidecar" container from having it's probes overridden
if container.Name == sidecarContainer {
continue
}
if container.LivenessProbe != nil && container.LivenessProbe.HTTPGet != nil {
container.LivenessProbe.HTTPGet.Port = intstr.FromInt(exposedPathsLivenessPortsRangeStart + i)
container.LivenessProbe.HTTPGet.Port = intstr.FromInt(exposedPathsLivenessPortsRangeStart + idx)
}
if container.ReadinessProbe != nil && container.ReadinessProbe.HTTPGet != nil {
container.ReadinessProbe.HTTPGet.Port = intstr.FromInt(exposedPathsReadinessPortsRangeStart + i)
container.ReadinessProbe.HTTPGet.Port = intstr.FromInt(exposedPathsReadinessPortsRangeStart + idx)
}
if container.StartupProbe != nil && container.StartupProbe.HTTPGet != nil {
container.StartupProbe.HTTPGet.Port = intstr.FromInt(exposedPathsStartupPortsRangeStart + i)
container.StartupProbe.HTTPGet.Port = intstr.FromInt(exposedPathsStartupPortsRangeStart + idx)
}
idx++
}
}
return nil
Expand Down
Loading

0 comments on commit 12f3652

Please sign in to comment.