From 1093979c6333b2fb22e4c4ba531c3309e24d9cc8 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 11:28:59 +0200 Subject: [PATCH 1/9] Bring back kourier tls runtime tests --- .github/workflows/kind-e2e.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/kind-e2e.yaml b/.github/workflows/kind-e2e.yaml index ed213a72ab82..a999bbf157c6 100644 --- a/.github/workflows/kind-e2e.yaml +++ b/.github/workflows/kind-e2e.yaml @@ -210,18 +210,13 @@ jobs: - name: Test ${{ matrix.test-suite }} run: | - FEATURE_FLAGS="-enable-alpha -enable-beta" - if [[ "${{ matrix.ingress}}" == "kourier-tls" ]] && [[ "${{ matrix.test-suite }}" == "runtime" ]]; then - # Disabled due to flakiness: https://github.com/knative/serving/issues/15697 - FEATURE_FLAGS="$FEATURE_FLAGS -disable-optional-api" - fi gotestsum --format testname -- \ -race -count=1 -parallel=1 -tags=e2e \ -timeout=30m \ ${{ matrix.test-path }} \ -skip-cleanup-on-fail \ -disable-logstream \ - $FEATURE_FLAGS \ + -enable-alpha -enable-beta \ --ingress-class=${{ matrix.ingress-class || matrix.ingress }}.ingress.networking.knative.dev \ ${{ matrix.test-flags }} From 8f55911c4df6e994af4e9f3678a68ba547db5b95 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 13:22:29 +0200 Subject: [PATCH 2/9] skip tls --- pkg/activator/certificate/tls_context.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/activator/certificate/tls_context.go b/pkg/activator/certificate/tls_context.go index a0453f574f2e..4ce8bbb87e84 100644 --- a/pkg/activator/certificate/tls_context.go +++ b/pkg/activator/certificate/tls_context.go @@ -50,6 +50,7 @@ func dialTLSContext(ctx context.Context, network, addr string, cr *CertCache) (n san := certificates.DataPlaneUserSAN(revID.Namespace) tlsConf.VerifyConnection = verifySAN(san) + tlsConf.InsecureSkipVerify = true return pkgnet.DialTLSWithBackOff(ctx, network, addr, tlsConf) } From b398e7e734a0b2d0b9e71dcb751765d171db51ee Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 16:19:30 +0200 Subject: [PATCH 3/9] more fix --- .github/workflows/kind-e2e.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/kind-e2e.yaml b/.github/workflows/kind-e2e.yaml index a999bbf157c6..40628f2ecb68 100644 --- a/.github/workflows/kind-e2e.yaml +++ b/.github/workflows/kind-e2e.yaml @@ -220,6 +220,11 @@ jobs: --ingress-class=${{ matrix.ingress-class || matrix.ingress }}.ingress.networking.knative.dev \ ${{ matrix.test-flags }} + - name: Setup tmate session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + with: + limit-access-to-actor: true - uses: chainguard-dev/actions/kind-diag@141bf225e9c19c34304ee9d06e9be9c44a6d8765 # main # Only upload logs on failure. if: ${{ failure() }} From 2c203c408c25ca3b929f2cc6cbd0999427d073ae Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 16:29:05 +0200 Subject: [PATCH 4/9] more fix --- .github/workflows/kind-e2e.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/kind-e2e.yaml b/.github/workflows/kind-e2e.yaml index 40628f2ecb68..15217346ef79 100644 --- a/.github/workflows/kind-e2e.yaml +++ b/.github/workflows/kind-e2e.yaml @@ -220,11 +220,11 @@ jobs: --ingress-class=${{ matrix.ingress-class || matrix.ingress }}.ingress.networking.knative.dev \ ${{ matrix.test-flags }} - - name: Setup tmate session + - uses: mxschmitt/action-tmate@v3 if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 with: limit-access-to-actor: true + - uses: chainguard-dev/actions/kind-diag@141bf225e9c19c34304ee9d06e9be9c44a6d8765 # main # Only upload logs on failure. if: ${{ failure() }} From 3b6199666b8ed669efc41484bcbcc4023593763e Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 16:34:16 +0200 Subject: [PATCH 5/9] trigger From fad4e5bf0d3eafce8bf3fcd4051208f193d83caf Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 17:39:00 +0200 Subject: [PATCH 6/9] keep pods around --- .../runtime/readiness_probe_test.go | 75 ++++++++++++++++++- 1 file changed, 72 insertions(+), 3 deletions(-) diff --git a/test/conformance/runtime/readiness_probe_test.go b/test/conformance/runtime/readiness_probe_test.go index 047e680500b7..f83336e3a002 100644 --- a/test/conformance/runtime/readiness_probe_test.go +++ b/test/conformance/runtime/readiness_probe_test.go @@ -23,15 +23,21 @@ import ( "context" "errors" "fmt" + "knative.dev/serving/pkg/apis/autoscaling" + v1 "knative.dev/serving/pkg/apis/serving/v1" "net" "net/http" "net/url" + "strconv" + "strings" "testing" "time" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" pkgtest "knative.dev/pkg/test" + "knative.dev/pkg/test/logstream" "knative.dev/pkg/test/spoof" v1opts "knative.dev/serving/pkg/testing/v1" "knative.dev/serving/test" @@ -110,16 +116,24 @@ func TestProbeRuntime(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() + // hack for this test only + if test.ServingFlags.DisableLogStream { + cancel := logstream.Start(t) + defer cancel() + } names := test.ResourceNames{ Service: test.ObjectNameForTest(t), Image: test.Readiness, } - test.EnsureTearDown(t, clients, &names) + // test.EnsureTearDown(t, clients, &names) t.Log("Creating a new Service") + envs := tc.env + envs = append(tc.env, corev1.EnvVar{Name: "GODEBUG", Value: "http2debug=2"}) resources, err := v1test.CreateServiceReady(t, clients, &names, - v1opts.WithEnv(tc.env...), + withMinScale(1), + v1opts.WithEnv(envs...), v1opts.WithReadinessProbe( &corev1.Probe{ ProbeHandler: tc.handler, @@ -143,13 +157,68 @@ func TestProbeRuntime(t *testing.T) { test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS), spoof.WithHeader(test.ServingFlags.RequestHeader()), ); err != nil { - t.Fatalf("The endpoint for Route %s at %s didn't return success: %v", names.Route, url, err) + pods, err := clients.KubeClient.CoreV1().Pods(resources.Service.Namespace).List(context.Background(), metav1.ListOptions{}) + if err == nil { + for _, p := range pods.Items { + if strings.HasPrefix(p.Name, resources.Service.Name) { + t.Logf("Pod %s is %s", p.Name, p.Status.Phase) + if err := clients.KubeClient.CoreV1().Pods(resources.Service.Namespace).Delete(context.Background(), p.Name, metav1.DeleteOptions{}); err != nil { + t.Logf("failed to delete pod %s: %v", p.Name, err) + } + } + } + } + s, err := clients.KubeClient.CoreV1().Secrets(resources.Service.Namespace).Get(context.Background(), "serving-certs", metav1.GetOptions{}) + + if err == nil { + t.Logf("Secret serving-certs: %v", s.Data) + } + + events, err := clients.KubeClient.CoreV1().Events(resources.Service.Namespace).List(context.Background(), metav1.ListOptions{}) + if err == nil { + t.Logf("Events: %v", events.Items) + } + + endpoints, err := clients.KubeClient.CoreV1().Endpoints(resources.Service.Namespace).List(context.Background(), metav1.ListOptions{}) + if err == nil { + for _, e := range endpoints.Items { + if strings.HasPrefix(e.Name, resources.Service.Name) { + t.Logf("Endpoind %s is %v", e.Name, e) + } + } + } + + time.Sleep(45 * time.Second) + + if _, err = pkgtest.CheckEndpointState( + context.Background(), + clients.KubeClient, + t.Logf, + url, + spoof.MatchesAllOf(spoof.IsStatusOK, spoof.MatchesBody(test.HelloWorldText)), + "readinessIsReady", + test.ServingFlags.ResolvableDomain, + test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS), + spoof.WithHeader(test.ServingFlags.RequestHeader()), + ); err != nil { + t.Fatalf("The endpoint for Route %s at %s didn't return success: %v", names.Route, url, err) + } + } }) } } } +func withMinScale(minScale int) func(cfg *v1.Service) { + return func(cfg *v1.Service) { + if cfg.Spec.Template.Annotations == nil { + cfg.Spec.Template.Annotations = make(map[string]string, 1) + } + cfg.Spec.Template.Annotations[autoscaling.MinScaleAnnotationKey] = strconv.Itoa(minScale) + } +} + // This test validates the behaviour of readiness probes *after* initial // startup. When a pod goes unready after startup and there are no other pods // in the revision, then there are two possible behaviors: From 2e19268af68fba4788a4d2a4d21c018c67319623 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 21:38:58 +0200 Subject: [PATCH 7/9] changes --- .ko.yaml | 2 +- pkg/activator/certificate/cache.go | 3 +++ pkg/activator/certificate/tls_context.go | 9 ++++++--- pkg/activator/certificate/tls_context_test.go | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.ko.yaml b/.ko.yaml index f9079ac25e40..3cf3d189d265 100644 --- a/.ko.yaml +++ b/.ko.yaml @@ -1,4 +1,4 @@ # Use :nonroot base image for all containers -defaultBaseImage: ghcr.io/wolfi-dev/static:alpine +defaultBaseImage: ubuntu:latest baseImageOverrides: github.com/tsenart/vegeta/v12: ubuntu:latest diff --git a/pkg/activator/certificate/cache.go b/pkg/activator/certificate/cache.go index 967b0dad9a78..61fa4f7ec4d1 100644 --- a/pkg/activator/certificate/cache.go +++ b/pkg/activator/certificate/cache.go @@ -111,6 +111,7 @@ func (cr *CertCache) updateCertificate(secret *corev1.Secret) { defer cr.certificatesMux.Unlock() cert, err := tls.X509KeyPair(secret.Data[certificates.CertName], secret.Data[certificates.PrivateKeyName]) + cr.logger.Infof("cert is: %v\n", secret.Data[certificates.CertName]) if err != nil { cr.logger.Warnf("failed to parse certificate in secret %s/%s: %v", secret.Namespace, secret.Name, zap.Error(err)) return @@ -123,6 +124,7 @@ func (cr *CertCache) updateCertificate(secret *corev1.Secret) { func (cr *CertCache) updateTrustPool() { pool := x509.NewCertPool() + cr.logger.Infof("Updating the pool") cr.addSecretCAIfPresent(pool) cr.addTrustBundles(pool) @@ -137,6 +139,7 @@ func (cr *CertCache) updateTrustPool() { func (cr *CertCache) addSecretCAIfPresent(pool *x509.CertPool) { secret, err := cr.secretInformer.Lister().Secrets(system.Namespace()).Get(netcfg.ServingRoutingCertName) + cr.logger.Infof("Getting the secret") if err != nil { cr.logger.Warnf("Failed to get secret %s/%s: %v", system.Namespace(), netcfg.ServingRoutingCertName, zap.Error(err)) return diff --git a/pkg/activator/certificate/tls_context.go b/pkg/activator/certificate/tls_context.go index 4ce8bbb87e84..528b95f26400 100644 --- a/pkg/activator/certificate/tls_context.go +++ b/pkg/activator/certificate/tls_context.go @@ -21,6 +21,7 @@ import ( "crypto/tls" "errors" "fmt" + "log" "net" "knative.dev/networking/pkg/certificates" @@ -49,18 +50,20 @@ func dialTLSContext(ctx context.Context, network, addr string, cr *CertCache) (n revID := handler.RevIDFrom(ctx) san := certificates.DataPlaneUserSAN(revID.Namespace) - tlsConf.VerifyConnection = verifySAN(san) - tlsConf.InsecureSkipVerify = true + tlsConf.VerifyConnection = verifySAN(san, revID.Name) return pkgnet.DialTLSWithBackOff(ctx, network, addr, tlsConf) } -func verifySAN(san string) func(tls.ConnectionState) error { +func verifySAN(san, rev string) func(tls.ConnectionState) error { return func(cs tls.ConnectionState) error { + log.Printf("In verifySAN1: %s-%s", san, rev) if len(cs.PeerCertificates) == 0 { return errors.New("no PeerCertificates provided") } + log.Printf("In verifySAN2: %s-%s", san, rev) for _, name := range cs.PeerCertificates[0].DNSNames { if name == san { + log.Printf("In verifySAN3 %s-%s\n", name, rev) return nil } } diff --git a/pkg/activator/certificate/tls_context_test.go b/pkg/activator/certificate/tls_context_test.go index 3ea4e0226c94..071b98808d19 100644 --- a/pkg/activator/certificate/tls_context_test.go +++ b/pkg/activator/certificate/tls_context_test.go @@ -60,7 +60,7 @@ func TestVerifySAN(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - err := verifySAN(test.san)(tlsConnectionState) + err := verifySAN(test.san, "")(tlsConnectionState) if test.expErr && err == nil { t.Fatalf("failed to verify SAN") } From 934cb59710072ad9b81acc72fe4568b443601516 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 21:54:09 +0200 Subject: [PATCH 8/9] qp log --- pkg/queue/certificate/watcher.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/queue/certificate/watcher.go b/pkg/queue/certificate/watcher.go index f60ab8a8d7ee..26549494578a 100644 --- a/pkg/queue/certificate/watcher.go +++ b/pkg/queue/certificate/watcher.go @@ -87,6 +87,7 @@ func (cw *CertWatcher) Stop() { func (cw *CertWatcher) GetCertificate(_ *tls.ClientHelloInfo) (*tls.Certificate, error) { cw.mux.RLock() defer cw.mux.RUnlock() + cw.logger.Debugf("Presenting cert: %v", cw.certificate) return cw.certificate, nil } @@ -132,6 +133,7 @@ func (cw *CertWatcher) loadCert() error { cw.certChecksum = certChecksum cw.keyChecksum = keyChecksum + cw.logger.Debugf("Loading cert: %v", cw.certificate) cw.logger.Info(CertReloadMessage) } From f8fa9542c16dc16f4ca6c963b54f3723747915a1 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 21 Jan 2025 22:17:37 +0200 Subject: [PATCH 9/9] tbc 0 --- test/conformance/runtime/readiness_probe_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/conformance/runtime/readiness_probe_test.go b/test/conformance/runtime/readiness_probe_test.go index f83336e3a002..1aa165d3bea2 100644 --- a/test/conformance/runtime/readiness_probe_test.go +++ b/test/conformance/runtime/readiness_probe_test.go @@ -134,6 +134,7 @@ func TestProbeRuntime(t *testing.T) { resources, err := v1test.CreateServiceReady(t, clients, &names, withMinScale(1), v1opts.WithEnv(envs...), + v1opts.WithConfigAnnotations(map[string]string{"autoscaling.knative.dev/target-burst-capacity": "0"}), v1opts.WithReadinessProbe( &corev1.Probe{ ProbeHandler: tc.handler,