From d16c9696c32874cbe98c0d3b926a0213e8011bfc Mon Sep 17 00:00:00 2001
From: Sanskar Jaiswal
Date: Thu, 10 Nov 2022 13:16:51 +0530
Subject: [PATCH] add docs for istio sticky canary releases

Signed-off-by: Sanskar Jaiswal
---
 .../tutorials/istio-progressive-delivery.md | 112 ++++++++++++++++++
 docs/gitbook/usage/deployment-strategies.md |  58 +++++++++
 2 files changed, 170 insertions(+)

diff --git a/docs/gitbook/tutorials/istio-progressive-delivery.md b/docs/gitbook/tutorials/istio-progressive-delivery.md
index b130e646b..12f862463 100644
--- a/docs/gitbook/tutorials/istio-progressive-delivery.md
+++ b/docs/gitbook/tutorials/istio-progressive-delivery.md
@@ -292,6 +292,118 @@ Events:
   Warning  Synced  1m    flagger  Canary failed! Scaling down podinfo.test
 ```
 
+## Session Affinity
+
+While Flagger can perform weighted routing and A/B testing individually, with Istio it can combine the
+two, resulting in a canary release with session affinity. For more information you can read the
+[deployment strategies docs](../usage/deployment-strategies.md#canary-release-with-session-affinity).
+
+Create a canary custom resource \(replace app.example.com with your own domain\):
+
+```yaml
+apiVersion: flagger.app/v1beta1
+kind: Canary
+metadata:
+  name: podinfo
+  namespace: test
+spec:
+  # deployment reference
+  targetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: podinfo
+  # the maximum time in seconds for the canary deployment
+  # to make progress before it is rolled back (default 600s)
+  progressDeadlineSeconds: 60
+  # HPA reference (optional)
+  autoscalerRef:
+    apiVersion: autoscaling/v2beta2
+    kind: HorizontalPodAutoscaler
+    name: podinfo
+  service:
+    # service port number
+    port: 9898
+    # container port number or name (optional)
+    targetPort: 9898
+    # Istio gateways (optional)
+    gateways:
+    - public-gateway.istio-system.svc.cluster.local
+    # Istio virtual service host names (optional)
+    hosts:
+    - app.example.com
+    # Istio traffic policy (optional)
+    trafficPolicy:
+      tls:
+        # use ISTIO_MUTUAL when mTLS is enabled
+        mode: DISABLE
+    # Istio retry policy (optional)
+    retries:
+      attempts: 3
+      perTryTimeout: 1s
+      retryOn: "gateway-error,connect-failure,refused-stream"
+  analysis:
+    # schedule interval (default 60s)
+    interval: 1m
+    # max number of failed metric checks before rollback
+    threshold: 5
+    # max traffic percentage routed to canary
+    # percentage (0-100)
+    maxWeight: 50
+    # canary increment step
+    # percentage (0-100)
+    stepWeight: 10
+    # session affinity config
+    sessionAffinity:
+      # name of the cookie used
+      cookieName: flagger-cookie
+      # max age of the cookie (in seconds)
+      # optional; defaults to 86400
+      maxAge: 21600
+    metrics:
+    - name: request-success-rate
+      # minimum req success rate (non 5xx responses)
+      # percentage (0-100)
+      thresholdRange:
+        min: 99
+      interval: 1m
+    - name: request-duration
+      # maximum req duration P99
+      # milliseconds
+      thresholdRange:
+        max: 500
+      interval: 30s
+    # testing (optional)
+    webhooks:
+      - name: acceptance-test
+        type: pre-rollout
+        url: http://flagger-loadtester.test/
+        timeout: 30s
+        metadata:
+          type: bash
+          cmd: "curl -sd 'test' http://podinfo-canary:9898/token | grep token"
+      - name: load-test
+        url: http://flagger-loadtester.test/
+        timeout: 5s
+        metadata:
+          cmd: "hey -z 1m -q 10 -c 2 http://podinfo-canary.test:9898/"
+```
+
+Save the above resource as podinfo-canary-session-affinity.yaml and then apply it:
+
+```bash
+kubectl apply -f ./podinfo-canary-session-affinity.yaml
+```
+
+Trigger a canary deployment by updating the container image:
+
+```bash
+kubectl -n test set image deployment/podinfo \
+podinfod=ghcr.io/stefanprodan/podinfo:6.0.1
+```
+
+You can load `app.example.com` in your browser and refresh it until you see the requests being served by
+`podinfo:6.0.1`. All subsequent requests will then be served by `podinfo:6.0.1` rather than `podinfo:6.0.0`,
+because of the session affinity configured by Flagger with Istio.
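+
+If you prefer the command line, you can observe the same stickiness with curl. This is an illustrative
+check, assuming the domain you configured resolves to your Istio ingress gateway and a canary run is in
+progress; the cookie value shown is just an example:
+
+```bash
+# Repeat this until the response carries a Set-Cookie header for flagger-cookie,
+# which means the request was served by the canary version.
+curl -si http://app.example.com/ | grep -i '^set-cookie'
+
+# Replay the request with the cookie value you received (example value shown);
+# it will keep being routed to the canary, regardless of the current weights.
+curl -s -H 'Cookie: flagger-cookie=LpsIaLdoNZ' http://app.example.com/
+```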
+
 ## Traffic mirroring
 
 ![Flagger Canary Traffic Shadowing](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/flagger-canary-traffic-mirroring.png)
diff --git a/docs/gitbook/usage/deployment-strategies.md b/docs/gitbook/usage/deployment-strategies.md
index 605dccb08..d814f5ca6 100644
--- a/docs/gitbook/usage/deployment-strategies.md
+++ b/docs/gitbook/usage/deployment-strategies.md
@@ -10,6 +10,8 @@ Flagger can run automated application analysis, promotion and rollback for the f
 * Kubernetes CNI, Istio, Linkerd, App Mesh, NGINX, Contour, Gloo Edge, Open Service Mesh, Gateway API
 * **Blue/Green Mirroring** \(traffic shadowing\)
   * Istio
+* **Canary Release with Session Affinity** \(progressive traffic shifting combined with cookie-based routing\)
+  * Istio
 
 For Canary releases and A/B testing you'll need a Layer 7 traffic management solution like a service mesh or an ingress controller.
 For Blue/Green deployments no service mesh or ingress controller is required.
@@ -393,3 +395,59 @@ After the analysis finishes, the traffic is routed to the canary (green) before
 triggering the primary (blue) rolling update, this ensures a smooth transition
 to the new version avoiding dropping in-flight requests during the Kubernetes
 deployment rollout.
+
+## Canary Release with Session Affinity
+
+This deployment strategy mixes a Canary Release with A/B testing. A Canary Release is helpful when
+we're trying to expose new features to users progressively, but because of the weight-based nature of its
+routing, users can land on the application's old version even after they have previously been routed to
+the new version. This can be annoying, or worse, break how other services interact with our application.
+To address this issue, we borrow some ideas from A/B testing.
+
+Since A/B testing is particularly helpful for applications that require session affinity, we integrate
+cookie-based routing with regular weight-based routing. This means that once a user is exposed to the new
+version of our application (based on the traffic weights), they're always routed to that version, i.e.
+they're never routed back to the old version of our application.
+
+You can enable this by specifying `.spec.analysis.sessionAffinity` in the Canary (only Istio is supported):
+
+```yaml
+  analysis:
+    # schedule interval (default 60s)
+    interval: 1m
+    # max number of failed metric checks before rollback
+    threshold: 10
+    # max traffic percentage routed to canary
+    # percentage (0-100)
+    maxWeight: 50
+    # canary increment step
+    # percentage (0-100)
+    stepWeight: 2
+    # session affinity config
+    sessionAffinity:
+      # name of the cookie used
+      cookieName: flagger-cookie
+      # max age of the cookie (in seconds)
+      # optional; defaults to 86400
+      maxAge: 21600
+```
+
+`.spec.analysis.sessionAffinity.cookieName` is the name of the cookie that is stored. The value of the
+cookie is a randomly generated string of characters that acts as a unique identifier. For the above
+config, the response header of a request routed to the canary deployment during a Canary run will look like:
+```
+Set-Cookie: flagger-cookie=LpsIaLdoNZ; Max-Age=21600
+```
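+
+Under the hood, Flagger implements this with the Istio VirtualService it manages for the canary. The
+sketch below is illustrative rather than Flagger's exact output: it assumes a canary named `podinfo` in
+the `test` namespace at a 10% canary weight, and the cookie matching rule is simplified:
+
+```yaml
+# Hypothetical sketch of the generated routes, not Flagger's exact output.
+apiVersion: networking.istio.io/v1beta1
+kind: VirtualService
+metadata:
+  name: podinfo
+  namespace: test
+spec:
+  hosts:
+  - podinfo
+  http:
+  # requests that already carry the session cookie stick to the canary
+  - match:
+    - headers:
+        cookie:
+          regex: "^(.*?;)?(flagger-cookie=.*)(;.*)?$"
+    route:
+    - destination:
+        host: podinfo-canary
+  # everyone else is split according to the current canary weights
+  - route:
+    - destination:
+        host: podinfo-primary
+      weight: 90
+    - destination:
+        host: podinfo-canary
+      weight: 10
+```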
+
+After a Canary run is over and all traffic is shifted back to the primary deployment, all responses will
+have the following header:
+```
+Set-Cookie: flagger-cookie=LpsIaLdoNZ; Max-Age=-1
+```
+This tells the client to delete the cookie, making sure there are no stale cookies lying around in the
+user's system.
+
+If a new Canary run is triggered, the response header will set a new cookie for all requests routed to
+the Canary deployment:
+```
+Set-Cookie: flagger-cookie=McxKdLQoIN; Max-Age=21600
+```
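+
+You can verify the cleanup behavior from the command line by replaying a request with the old cookie
+after the run has finished. This is an illustrative check that assumes the application is reachable at
+`app.example.com` and reuses the example cookie value from above:
+
+```bash
+# After promotion, a response to a request carrying the old cookie includes a
+# Set-Cookie header with Max-Age=-1, instructing the client to delete it.
+curl -si -H 'Cookie: flagger-cookie=LpsIaLdoNZ' http://app.example.com/ | grep -i '^set-cookie'
+```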