diff --git a/.github/workflows/image-reuse.yaml b/.github/workflows/image-reuse.yaml index 067f017484..85a337b988 100644 --- a/.github/workflows/image-reuse.yaml +++ b/.github/workflows/image-reuse.yaml @@ -79,7 +79,7 @@ jobs: cosign-release: 'v2.2.0' - uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 - - uses: docker/setup-buildx-action@aa33708b10e362ff993539393ff100fa93ed6a27 # v3.5.0 + - uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1 - name: Setup tags for container image as a CSV type run: | diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a71f20f40f..863e7da737 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -98,7 +98,7 @@ jobs: uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@aa33708b10e362ff993539393ff100fa93ed6a27 # v3.5.0 + uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1 - name: Generate release artifacts run: | diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index d9862e5726..7c4282bea7 100644 --- a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -72,7 +72,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: coverage-output-unit - path: coverage-output-unit/coverage.out + path: coverage-output-unit test-e2e: strategy: @@ -123,8 +123,17 @@ jobs: uses: mxschmitt/action-tmate@v3 if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true'}} - name: Run e2e tests - run: make test-e2e + run: | + make test-e2e if: ${{ !(github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true') }} + - name: Stop e2e tests controller + run: | + pgrep -f go-build -a + pkill -f go-build + sleep 5 + echo "done stopping process" + ls -lah coverage-output-e2e/ + if: ${{ !(github.event_name == 
'workflow_dispatch' && github.event.inputs.debug_enabled == 'true' && matrix.kubernetes.latest)}} - name: Output Rerun Overview run: | [[ -f rerunreport.txt ]] && cat rerunreport.txt || echo "No rerun report found" @@ -160,6 +169,7 @@ jobs: with: go-version: ${{ env.GOLANG_VERSION }} id: go + - uses: actions/checkout@v4 - name: Get e2e code coverage uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: @@ -172,11 +182,14 @@ jobs: path: coverage-output-unit - name: combine-go-coverage run: | - go tool covdata percent -i=coverage-output-unit/,coverage-output-e2e/ -o full-coverage.out + go tool covdata textfmt -i=coverage-output-unit/,coverage-output-e2e/ -o full-coverage.out - name: Upload code coverage information to codecov.io uses: codecov/codecov-action@e28ff129e5465c2c0dcc6f003fc735cb6ae0c673 # v4.5.0 with: file: full-coverage.out fail_ci_if_error: false + codecov_yml_path: .codecov.yml + disable_search: true + verbose: true env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file diff --git a/Makefile b/Makefile index d6244ecf82..55b21af87c 100644 --- a/Makefile +++ b/Makefile @@ -240,12 +240,12 @@ start-e2e: ## start e2e test environment .PHONY: test-e2e test-e2e: install-devtools-local - ${DIST_DIR}/gotestsum --rerun-fails-report=rerunreport.txt --junitfile=junit.xml --format=testname --packages="./test/e2e" --rerun-fails=5 -- -timeout 60m -count 1 --tags e2e -p ${E2E_PARALLEL} -parallel ${E2E_PARALLEL} -v --short ./test/e2e ${E2E_TEST_OPTIONS} + ${DIST_DIR}/gotestsum --rerun-fails-report=rerunreport.txt --junitfile=junit-e2e-test.xml --format=testname --packages="./test/e2e" --rerun-fails=5 -- -timeout 60m -count 1 --tags e2e -p ${E2E_PARALLEL} -parallel ${E2E_PARALLEL} -v --short ./test/e2e ${E2E_TEST_OPTIONS} .PHONY: test-unit test-unit: install-devtools-local ## run unit tests mkdir -p coverage-output-unit - ${DIST_DIR}/gotestsum --junitfile=junit.xml --format=testname -- -covermode=count 
-coverprofile=coverage-output-unit/coverage.out `go list ./... | grep -v ./test/cmd/metrics-plugin-sample` + ${DIST_DIR}/gotestsum --junitfile=junit-unit-test.xml --format=testname -- `go list ./... | grep -v ./test/cmd/metrics-plugin-sample` -cover -test.gocoverdir=$(CURDIR)/coverage-output-unit .PHONY: coverage diff --git a/docs/FAQ.md b/docs/FAQ.md index 861e4b3a80..caa9df475f 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -1,5 +1,7 @@ # FAQ +Be sure to read the [Best practices page](../best-practices) as well. + ## General ### Does Argo Rollouts depend on Argo CD or any other Argo project? diff --git a/docs/best-practices.md b/docs/best-practices.md index c268bb5d92..0ed32efbec 100644 --- a/docs/best-practices.md +++ b/docs/best-practices.md @@ -29,7 +29,23 @@ You should *NOT* use Argo Rollouts for preview/ephemeral environments. For that The recommended way to use Argo Rollouts is for brief deployments that take 15-20 minutes or maximum 1-2 hours. If you want to run new versions for days or weeks before deciding to promote, then Argo Rollouts is probably not the best solution for you. -Also, if you want to run a wave of multiple versions at the same time (i.e. have 1.1 and 1.2 and 1.3 running at the same time), know that Argo Rollouts was not designed for this scenario. +Keeping parallel releases running for long periods complicates the deployment process a lot and raises several questions where different people have different views on how Argo Rollouts should work. + +For example, let's say that for a week you have been testing version 1.3 as stable and 1.4 as preview. +Then somebody deploys 1.5. + +1. Some people believe that the new state should be 1.3 as stable and 1.5 as preview. +1. Some people believe that the new state should be 1.4 as stable and 1.5 as preview. + +Currently, Argo Rollouts follows the first approach, under the assumption that something was really wrong with 1.4 and that 1.5 is the hotfix. + +And then let's say that 1.5 has an issue.
Some people believe that Argo Rollouts should "roll back" to 1.3, while other people think it should roll back to 1.4. + +Currently, Argo Rollouts assumes that the version to roll back to is always 1.3, regardless of how many "hotfixes" have been previewed in between. + +None of these problems arise if you make the assumption that each release stays active only for a minimal time and you always create one new version when the previous one has finished. + +Also, if you want to run a wave of multiple versions at the same time (i.e. have 1.1 and 1.2 and 1.3 running at the same time), know that Argo Rollouts was not designed for this scenario. Argo Rollouts always works with the assumption that there is one stable/previous version and one preview/next version. A version that has just been promoted is assumed to be ready for production and has already passed all your tests (either manual or automated). @@ -41,6 +57,8 @@ While Argo Rollouts supports manual promotions and other manual pauses, these ar Ideally you should have proper metrics that tell you in 5-15 minutes if a deployment is successful or not. If you don't have those metrics, then you will miss a lot of value from Argo Rollouts. +If your current process is to deploy and then have an actual human look at logs/metrics/traces for the next 2 hours, adopting Argo Rollouts is not going to help you a lot with automated deployments. + Get your [metrics](../features/analysis) in place first and test them with dry-runs before applying them to production deployments.
diff --git a/go.mod b/go.mod index 13e74390fa..be23cdc3cc 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/argoproj/notifications-engine v0.4.1-0.20240219110818-7a069766e954 github.com/argoproj/pkg v0.13.6 github.com/aws/aws-sdk-go-v2 v1.30.3 - github.com/aws/aws-sdk-go-v2/config v1.27.26 + github.com/aws/aws-sdk-go-v2/config v1.27.27 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.40.3 github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.31.3 github.com/aws/smithy-go v1.20.3 @@ -82,7 +82,7 @@ require ( github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect github.com/aws/aws-sdk-go v1.44.116 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.26 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.27 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.11 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.15 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.15 // indirect @@ -90,7 +90,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.3 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.17 // indirect github.com/aws/aws-sdk-go-v2/service/sqs v1.29.7 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.22.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.22.4 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.4 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.30.3 // indirect github.com/beorn7/perks v1.0.1 // indirect diff --git a/go.sum b/go.sum index d08737f6d6..cbbadec5c5 100644 --- a/go.sum +++ b/go.sum @@ -98,10 +98,10 @@ github.com/aws/aws-sdk-go v1.44.116 h1:NpLIhcvLWXJZAEwvPj3TDHeqp7DleK6ZUVYyW01WN github.com/aws/aws-sdk-go v1.44.116/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/aws/aws-sdk-go-v2 v1.30.3 h1:jUeBtG0Ih+ZIFH0F4UkmL9w3cSpaMv9tYYDbzILP8dY= 
github.com/aws/aws-sdk-go-v2 v1.30.3/go.mod h1:nIQjQVp5sfpQcTc9mPSr1B0PaWK5ByX9MOoDadSN4lc= -github.com/aws/aws-sdk-go-v2/config v1.27.26 h1:T1kAefbKuNum/AbShMsZEro6eRkeOT8YILfE9wyjAYQ= -github.com/aws/aws-sdk-go-v2/config v1.27.26/go.mod h1:ivWHkAWFrw/nxty5Fku7soTIVdqZaZ7dw+tc5iGW3GA= -github.com/aws/aws-sdk-go-v2/credentials v1.17.26 h1:tsm8g/nJxi8+/7XyJJcP2dLrnK/5rkFp6+i2nhmz5fk= -github.com/aws/aws-sdk-go-v2/credentials v1.17.26/go.mod h1:3vAM49zkIa3q8WT6o9Ve5Z0vdByDMwmdScO0zvThTgI= +github.com/aws/aws-sdk-go-v2/config v1.27.27 h1:HdqgGt1OAP0HkEDDShEl0oSYa9ZZBSOmKpdpsDMdO90= +github.com/aws/aws-sdk-go-v2/config v1.27.27/go.mod h1:MVYamCg76dFNINkZFu4n4RjDixhVr51HLj4ErWzrVwg= +github.com/aws/aws-sdk-go-v2/credentials v1.17.27 h1:2raNba6gr2IfA0eqqiP2XiQ0UVOpGPgDSi0I9iAP+UI= +github.com/aws/aws-sdk-go-v2/credentials v1.17.27/go.mod h1:gniiwbGahQByxan6YjQUMcW4Aov6bLC3m+evgcoN4r4= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.11 h1:KreluoV8FZDEtI6Co2xuNk/UqI9iwMrOx/87PBNIKqw= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.11/go.mod h1:SeSUYBLsMYFoRvHE0Tjvn7kbxaUhl75CJi1sbfhMxkU= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.15 h1:SoNJ4RlFEQEbtDcCEt+QG56MY4fm4W8rYirAmq+/DdU= @@ -120,8 +120,8 @@ github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.17 h1:HGErhhrx github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.17/go.mod h1:RkZEx4l0EHYDJpWppMJ3nD9wZJAa8/0lq9aVC+r2UII= github.com/aws/aws-sdk-go-v2/service/sqs v1.29.7 h1:tRNrFDGRm81e6nTX5Q4CFblea99eAfm0dxXazGpLceU= github.com/aws/aws-sdk-go-v2/service/sqs v1.29.7/go.mod h1:8GWUDux5Z2h6z2efAtr54RdHXtLm8sq7Rg85ZNY/CZM= -github.com/aws/aws-sdk-go-v2/service/sso v1.22.3 h1:Fv1vD2L65Jnp5QRsdiM64JvUM4Xe+E0JyVsRQKv6IeA= -github.com/aws/aws-sdk-go-v2/service/sso v1.22.3/go.mod h1:ooyCOXjvJEsUw7x+ZDHeISPMhtwI3ZCB7ggFMcFfWLU= +github.com/aws/aws-sdk-go-v2/service/sso v1.22.4 h1:BXx0ZIxvrJdSgSvKTZ+yRBeSqqgPM89VPlulEcl37tM= +github.com/aws/aws-sdk-go-v2/service/sso v1.22.4/go.mod 
h1:ooyCOXjvJEsUw7x+ZDHeISPMhtwI3ZCB7ggFMcFfWLU= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.4 h1:yiwVzJW2ZxZTurVbYWA7QOrAaCYQR72t0wrSBfoesUE= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.4/go.mod h1:0oxfLkpz3rQ/CHlx5hB7H69YUpFiI1tql6Q6Ne+1bCw= github.com/aws/aws-sdk-go-v2/service/sts v1.30.3 h1:ZsDKRLXGWHk8WdtyYMoGNO7bTudrvuKpDKgMVRlepGE= diff --git a/pkg/kubectl-argo-rollouts/info/pod_info.go b/pkg/kubectl-argo-rollouts/info/pod_info.go index e53f350848..463657d70d 100644 --- a/pkg/kubectl-argo-rollouts/info/pod_info.go +++ b/pkg/kubectl-argo-rollouts/info/pod_info.go @@ -53,6 +53,12 @@ func newPodInfo(pod *corev1.Pod) rollout.PodInfo { }, } restarts := 0 + rs := make(map[string]bool, len(pod.Spec.InitContainers)) + for _, c := range pod.Spec.InitContainers { + p := c.RestartPolicy + rs[c.Name] = p != nil && *p == corev1.ContainerRestartPolicyAlways + } + totalContainers := len(pod.Spec.Containers) readyContainers := 0 @@ -69,7 +75,7 @@ func newPodInfo(pod *corev1.Pod) rollout.PodInfo { continue case container.State.Terminated != nil: // initialization is failed - if len(container.State.Terminated.Reason) == 0 { + if container.State.Terminated.Reason == "" { if container.State.Terminated.Signal != 0 { reason = fmt.Sprintf("Init:Signal:%d", container.State.Terminated.Signal) } else { @@ -79,6 +85,10 @@ func newPodInfo(pod *corev1.Pod) rollout.PodInfo { reason = "Init:" + container.State.Terminated.Reason } initializing = true + case rs[container.Name] && container.Started != nil && *container.Started: + if container.Ready { + continue + } case container.State.Waiting != nil && len(container.State.Waiting.Reason) > 0 && container.State.Waiting.Reason != "PodInitializing": reason = "Init:" + container.State.Waiting.Reason initializing = true diff --git a/rollout/trafficrouting/traefik/traefik.go b/rollout/trafficrouting/traefik/traefik.go index 8b5cfb7312..3782ee824e 100644 --- a/rollout/trafficrouting/traefik/traefik.go +++ 
b/rollout/trafficrouting/traefik/traefik.go @@ -23,12 +23,6 @@ const Type = "Traefik" const traefikServices = "traefikservices" const TraefikServiceUpdateError = "TraefikServiceUpdateError" -var ( - apiGroupToResource = map[string]string{ - defaults.GetTraefikAPIGroup(): traefikServices, - } -) - type ReconcilerConfig struct { Rollout *v1alpha1.Rollout Client ClientInterface @@ -41,6 +35,13 @@ type Reconciler struct { Recorder record.EventRecorder } +func apiGroupToResource(group string) string { + apiGroupToResource := map[string]string{ + defaults.GetTraefikAPIGroup(): traefikServices, + } + return apiGroupToResource[group] +} + func (r *Reconciler) sendWarningEvent(id, msg string) { r.sendEvent(corev1.EventTypeWarning, id, msg) } @@ -71,7 +72,8 @@ func GetMappingGVR() schema.GroupVersionResource { group := defaults.GetTraefikAPIGroup() parts := strings.Split(defaults.GetTraefikVersion(), "/") version := parts[len(parts)-1] - resourceName := apiGroupToResource[group] + resourceName := apiGroupToResource(group) + return schema.GroupVersionResource{ Group: group, Version: version,