From e7f2d9b87d31e44d7e2295e05295359fd178d66b Mon Sep 17 00:00:00 2001
From: Cornelius Roemer <cornelius.roemer@gmail.com>
Date: Wed, 28 Feb 2024 17:06:39 +0100
Subject: [PATCH] feat(ci): Retag images to reduce time spent on recomputing
 unchanged docker images (#1167)

* feat(ci): add workflow dispatch to allow debugging of individual workflows
---
 .github/workflows/backend.yml                 | 71 ++++++++++++-------
 .github/workflows/dummyPreprocessing.yml      | 61 ++++++++++------
 .../workflows/preprocessing-nextclade.yaml    | 52 ++++++++++----
 .github/workflows/website.yml                 | 58 +++++++++++----
 README.md                                     | 26 +++----
 5 files changed, 177 insertions(+), 91 deletions(-)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 85a1c8b8a..eb07dc7dc 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -7,10 +7,6 @@ on:
 env:
   DOCKER_IMAGE_NAME: ghcr.io/loculus-project/backend
 
-defaults:
-  run:
-    working-directory: ./backend
-
 concurrency:
   group: ci-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-backend
   cancel-in-progress: true
@@ -45,11 +41,23 @@ jobs:
       checks: read
     steps:
       - uses: actions/checkout@v4
-      - name: Set up JDK
-        uses: actions/setup-java@v4
+
+      - name: Generate files hash  # DIR_HASH = hashFiles() digest of backend/** plus this workflow file
+        id: files-hash  # result is exported through GITHUB_ENV (as env.DIR_HASH), not as a step output
+        run: |
+          DIR_HASH=$(echo -n ${{ hashFiles('backend/**', '.github/workflows/backend.yml') }})
+          echo "DIR_HASH=$DIR_HASH" >> $GITHUB_ENV
+
+      - name: Setup Docker metadata
+        id: dockerMetadata
+        uses: docker/metadata-action@v5
         with:
-          java-version: "21"
-          distribution: "adopt"
+          images: ${{ env.DOCKER_IMAGE_NAME }}
+          tags: |
+            type=raw,value=${{ env.DIR_HASH }}
+            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+            type=ref,event=branch
+            type=sha,prefix=commit-
 
       - name: Login to GitHub Container Registry
         uses: docker/login-action@v3
@@ -58,35 +66,46 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Docker metadata
-        id: dockerMetadata
-        uses: docker/metadata-action@v5
+      - name: Check if image exists  # CACHE_HIT=true iff `docker manifest inspect` succeeds for the DIR_HASH tag
+        id: check-image  # any inspect failure yields CACHE_HIT=false; value is exported via GITHUB_ENV
+        run: |
+          EXISTS=$(docker manifest inspect ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }} > /dev/null 2>&1 && echo "true" || echo "false")
+          echo "CACHE_HIT=$EXISTS" >> $GITHUB_ENV
+
+      - name: Set up JDK
+        if: env.CACHE_HIT == 'false'
+        uses: actions/setup-java@v4
         with:
-          images: ${{ env.DOCKER_IMAGE_NAME }}
-          tags: |
-            type=ref,event=branch,enable=${{ github.ref != 'refs/heads/main' }}
-            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
-            type=sha,prefix=commit-
+          java-version: "21"
+          distribution: "adopt"
+
       - name: Extract Docker Tags
+        if: env.CACHE_HIT == 'false'  # NOTE(review): no step in this hunk reads env.firstTag (build/push use DOCKER_IMAGE_NAME:DIR_HASH) - step looks removable, confirm
         id: extractTag
         run: |
           FIRST_TAG=$(echo "${{ steps.dockerMetadata.outputs.tags }}" | head -n 1)
-          echo "firstTag=$FIRST_TAG" >> $GITHUB_OUTPUT
+          echo "firstTag=$FIRST_TAG" >> $GITHUB_ENV
 
-          SECOND_TAG=$(echo "${{ steps.dockerMetadata.outputs.tags }}" | head -n 2 | tail -n 1)
-          echo "secondTag=$SECOND_TAG" >> $GITHUB_OUTPUT
       - name: Build Docker Image For Branch
+        if: env.CACHE_HIT == 'false'
         uses: gradle/actions/setup-gradle@v3
         env:
           USER: ${{ github.actor }}
           TOKEN: "${{ secrets.GITHUB_TOKEN }}"
         with:
-          arguments: bootBuildImage --imageName=${{steps.extractTag.outputs.firstTag }}
+          arguments: bootBuildImage --imageName=${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }}
           build-root-directory: ./backend
 
-      - name: Tag With SHA tag
-        run: docker tag ${{ steps.extractTag.outputs.firstTag  }} ${{ steps.extractTag.outputs.secondTag  }}
-      - name: Push 1st Docker Image
-        run: docker push ${{ steps.extractTag.outputs.firstTag  }}
-      - name: Push 2nd Docker Image
-        run: docker push ${{ steps.extractTag.outputs.secondTag  }}
+      - name: Push Docker Image
+        if: env.CACHE_HIT == 'false'
+        run: docker push ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Tag and push existing images  # no `if:` - runs on cache hit and miss alike
+        run: |  # creates each metadata-action tag from the DIR_HASH image with `imagetools create`
+          TAGS=(${{ steps.dockerMetadata.outputs.tags }})
+          for TAG in "${TAGS[@]}"; do
+            docker buildx imagetools create --tag $TAG ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }}
+          done
diff --git a/.github/workflows/dummyPreprocessing.yml b/.github/workflows/dummyPreprocessing.yml
index d213e2364..474f35acc 100644
--- a/.github/workflows/dummyPreprocessing.yml
+++ b/.github/workflows/dummyPreprocessing.yml
@@ -7,45 +7,62 @@ on:
 env:
   DOCKER_IMAGE_NAME: ghcr.io/loculus-project/preprocessing-dummy
 
-defaults:
-  run:
-    working-directory: ./preprocessing/dummy
-
-concurrency:
-  group: ci-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-preprocessing-dummy
-  cancel-in-progress: true
-
 jobs:
-  dockerImage:
-    name: Build preprocessing-dummy Docker Image
+  build-or-use-cache:
     runs-on: ubuntu-latest
     permissions:
       contents: read
       packages: write
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Docker metadata
+
+      - name: Generate files hash  # DIR_HASH = hashFiles() digest of preprocessing/dummy/** plus this workflow file
+        id: files-hash  # result is exported through GITHUB_ENV (as env.DIR_HASH), not as a step output
+        run: |
+          DIR_HASH=$(echo -n ${{ hashFiles('preprocessing/dummy/**', '.github/workflows/dummyPreprocessing.yml') }})
+          echo "DIR_HASH=$DIR_HASH" >> $GITHUB_ENV
+
+      - name: Setup Docker metadata
         id: dockerMetadata
         uses: docker/metadata-action@v5
         with:
           images: ${{ env.DOCKER_IMAGE_NAME }}
           tags: |
-            type=ref,event=branch
+            type=raw,value=${{ env.DIR_HASH }}
             type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+            type=ref,event=branch
             type=sha,prefix=commit-
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check if image exists  # CACHE_HIT=true iff `docker manifest inspect` succeeds for the DIR_HASH tag
+        id: check-image  # any inspect failure yields CACHE_HIT=false; value is exported via GITHUB_ENV
+        run: |
+          EXISTS=$(docker manifest inspect ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }} > /dev/null 2>&1 && echo "true" || echo "false")
+          echo "CACHE_HIT=$EXISTS" >> $GITHUB_ENV
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
       - name: Build and push image
+        if: env.CACHE_HIT == 'false'
         uses: docker/build-push-action@v5
         with:
           context: ./preprocessing/dummy
           push: true
           tags: ${{ steps.dockerMetadata.outputs.tags }}
-          cache-from: type=gha,scope=dummy${{ github.ref }}
-          cache-to: type=gha,mode=max,scope=dummy-${{ github.ref }}
+          cache-from: type=gha,scope=preprocessing-dummy-${{ github.ref }}
+          cache-to: type=gha,mode=max,scope=preprocessing-dummy-${{ github.ref }}
+
+      - name: Tag and push image if cache hit  # creates each metadata-action tag from the existing DIR_HASH image
+        if: env.CACHE_HIT == 'true'  # on a miss the build-push step above pushes these same tags itself
+        run: |
+          TAGS=(${{ steps.dockerMetadata.outputs.tags }})
+          for TAG in "${TAGS[@]}"; do
+            docker buildx imagetools create --tag $TAG ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }}
+          done
diff --git a/.github/workflows/preprocessing-nextclade.yaml b/.github/workflows/preprocessing-nextclade.yaml
index 8badb93e4..9dbbdac51 100644
--- a/.github/workflows/preprocessing-nextclade.yaml
+++ b/.github/workflows/preprocessing-nextclade.yaml
@@ -7,10 +7,6 @@ on:
 env:
   DOCKER_IMAGE_NAME: ghcr.io/loculus-project/preprocessing-nextclade
 
-defaults:
-  run:
-    working-directory: ./preprocessing/nextclade
-
 concurrency:
   group: ci-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-preprocessing-nextclade
   cancel-in-progress: true
@@ -25,24 +21,42 @@ jobs:
       checks: read
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Docker metadata
+
+      - name: Generate files hash  # DIR_HASH = hashFiles() digest of preprocessing/nextclade/** plus this workflow file
+        id: files-hash  # result is exported through GITHUB_ENV (as env.DIR_HASH), not as a step output
+        run: |
+          DIR_HASH=$(echo -n ${{ hashFiles('preprocessing/nextclade/**', '.github/workflows/preprocessing-nextclade.yaml') }})
+          echo "DIR_HASH=$DIR_HASH" >> $GITHUB_ENV
+
+      - name: Setup Docker metadata
         id: dockerMetadata
         uses: docker/metadata-action@v5
         with:
           images: ${{ env.DOCKER_IMAGE_NAME }}
           tags: |
-            type=ref,event=branch
+            type=raw,value=${{ env.DIR_HASH }}
             type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+            type=ref,event=branch
             type=sha,prefix=commit-
-      - name: Build and push image
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check if image exists  # CACHE_HIT=true iff `docker manifest inspect` succeeds for the DIR_HASH tag
+        id: check-image  # any inspect failure yields CACHE_HIT=false; value is exported via GITHUB_ENV
+        run: |
+          EXISTS=$(docker manifest inspect ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }} > /dev/null 2>&1 && echo "true" || echo "false")
+          echo "CACHE_HIT=$EXISTS" >> $GITHUB_ENV
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build and push image if input files changed
+        if: env.CACHE_HIT == 'false'
         uses: docker/build-push-action@v5
         with:
           context: ./preprocessing/nextclade
@@ -50,3 +64,11 @@ jobs:
           tags: ${{ steps.dockerMetadata.outputs.tags }}
           cache-from: type=gha,scope=nextclade-${{ github.ref }}
           cache-to: type=gha,mode=max,scope=nextclade-${{ github.ref }}
+
+      - name: Retag and push existing image if cache hit  # creates each metadata-action tag from the existing DIR_HASH image
+        if: env.CACHE_HIT == 'true'  # on a miss the build-push step above pushes these same tags itself
+        run: |
+          TAGS=(${{ steps.dockerMetadata.outputs.tags }})
+          for TAG in "${TAGS[@]}"; do
+            docker buildx imagetools create --tag $TAG ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }}
+          done
diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml
index f2dd5d8d6..a0fcd8ee2 100644
--- a/.github/workflows/website.yml
+++ b/.github/workflows/website.yml
@@ -7,18 +7,17 @@ on:
 env:
   DOCKER_IMAGE_NAME: ghcr.io/loculus-project/website
 
-defaults:
-  run:
-    working-directory: ./website
-
 concurrency:
   group: ci-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-website
   cancel-in-progress: true
 
 jobs:
   checks:
-    name: Check format
+    name: Check format and types
     runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./website
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-node@v4
@@ -36,6 +35,9 @@ jobs:
     name: Unit Tests
     timeout-minutes: 10
     runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./website
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-node@v4
@@ -60,24 +62,42 @@ jobs:
       checks: read
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Docker metadata
+
+      - name: Generate files hash  # DIR_HASH = hashFiles() digest of website/** plus this workflow file
+        id: files-hash  # result is exported through GITHUB_ENV (as env.DIR_HASH), not as a step output
+        run: |
+          DIR_HASH=$(echo -n ${{ hashFiles('website/**', '.github/workflows/website.yml') }})
+          echo "DIR_HASH=$DIR_HASH" >> $GITHUB_ENV
+
+      - name: Setup Docker metadata
         id: dockerMetadata
         uses: docker/metadata-action@v5
         with:
           images: ${{ env.DOCKER_IMAGE_NAME }}
           tags: |
+            type=raw,value=${{ env.DIR_HASH }}
+            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
             type=ref,event=branch
             type=sha,prefix=commit-
-            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check if image exists  # CACHE_HIT=true iff `docker manifest inspect` succeeds for the DIR_HASH tag
+        id: check-image  # any inspect failure yields CACHE_HIT=false; value is exported via GITHUB_ENV
+        run: |
+          EXISTS=$(docker manifest inspect ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }} > /dev/null 2>&1 && echo "true" || echo "false")
+          echo "CACHE_HIT=$EXISTS" >> $GITHUB_ENV
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
       - name: Build and push image
+        if: env.CACHE_HIT == 'false'
         uses: docker/build-push-action@v5
         with:
           context: ./website
@@ -85,3 +105,11 @@ jobs:
           tags: ${{ steps.dockerMetadata.outputs.tags }}
           cache-from: type=gha,scope=website-${{ github.ref }}
           cache-to: type=gha,mode=max,scope=website-${{ github.ref }}
+
+      - name: Retag and push existing image if cache hit  # creates each metadata-action tag from the existing DIR_HASH image
+        if: env.CACHE_HIT == 'true'  # on a miss the build-push step above pushes these same tags itself
+        run: |
+          TAGS=(${{ steps.dockerMetadata.outputs.tags }})
+          for TAG in "${TAGS[@]}"; do
+            docker buildx imagetools create --tag $TAG ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }}
+          done
diff --git a/README.md b/README.md
index daaf8f253..4cb693186 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ Additional documentation for development is available in each folder's README. T
 
 - Backend code is in `backend`, see [`backend/README.md`](/backend/README.md)
 - Frontend code is in `website`, see [`website/README.md`](/website/README.md)
-- Sequence and metadata processing pipeline is in [`preprocessing`](/preprocessing) folder, see [`preprocessing/specification.md`](/preprocessing/specification.md) 
+- Sequence and metadata processing pipeline is in [`preprocessing`](/preprocessing) folder, see [`preprocessing/specification.md`](/preprocessing/specification.md)
 - Deployment code is in `kubernetes`, see [`kubernetes/README.md`](/kubernetes/README.md).
   Check this for local development setup instructions.
 - Authorization is performed by our own keycloak instance. see config in [`keycloak-image`](kubernetes/loculus/templates/keycloak-deployment.yaml) and [`realm-config`](kubernetes/loculus/templates/keycloak-config-map.yaml)
@@ -43,24 +43,24 @@ Unfortunately, local development on ARM macOS (M1 and M2 macs) is not currently
 
 We use keycloak for authorization. The keycloak instance is deployed in the `loculus` namespace and exposed to the outside either under `localhost:8083` or `authentication-[your-argo-cd-path]`. The keycloak instance is configured with a realm called `loculusRealm` and a client called `test-cli`. The realm is configured to use the exposed url of keycloak as a [frontend url](https://www.keycloak.org/server/hostname).
 For testing we added multiple users to the realm. The users are:
+
 - `admin` with password `admin` (login under `your-exposed-keycloak-url/admin/master/console/`)
 - `testuser` with password `testuser` (login under `your-exposed-keycloak-url/realms/loculusRealm/account/`)
-- and more testusers, for each browser in the e2e test following the pattern: `testuser_[processId]_[browser]` with password `testuser_[processId]_[browser]` 
-- These testusers will be added to the `testGroup` in the setup for e2e tests. If you change the number of browsers in the e2e test, you need to adapt `website/tests/playwrightSetup.ts` accordingly. 
-- To validate that a user exists we also created a technical user for the backend with username `backend` and password `backend`. The technical user is authorized to view users and groups and in principle to manage its own account. 
+- and more testusers, for each browser in the e2e test following the pattern: `testuser_[processId]_[browser]` with password `testuser_[processId]_[browser]`
+- These testusers will be added to the `testGroup` in the setup for e2e tests. If you change the number of browsers in the e2e test, you need to adapt `website/tests/playwrightSetup.ts` accordingly.
+- To validate that a user exists we also created a technical user for the backend with username `backend` and password `backend`. The technical user is authorized to view users and groups and in principle to manage its own account.
 
 ### Group management
 
- - Groups are entities managed by the backend, uniquely identified by a name.
- - Every sequence entry is owned by the group that it was initially submitted for. Modifications (edits while awaiting approval, revisions, revocations) can only be made by members of that group.
- - Each user can be a member of multiple groups.
- - Users can create new groups, becoming the initial member automatically.
- - Group members have the authority to add or remove other members.
- - If the last user leaves a group, the group becomes 'dangling'—it exists but is no longer accessible, and a new group with the same name cannot be created.
- - Admin users can manually delete a group directly on the DB but must transfer ownership of sequence entries to another group before doing so to fulfill the foreign key constraint.
+- Groups are entities managed by the backend, uniquely identified by a name.
+- Every sequence entry is owned by the group that it was initially submitted for. Modifications (edits while awaiting approval, revisions, revocations) can only be made by members of that group.
+- Each user can be a member of multiple groups.
+- Users can create new groups, becoming the initial member automatically.
+- Group members have the authority to add or remove other members.
+- If the last user leaves a group, the group becomes 'dangling'—it exists but is no longer accessible, and a new group with the same name cannot be created.
+- Admin users can manually delete a group directly on the DB but must transfer ownership of sequence entries to another group before doing so to fulfill the foreign key constraint.
 
- For testing we added all users declared above to the group `testGroup`.
- 
+For testing we added all users declared above to the group `testGroup`.
 
 ## Contributing to Loculus