IBM · maxdebayser · Feb 13, 2024 · Mar 12, 2024 · Mar 20, 2024 · Mar 20, 2024
diff --git a/.buildkite/download-images.sh b/.buildkite/download-images.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Download the vllm_opensource_llava sample files (.pt and .jpg) into ./images.
+set -ex
+set -o pipefail
+
+# Install wget and curl through apt only when at least one of them is missing.
+(which wget && which curl) || (apt-get update && apt-get install -y wget curl)
+
+# aws s3 sync s3://air-example-data-2/vllm_opensource_llava/ images/
+mkdir -p images
+cd images
+
+base_url=https://air-example-data-2.s3.us-west-2.amazonaws.com/vllm_opensource_llava
+for asset in \
+    stop_sign_pixel_values.pt \
+    stop_sign_image_features.pt \
+    cherry_blossom_pixel_values.pt \
+    cherry_blossom_image_features.pt \
+    stop_sign.jpg \
+    cherry_blossom.jpg
+do
+    wget "${base_url}/${asset}"
+done
+
+cd -
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -39,15 +39,24 @@ steps:
 
 - label: Models Test
   commands:
-    - pytest -v -s models --forked
+    - bash ../.buildkite/download-images.sh  # downloads the sample files into ./images (relative to the working directory)
+    - pytest -v -s models --ignore=models/test_llava.py  --forked  # test_llava.py is run by the separate Llava Test step
   soft_fail: true
 
+- label: Llava Test
+  commands:
+    - bash ../.buildkite/download-images.sh
+    - pytest -v -s models/test_llava.py
+
 - label: Prefix Caching Test
   commands:
     - pytest -v -s prefix_caching
 
 - label: Samplers Test
-  command: pytest -v -s samplers --forked
+  command: pytest -v -s samplers
+
+- label: LogitsProcessor Test
+  command: pytest -v -s test_logits_processor.py
 
 - label: Worker Test
   command: pytest -v -s worker
@@ -56,7 +65,7 @@ steps:
   command: pytest -v -s spec_decode
 
 - label: LoRA Test %N
-  command: pytest -v -s lora --forked --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 4
 
 - label: Metrics Test

diff --git a/.github/actions/free-up-disk-space/action.yml b/.github/actions/free-up-disk-space/action.yml
@@ -0,0 +1,36 @@
+name: "Free up disk space"
+description: "Removes non-essential tools, libraries and cached files from GitHub action runner node"
+
+runs:
+  using: "composite"  # composite action: the steps below run as part of the calling job
+  steps:
+    - name: "Remove non-essential tools and libraries"
+      shell: bash
+      run: |
+        # See https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+        echo "Disk usage before cleanup:"
+        df -h
+        echo "Removing non-essential tools and libraries ..."
+        sudo rm -rf /opt/ghc
+        sudo rm -rf /usr/local/.ghcup
+        sudo rm -rf /usr/share/dotnet
+        # Disabled: sudo rm -rf /usr/local/share/boost
+        echo "Deleting libraries for Android (12G), CodeQL (5.3G), PowerShell (1.3G), Swift (1.7G) ..."
+        sudo rm -rf /usr/local/lib/android
+        sudo rm -rf "${AGENT_TOOLSDIRECTORY}/CodeQL"
+        sudo rm -rf /usr/local/share/powershell
+        sudo rm -rf /usr/share/swift
+        # Best-effort removal (a failure only emits a ::warning::); ref: https://github.com/jlumbroso/free-disk-space/blob/main/action.yml
+        echo "Deleting some larger apt packages:"
+        sudo apt-get remove -y azure-cli google-chrome-stable firefox powershell mono-devel libgl1-mesa-dri --fix-missing || echo "::warning::The command [sudo apt-get remove -y azure-cli google-chrome-stable firefox powershell mono-devel libgl1-mesa-dri --fix-missing] failed to complete successfully. Proceeding..."
+        echo "Disk usage after cleanup:"
+        df -h
+
+    - name: "Prune docker images"
+      shell: bash
+      run: |
+        echo "Pruning docker images ..."
+        docker image prune -a -f  # -a: all unused images, not only dangling ones; -f: no confirmation prompt
+        docker system df
+        echo "Disk usage after pruning docker images:"
+        df -h
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -0,0 +1,128 @@
+name: "Build"
+
+on:
+  workflow_dispatch:
+
+  push:
+    branches:
+      - release
+    paths-ignore:
+      - "**.md"
+      - "proto/**"
+
+  pull_request:
+    branches:
+      - main
+    paths-ignore:
+      - "**.md"
+      - "proto/**"
+
+defaults:
+  run:
+    shell: bash
+
+env:
+  SERVER_IMAGE: "quay.io/wxpe/tgis-vllm"
+  IMAGE_REGISTRY: "quay.io"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      packages: write
+      contents: read
+    env:
+      CACHE_IMAGE: "ghcr.io/ibm/tgis-vllm:build-cache"
+      CACHE_REGISTRY: "ghcr.io"
+      CACHE_PACKAGE_NAME: "tgis-vllm"
+
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@v4
+
+      - name: "Free up disk space"
+        uses: ./.github/actions/free-up-disk-space
+
+      - name: "Set up QEMU"
+        uses: docker/setup-qemu-action@v3
+
+      - name: "Set up Docker Buildx"
+        uses: docker/setup-buildx-action@v3
+
+      - name: "Log in to container registry (server-release)"
+        uses: docker/login-action@v3
+        if: github.event_name != 'pull_request'
+        with:
+          registry: ${{ env.IMAGE_REGISTRY }}
+          username: ${{ secrets.WXPE_QUAY_USER }}
+          password: ${{ secrets.WXPE_QUAY_TOKEN }}
+
+      - name: "Log in to container registry (cache image)"
+        uses: docker/login-action@v3
+        if: github.event_name != 'pull_request'
+        with:
+          registry: ${{ env.CACHE_REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: "Set build cache target"
+        run: |
+          # For non-PR events (push / workflow_dispatch), push a new cache image with all layers (cache-mode=max).
+          # For PR builds, the GitHub action cache would isolate cached layers by PR/branch
+          # to optimize builds for subsequent pushes to the same PR/branch (disabled, see UPDATE below).
+          # Do not set a cache-to image for PR builds to not overwrite the shared cache image and
+          # to not ping-pong cache images for two or more different PRs.
+          # Do not push cache images for each PR or multiple branches to not exceed GitHub package
+          # usage and traffic limitations.
+          # UPDATE 2024/02/26: GHA cache appears to have issues, cannot use `cache-to: gha,mode=min`
+          # if `cache-from: reg...,mode=max` but `cache-to: gha,mode=max` takes longer than uncached
+          # build and exhausts GHA cache size limits, so use cache `type=inline` (no external cache).
+          if [ "${{ github.event_name }}" == "pull_request" ]
+          then
+              #CACHE_TO="type=gha,mode=min"
+              CACHE_TO="type=inline"
+          else
+              CACHE_TO="type=registry,ref=${{ env.CACHE_IMAGE }},mode=max"
+          fi
+          echo "CACHE_TO=$CACHE_TO" >> "$GITHUB_ENV"
+
+      - name: "Generate tags"
+        id: meta  # read by the build step as steps.meta.outputs.tags
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.SERVER_IMAGE }}
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha,enable=true,priority=100,prefix=,suffix=,format=short
+            type=sha,enable=true,priority=100,prefix=${{ github.ref_name }}.,suffix=,format=short
+
+      - name: "UBI Docker build"
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          target: vllm-openai
+          tags: ${{ steps.meta.outputs.tags }}
+          cache-from: type=registry,ref=${{ env.CACHE_IMAGE }}  # NOTE(review): PR builds skip the registry logins, so this presumably needs the cache image to be publicly readable - confirm
+          cache-to: ${{ env.CACHE_TO }}
+          push: ${{ github.event_name != 'pull_request' }}  # pull_request builds are not pushed
+          file: Dockerfile.ubi
+
+      - name: "Cleanup old cache images"
+        uses: actions/delete-package-versions@v5
+        if: ${{ github.event_name == 'push' }}
+        with:
+          package-name: ${{ env.CACHE_PACKAGE_NAME }}
+          package-type: container
+          delete-only-untagged-versions: true  # tagged versions (such as the build-cache tag) are kept
+
+      - name: "List docker images"
+        run: docker images
+
+      - name: "Check disk usage"
+        shell: bash
+        run: |
+          docker system df
+          df -h
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
@@ -25,10 +25,13 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1
+        pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1 isort==5.13.2
     - name: Analysing the code with ruff
       run: |
         ruff .
     - name: Spelling check with codespell
       run: |
-        codespell --toml pyproject.toml
+        codespell --toml pyproject.toml
+    - name: Run isort
+      run: |
+        isort . --check-only  # report unsorted imports without modifying any file
diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
diff --git a/.github/workflows/scripts/create_release.js b/.github/workflows/scripts/create_release.js