[CI] Add comprehensive testing: migration, e2e, and bench (#30)

AlibabaPAI · Sep 19, 2024 · 91a6454 · 91a6454
1 parent 4aa0ab8
commit 91a6454
Show file tree

Hide file tree

Showing 48 changed files with 803 additions and 51 deletions.
diff --git a/.github/workflows/bench_test.yml b/.github/workflows/bench_test.yml
@@ -0,0 +1,44 @@
+# Benchmark workflow: runs tools/bench_test.sh on a self-hosted runner and,
+# for pull requests, posts the contents of performance.txt as a PR comment.
+name: bench_test
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+
+jobs:
+  # Cancels all but the latest run of this workflow (all_but_latest).
+  cancel_previous_workflows:
+    runs-on: [self-hosted]
+    timeout-minutes: 3
+    steps:
+    - uses: styfle/cancel-workflow-action@0.12.1
+      with:
+        all_but_latest: true
+
+  bench_tests:
+    needs: cancel_previous_workflows
+    runs-on: [self-hosted]
+    timeout-minutes: 60
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+    # Kills every container currently running on the runner host.
+    - name: Kill Running Containers
+      run: |
+        [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
+    - name: Build And Test
+      run: ./tools/bench_test.sh
+    # Only pull_request events carry an issue/PR number. On a push to main
+    # context.issue.number is undefined and createComment would fail, so the
+    # step is skipped for push events.
+    # performance.txt is presumably written by tools/bench_test.sh - TODO confirm.
+    - name: Create comment from file
+      if: github.event_name == 'pull_request'
+      uses: actions/github-script@v7
+      with:
+        script: |
+          const fs = require('fs');
+          const filePath = 'performance.txt';
+          const commentBody = fs.readFileSync(filePath, 'utf8');
+          await github.rest.issues.createComment({
+            issue_number: context.issue.number,
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            body: commentBody
+          });
diff --git a/.github/workflows/e2e_test.yml b/.github/workflows/e2e_test.yml
@@ -0,0 +1,31 @@
+# End-to-end test workflow: runs tools/e2e_test.sh on a self-hosted runner
+# for pushes to main and pull requests targeting main.
+name: e2e_test
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+
+jobs:
+  # Cancels all but the latest run of this workflow (all_but_latest).
+  cancel_previous_workflows:
+    runs-on: [self-hosted]
+    timeout-minutes: 3
+    steps:
+    - uses: styfle/cancel-workflow-action@0.12.1
+      with:
+        all_but_latest: true
+
+  # Starts only after the cancel job has finished (needs).
+  e2e_tests:
+    needs: cancel_previous_workflows
+    runs-on: [self-hosted]
+    timeout-minutes: 60
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+    # Kills every container currently running on the runner host.
+    - name: Kill Running Containers
+      run: |
+        [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
+    - name: Build And Test
+      run: ./tools/e2e_test.sh
diff --git a/.github/workflows/migration_test.yml b/.github/workflows/migration_test.yml
@@ -0,0 +1,44 @@
+# Migration test workflow: runs tools/migration_test.sh on a self-hosted
+# runner and, for pull requests, posts the contents of performance.txt as a
+# PR comment.
+name: migration_test
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+
+jobs:
+  # Cancels all but the latest run of this workflow (all_but_latest).
+  cancel_previous_workflows:
+    runs-on: [self-hosted]
+    timeout-minutes: 3
+    steps:
+    - uses: styfle/cancel-workflow-action@0.12.1
+      with:
+        all_but_latest: true
+
+  migration_tests:
+    needs: cancel_previous_workflows
+    runs-on: [self-hosted]
+    timeout-minutes: 60
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+    # Kills every container currently running on the runner host.
+    - name: Kill Running Containers
+      run: |
+        [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
+    - name: Build And Test
+      run: ./tools/migration_test.sh
+    # Only pull_request events carry an issue/PR number. On a push to main
+    # context.issue.number is undefined and createComment would fail, so the
+    # step is skipped for push events.
+    # performance.txt is presumably written by tools/migration_test.sh - TODO confirm.
+    - name: Create comment from file
+      if: github.event_name == 'pull_request'
+      uses: actions/github-script@v7
+      with:
+        script: |
+          const fs = require('fs');
+          const filePath = 'performance.txt';
+          const commentBody = fs.readFileSync(filePath, 'utf8');
+          await github.rest.issues.createComment({
+            issue_number: context.issue.number,
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            body: commentBody
+          });
diff --git a/.github/workflows/offline_inference.yml b/.github/workflows/offline_inference.yml
@@ -0,0 +1,32 @@
+# Offline inference smoke test: installs the package in editable mode inside
+# the llumnix-dev image and runs the offline inference example script.
+name: offline_inference
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+
+jobs:
+  # Cancels all but the latest run of this workflow (all_but_latest).
+  cancel_previous_workflows:
+    runs-on: [self-hosted]
+    timeout-minutes: 3
+    steps:
+    - uses: styfle/cancel-workflow-action@0.12.1
+      with:
+        all_but_latest: true
+
+  offline_inference:
+    needs: cancel_previous_workflows
+    runs-on: [self-hosted]
+    timeout-minutes: 10
+    steps:
+    - uses: actions/checkout@v4
+    # The checkout directory is bind-mounted at /workspace and used as the
+    # working directory; pip output is discarded, so install errors only show
+    # up through the exit status.
+    # NOTE(review): the script path is spelled "examlpes/" - confirm this
+    # matches the directory name in the repository before changing it.
+    - name: Run offline inference example
+      run: |
+        nvidia-docker run --rm -t --net host --ipc host \
+          -v ${PWD}:/workspace \
+          -w /workspace \
+          registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
+          bash -c "pip install -e . > /dev/null && python examlpes/offline_inference.py"
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
@@ -1,4 +1,4 @@
-name: Pylint
+name: pylint
 
 on:
   push:
@@ -9,21 +9,24 @@ on:
     - main
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+  cancel_previous_workflows:
+    runs-on: [self-hosted]
+    timeout-minutes: 3
     steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
+    - uses: styfle/cancel-workflow-action@0.12.1
       with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install pylint==2.12.2
+        all_but_latest: true
+
+  pylint_test:
+    needs: cancel_previous_workflows
+    runs-on: [self-hosted]
+    timeout-minutes: 10
+    steps:
+    - uses: actions/checkout@v4
     - name: Analysing the code with pylint
       run: |
-        pylint --rcfile=.pylintrc --output-format=parseable --jobs=8 $( find llumnix/ -type f -name '*.py')
+        nvidia-docker run --rm -t --net host --ipc host \
+          -v ${PWD}:/workspace \
+          -w /workspace \
+          registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
+          bash -c "pip install -e . > /dev/null && make lint"
diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml
@@ -0,0 +1,31 @@
+# Unit test workflow: runs tools/unit_test.sh on a self-hosted runner for
+# pushes to main and pull requests targeting main.
+name: unit_test
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+
+jobs:
+  # Cancels all but the latest run of this workflow (all_but_latest).
+  cancel_previous_workflows:
+    runs-on: [self-hosted]
+    timeout-minutes: 3
+    steps:
+    - uses: styfle/cancel-workflow-action@0.12.1
+      with:
+        all_but_latest: true
+
+  # Starts only after the cancel job has finished (needs).
+  unit_tests:
+    needs: cancel_previous_workflows
+    runs-on: [self-hosted]
+    timeout-minutes: 60
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+    # Kills every container currently running on the runner host.
+    - name: Kill Running Containers
+      run: |
+        [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
+    - name: Build And Test
+      run: ./tools/unit_test.sh
diff --git a/.github/workflows/whl.yml b/.github/workflows/whl.yml
@@ -0,0 +1,26 @@
+# Wheel build check: builds a wheel with setup.py on a GitHub-hosted
+# ubuntu-latest runner using Python 3.10.
+name: whl_build
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+
+jobs:
+  whl_build:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+    - name: Set up Python 
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.10"
+    # NOTE(review): --universal tags the wheel as py2.py3 - confirm that is
+    # intended for this package.
+    - name: Build whl
+      run: |
+        python3 -m pip install --upgrade setuptools wheel
+        python3 setup.py bdist_wheel --universal
diff --git a/Makefile b/Makefile
@@ -21,15 +21,15 @@ install:
 
+# Run pylint over ./llumnix, then over ./tests with several checks disabled.
 .PHONY: lint
 lint: check_pylint_installed check_pytest_installed
-	@pylint --rcfile=.pylintrc -s n ./llumnix
+	@pylint --rcfile=.pylintrc -s n  --jobs=32 ./llumnix
 
 	@pylint --rcfile=.pylintrc \
 			--disable=protected-access,super-init-not-called,unused-argument,redefined-outer-name,invalid-name \
-			-s n ./tests
+			-s n --jobs=32 ./tests
 
+# Run pytest, stopping at the first failure (-x); third_party/ and
+# tests/e2e_test are excluded from collection.
 .PHONY: test
 test: check_pytest_installed
-	@pytest -x -q --ignore=third_party/ --disable-warnings
+	@pytest -x -v --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings
 
 #################### pygloo install for gloo migration backend begin ####################
 

diff --git a/llumnix/backends/vllm/llm_engine.py b/llumnix/backends/vllm/llm_engine.py
@@ -169,6 +169,7 @@ def step(self) -> None:
         instance_info.step_id = next(self.step_counter)
         instance_info.timestamp = time.time()
         instance_info.latency = self.model_executor.last_inference_latency
+
         seq_groups = self.scheduler.running
         if seq_groups:
             tot_blocks = []
@@ -260,8 +261,8 @@ def commit_dst_request(self, backend_request: SequenceGroupLlumnix) -> None:
         logger.info("add seq {} to block table".format(seq.seq_id))
         pre_alloc_blocks = self.engine.scheduler.pre_alloc_cache_dict.pop(backend_request.request_id)
         self.engine.scheduler.block_manager.add_block_table(pre_alloc_blocks, seq.seq_id)
-        self.add_running_request(backend_request)
         backend_request.reset_migration_args()
+        self.add_running_request(backend_request)
 
     def send_blocks(self, dst_ray_actor: "ray.actor.ActorHandle", src_blocks: List[int], dst_blocks: List[int]) -> None:
         ray.get(dst_ray_actor.execute_engine_method.remote("_run_workers",

diff --git a/llumnix/backends/vllm/worker.py b/llumnix/backends/vllm/worker.py
@@ -11,6 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import time
 from typing import Dict, List
 import math
 import ray
@@ -21,11 +22,13 @@
 from vllm.worker.worker import Worker
 from vllm.config import CacheConfig,  ModelConfig, ParallelConfig
 from vllm.worker.cache_engine import CacheEngine
+from vllm.config import _GB
 
 from llumnix.logger import init_logger
 from llumnix.backends.vllm.utils import _sample_with_torch
 from llumnix.backends.vllm.migration_backend import MigrationBackendBase, get_migration_backend
 from llumnix.internal_config import MigrationConfig
+from llumnix.utils import convert_bytes
 
 logger = init_logger(__name__)
 
@@ -104,10 +107,19 @@ def init_migration(self, instance_id: str, migration_config: MigrationConfig, sr
 
     def migrate_cache(self, src_worker_handle_list, src_blocks: List[int], dst_blocks: List[int]) -> None:
+        """Migrate the given cache blocks via the migration backend and log the throughput.
+
+        Args:
+            src_worker_handle_list: Source worker handles; the entry at index
+                ``self.rank`` is the one used.
+            src_blocks: Block ids passed to the backend as the source blocks.
+            dst_blocks: Block ids passed to the backend as the destination blocks.
+
+        A ``ray.exceptions.RayActorError`` from the backend is logged and
+        swallowed; no throughput line is emitted in that case.
+        """
         src_worker_handle = src_worker_handle_list[self.rank]
+
+        # perf_counter is monotonic, so the measured duration cannot go
+        # negative if the wall clock is adjusted during the transfer.
+        start_time = time.perf_counter()
         try:
             self.migration_backend.migrate_cache(src_worker_handle, src_blocks, dst_blocks)
         except ray.exceptions.RayActorError:
             logger.info("[migrate_cache] self.rank: {}, src_worker_handle {} is dead".format(self.rank, src_worker_handle))
+            # The transfer did not complete, so a throughput figure would be misleading.
+            return
+        elapsed = time.perf_counter() - start_time
+
+        total_kv_cache_size = len(src_blocks) * CacheEngine.get_cache_block_size(
+            self.cache_config, self.model_config, self.parallel_config)
+        # Guard against a zero duration so logging can never raise ZeroDivisionError.
+        speed = total_kv_cache_size/_GB/elapsed if elapsed > 0 else float("inf")
+        logger.info("[migration_cache] blocks_num: {}, total_kv_cache_size: {}, time: {}s, speed: {}GB/s."
+                    .format(len(src_blocks), convert_bytes(total_kv_cache_size), elapsed, speed))
 
     def do_recv(self, *args, **kwargs):
         return self.migration_backend.do_recv(*args, **kwargs)

diff --git a/llumnix/entrypoints/llumnix_utils.py b/llumnix/entrypoints/llumnix_utils.py
@@ -17,6 +17,7 @@
 import time
 from typing import List, Tuple
 import asyncio
+import socket
 import ray
 
 from llumnix.llm_engine_manager import LLMEngineManager, MANAGER_ACTOR_NAME
@@ -38,10 +39,9 @@
 MAX_TASK_RETRIES = 300
 RETRIES_INTERVALS = 0.1
 
-
 def get_ip_address():
-    result = subprocess.run(['hostname', '-i'], stdout=subprocess.PIPE, check=True)
-    ip_address = result.stdout.decode('utf-8').strip()
+    """Return the IPv4 address that the local hostname resolves to."""
+    # NOTE(review): if the hostname is mapped to a loopback address (e.g. in
+    # /etc/hosts) this returns 127.x.x.x - confirm that is acceptable for
+    # multi-node use.
+    hostname = socket.gethostname()
+    ip_address = socket.gethostbyname(hostname)
     return ip_address
 
 def launch_ray_cluster(ray_cluster_port: int) -> subprocess.CompletedProcess:

diff --git a/llumnix/utils.py b/llumnix/utils.py
@@ -16,3 +16,17 @@
 
 def random_uuid() -> str:
     return str(uuid.uuid4().hex)
+
+def convert_bytes(bytes_size):
+    """Format a byte count as a two-decimal string in B, KB, MB, GB or TB.
+
+    Units are 1024-based. Values beyond the TB range stay expressed in TB.
+    Raises ValueError when bytes_size is negative.
+    """
+    if bytes_size < 0:
+        raise ValueError("Size must be a non-negative integer.")
+
+    units = ('B', 'KB', 'MB', 'GB', 'TB')
+    unit = units[0]
+    # Step up one unit per division by 1024 until the value fits or units run out.
+    for larger_unit in units[1:]:
+        if bytes_size >= 1024:
+            bytes_size /= 1024.0
+            unit = larger_unit
+        else:
+            break
+
+    return f"{bytes_size:.2f} {unit}"
diff --git a/pytest.ini b/pytest.ini
@@ -1,2 +1,2 @@
 [pytest]
-asyncio_default_fixture_loop_scope = function
+asyncio_default_fixture_loop_scope = function