Skip to content

Commit

Permalink
[CI] Add comprehensive testing: migration, e2e, and bench (#30)
Browse files Browse the repository at this point in the history
  • Loading branch information
KuilongCui committed Sep 19, 2024
1 parent 4aa0ab8 commit 91a6454
Show file tree
Hide file tree
Showing 48 changed files with 803 additions and 51 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/bench_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Benchmark workflow: runs tools/bench_test.sh on a self-hosted runner and,
# for pull requests, posts the contents of performance.txt as a PR comment.
name: bench_test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Cancels all but the latest run of this workflow before testing starts.
  cancel_previous_workflows:
    runs-on: [self-hosted]
    timeout-minutes: 3
    steps:
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  bench_tests:
    needs: cancel_previous_workflows
    runs-on: [self-hosted]
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Kills every running container on the runner before the benchmark.
      - name: Kill Running Containers
        run: |
          [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
      - name: Build And Test
        run: ./tools/bench_test.sh
      # performance.txt is presumably produced by the step above; verify
      # against tools/bench_test.sh.
      - name: Create comment from file
        # Only pull_request events carry an issue/PR number. On push events
        # context.issue.number is undefined and createComment would fail, so
        # the step is skipped there.
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const filePath = 'performance.txt';
            const commentBody = fs.readFileSync(filePath, 'utf8');
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: commentBody
            });
31 changes: 31 additions & 0 deletions .github/workflows/e2e_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# End-to-end test workflow: runs tools/e2e_test.sh on a self-hosted runner
# for pushes to main and pull requests targeting main.
name: e2e_test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Cancels all but the latest run of this workflow before testing starts.
  cancel_previous_workflows:
    runs-on: self-hosted
    timeout-minutes: 3
    steps:
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  e2e_tests:
    needs: cancel_previous_workflows
    runs-on: self-hosted
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Kills every running container on the runner before the tests.
      - name: Kill Running Containers
        run: |
          [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
      - name: Build And Test
        run: ./tools/e2e_test.sh
44 changes: 44 additions & 0 deletions .github/workflows/migration_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Migration test workflow: runs tools/migration_test.sh on a self-hosted
# runner and, for pull requests, posts performance.txt as a PR comment.
name: migration_test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Cancels all but the latest run of this workflow before testing starts.
  cancel_previous_workflows:
    runs-on: [self-hosted]
    timeout-minutes: 3
    steps:
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  migration_tests:
    needs: cancel_previous_workflows
    runs-on: [self-hosted]
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Kills every running container on the runner before the tests.
      - name: Kill Running Containers
        run: |
          [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
      - name: Build And Test
        run: ./tools/migration_test.sh
      # performance.txt is presumably produced by the step above; verify
      # against tools/migration_test.sh.
      - name: Create comment from file
        # Only pull_request events carry an issue/PR number. On push events
        # context.issue.number is undefined and createComment would fail, so
        # the step is skipped there.
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const filePath = 'performance.txt';
            const commentBody = fs.readFileSync(filePath, 'utf8');
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: commentBody
            });
32 changes: 32 additions & 0 deletions .github/workflows/offline_inference.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Offline inference workflow: installs the checkout in editable mode inside
# the llumnix-dev container and runs the offline inference example script.
name: offline_inference

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Cancels all but the latest run of this workflow before testing starts.
  cancel_previous_workflows:
    runs-on: [self-hosted]
    timeout-minutes: 3
    steps:
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  offline_inference:
    needs: cancel_previous_workflows
    runs-on: [self-hosted]
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
      # The bind-mount argument is quoted so a workspace path containing
      # whitespace is passed to docker as a single argument.
      # NOTE(review): "examlpes" looks like a misspelling of "examples";
      # confirm the actual directory name in the repository before changing.
      - name: Run offline inference example
        run: |
          nvidia-docker run --rm -t --net host --ipc host \
            -v "${PWD}:/workspace" \
            -w /workspace \
            registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
            bash -c "pip install -e . > /dev/null && python examlpes/offline_inference.py"
33 changes: 18 additions & 15 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Pylint
name: pylint

on:
push:
Expand All @@ -9,21 +9,24 @@ on:
- main

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
cancel_previous_workflows:
runs-on: [self-hosted]
timeout-minutes: 3
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
- uses: styfle/cancel-workflow-action@0.12.1
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint==2.12.2
all_but_latest: true

pylint_test:
needs: cancel_previous_workflows
runs-on: [self-hosted]
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- name: Analysing the code with pylint
run: |
pylint --rcfile=.pylintrc --output-format=parseable --jobs=8 $( find llumnix/ -type f -name '*.py')
nvidia-docker run --rm -t --net host --ipc host \
-v ${PWD}:/workspace \
-w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
bash -c "pip install -e . > /dev/null && make lint"
31 changes: 31 additions & 0 deletions .github/workflows/unit_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Unit test workflow: runs tools/unit_test.sh on a self-hosted runner for
# pushes to main and pull requests targeting main.
name: unit_test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Cancels all but the latest run of this workflow before testing starts.
  cancel_previous_workflows:
    runs-on: self-hosted
    timeout-minutes: 3
    steps:
      - uses: styfle/cancel-workflow-action@0.12.1
        with:
          all_but_latest: true

  unit_tests:
    needs: cancel_previous_workflows
    runs-on: self-hosted
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Kills every running container on the runner before the tests.
      - name: Kill Running Containers
        run: |
          [[ -n $(docker ps -q) ]] && docker kill $(docker ps -q) || echo "No running containers to kill."
      - name: Build And Test
        run: ./tools/unit_test.sh
26 changes: 26 additions & 0 deletions .github/workflows/whl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Wheel build workflow: builds a wheel with setup.py on ubuntu-latest using
# Python 3.10, for pushes to main and pull requests targeting main.
name: whl_build

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  whl_build:
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
          python-version: "3.10"
      # NOTE(review): --universal tags the wheel as py2.py3 compatible;
      # confirm that is intended for this package.
      - name: Build whl
        run: |
          python3 -m pip install --upgrade setuptools wheel
          python3 setup.py bdist_wheel --universal
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ install:

.PHONY: lint
lint: check_pylint_installed check_pytest_installed
@pylint --rcfile=.pylintrc -s n ./llumnix
@pylint --rcfile=.pylintrc -s n --jobs=32 ./llumnix

@pylint --rcfile=.pylintrc \
--disable=protected-access,super-init-not-called,unused-argument,redefined-outer-name,invalid-name \
-s n ./tests
-s n --jobs=32 ./tests

.PHONY: test
test: check_pytest_installed
@pytest -x -q --ignore=third_party/ --disable-warnings
@pytest -x -v --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings

#################### pygloo install for gloo migration backend begin ####################

Expand Down
3 changes: 2 additions & 1 deletion llumnix/backends/vllm/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ def step(self) -> None:
instance_info.step_id = next(self.step_counter)
instance_info.timestamp = time.time()
instance_info.latency = self.model_executor.last_inference_latency

seq_groups = self.scheduler.running
if seq_groups:
tot_blocks = []
Expand Down Expand Up @@ -260,8 +261,8 @@ def commit_dst_request(self, backend_request: SequenceGroupLlumnix) -> None:
logger.info("add seq {} to block table".format(seq.seq_id))
pre_alloc_blocks = self.engine.scheduler.pre_alloc_cache_dict.pop(backend_request.request_id)
self.engine.scheduler.block_manager.add_block_table(pre_alloc_blocks, seq.seq_id)
self.add_running_request(backend_request)
backend_request.reset_migration_args()
self.add_running_request(backend_request)

def send_blocks(self, dst_ray_actor: "ray.actor.ActorHandle", src_blocks: List[int], dst_blocks: List[int]) -> None:
ray.get(dst_ray_actor.execute_engine_method.remote("_run_workers",
Expand Down
12 changes: 12 additions & 0 deletions llumnix/backends/vllm/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import time
from typing import Dict, List
import math
import ray
Expand All @@ -21,11 +22,13 @@
from vllm.worker.worker import Worker
from vllm.config import CacheConfig, ModelConfig, ParallelConfig
from vllm.worker.cache_engine import CacheEngine
from vllm.config import _GB

from llumnix.logger import init_logger
from llumnix.backends.vllm.utils import _sample_with_torch
from llumnix.backends.vllm.migration_backend import MigrationBackendBase, get_migration_backend
from llumnix.internal_config import MigrationConfig
from llumnix.utils import convert_bytes

logger = init_logger(__name__)

Expand Down Expand Up @@ -104,10 +107,19 @@ def init_migration(self, instance_id: str, migration_config: MigrationConfig, sr

def migrate_cache(self, src_worker_handle_list, src_blocks: List[int], dst_blocks: List[int]) -> None:
    """Pull KV-cache blocks from the source worker and log the throughput.

    Args:
        src_worker_handle_list: Source worker handles; the entry at index
            ``self.rank`` is the one this worker migrates from.
        src_blocks: Block ids on the source side.
        dst_blocks: Block ids on this (destination) side.

    A ``ray.exceptions.RayActorError`` from the backend is logged and
    swallowed. No throughput line is emitted in that case, because the
    transfer did not complete.
    """
    src_worker_handle = src_worker_handle_list[self.rank]

    start_time = time.time()
    try:
        self.migration_backend.migrate_cache(src_worker_handle, src_blocks, dst_blocks)
    except ray.exceptions.RayActorError:
        logger.info("[migrate_cache] self.rank: {}, src_worker_handle {} is dead".format(self.rank, src_worker_handle))
        # A speed figure for a failed transfer would be misleading.
        return
    elapsed_time = time.time() - start_time

    # Block count times the per-block size reported by CacheEngine
    # (treated as bytes below, via _GB and convert_bytes).
    total_kv_cache_size = len(src_blocks) * CacheEngine.get_cache_block_size(
        self.cache_config, self.model_config, self.parallel_config)
    # Guard against a zero interval (coarse clock or a no-op migration),
    # which would otherwise raise ZeroDivisionError.
    speed = total_kv_cache_size/_GB/elapsed_time if elapsed_time > 0 else float("inf")
    logger.info("[migration_cache] blocks_num: {}, total_kv_cache_size: {}, time: {}s, speed: {}GB/s."
                .format(len(src_blocks), convert_bytes(total_kv_cache_size), elapsed_time, speed))

def do_recv(self, *args, **kwargs):
return self.migration_backend.do_recv(*args, **kwargs)
Expand Down
6 changes: 3 additions & 3 deletions llumnix/entrypoints/llumnix_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import time
from typing import List, Tuple
import asyncio
import socket
import ray

from llumnix.llm_engine_manager import LLMEngineManager, MANAGER_ACTOR_NAME
Expand All @@ -38,10 +39,9 @@
MAX_TASK_RETRIES = 300
RETRIES_INTERVALS = 0.1


def get_ip_address():
result = subprocess.run(['hostname', '-i'], stdout=subprocess.PIPE, check=True)
ip_address = result.stdout.decode('utf-8').strip()
hostname = socket.gethostname()
ip_address = socket.gethostbyname(hostname)
return ip_address

def launch_ray_cluster(ray_cluster_port: int) -> subprocess.CompletedProcess:
Expand Down
14 changes: 14 additions & 0 deletions llumnix/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,17 @@

def random_uuid() -> str:
return str(uuid.uuid4().hex)

def convert_bytes(bytes_size):
    """Format a byte count as a two-decimal string with a B/KB/MB/GB/TB suffix.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached. Negative sizes raise ``ValueError``.
    """
    if bytes_size < 0:
        raise ValueError("Size must be a non-negative integer.")

    scaled = bytes_size
    for suffix in ('B', 'KB', 'MB', 'GB'):
        # Written as "not >=" so that a value which does not compare
        # (e.g. NaN) stops at the current unit.
        if not scaled >= 1024:
            return f"{scaled:.2f} {suffix}"
        scaled /= 1024.0

    # Anything still >= 1024 GB is reported in TB, however large.
    return f"{scaled:.2f} TB"
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[pytest]
asyncio_default_fixture_loop_scope = function
asyncio_default_fixture_loop_scope = function
Loading

0 comments on commit 91a6454

Please sign in to comment.