chore: update rebase tests

Signed-off-by: paperspace <29749331+aarnphm@users.noreply.github.com>
bentoml · May 22, 2024 · 79df679 · 79df679
1 parent b719351
commit 79df679
Show file tree

Hide file tree

Showing 15 changed files with 217 additions and 661 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -36,10 +36,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.8', '3.11']
-        exclude:
-          - os: 'windows-latest'
+        os: [ubuntu-latest]
+        python-version: ['3.9', '3.12']
     name: tests (${{ matrix.python-version }}.${{ matrix.os }})
     steps:
       - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
@@ -52,89 +50,89 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Run tests
         run: hatch run tests:python
-      - name: Disambiguate coverage filename
-        run: mv .coverage ".coverage.${{ matrix.os }}.${{ matrix.python-version }}"
-      - name: Upload coverage data
-        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
-        with:
-          name: coverage-data
-          path: .coverage.*
-  coverage:
-    name: report-coverage
-    runs-on: ubuntu-latest
-    if: false
-    needs: tests
-    steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
-        with:
-          fetch-depth: 0
-          ref: ${{ github.event.pull_request.head.sha }}
-      - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
-        with:
-          bentoml-version: 'main'
-          python-version-file: .python-version-default
-      - name: Download coverage data
-        uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
-        with:
-          name: coverage-data
-      - name: Combine coverage data
-        run: hatch run coverage:combine
-      - name: Export coverage reports
-        run: |
-          hatch run coverage:report-xml openllm-python
-          hatch run coverage:report-uncovered-html openllm-python
-      - name: Upload uncovered HTML report
-        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
-        with:
-          name: uncovered-html-report
-          path: htmlcov
-      - name: Generate coverage summary
-        run: hatch run coverage:generate-summary
-      - name: Write coverage summary report
-        if: github.event_name == 'pull_request'
-        run: hatch run coverage:write-summary-report
-      - name: Update coverage pull request comment
-        if: github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork
-        uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # ratchet:marocchino/sticky-pull-request-comment@v2
-        with:
-          path: coverage-report.md
-  cli-benchmark:
-    name: Check for CLI responsiveness
-    runs-on: ubuntu-latest
-    env:
-      HYPERFINE_VERSION: '1.12.0'
-    steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # ratchet:actions/checkout@v4.1.6
-        with:
-          fetch-depth: 0
-      - name: Install hyperfine
-        run: |
-          wget https://github.com/sharkdp/hyperfine/releases/download/v${HYPERFINE_VERSION}/hyperfine_${HYPERFINE_VERSION}_amd64.deb
-          sudo dpkg -i hyperfine_${HYPERFINE_VERSION}_amd64.deb
-      - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
-        with:
-          bentoml-version: 'main'
-          python-version-file: .python-version-default
-      - name: Install self
-        run: bash local.sh
-      - name: Speed
-        run: hyperfine -m 100 --warmup 10 openllm
-  brew-dry-run:
-    name: Running dry-run tests for brew
-    runs-on: macos-latest
-    steps:
-      - name: Install tap and dry-run
-        run: |
-          brew tap bentoml/openllm https://github.com/bentoml/openllm
-          brew install openllm
-          openllm --help
-          openllm models --show-available
+      # - name: Disambiguate coverage filename
+      #   run: mv .coverage ".coverage.${{ matrix.os }}.${{ matrix.python-version }}"
+      # - name: Upload coverage data
+      #   uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
+      #   with:
+      #     name: coverage-data
+      #     path: .coverage.*
+  # coverage:
+  #   name: report-coverage
+  #   runs-on: ubuntu-latest
+  #   if: false
+  #   needs: tests
+  #   steps:
+  #     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/checkout@v4.1.1
+  #       with:
+  #         fetch-depth: 0
+  #         ref: ${{ github.event.pull_request.head.sha }}
+  #     - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
+  #       with:
+  #         bentoml-version: 'main'
+  #         python-version-file: .python-version-default
+  #     - name: Download coverage data
+  #       uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # ratchet:actions/download-artifact@v3
+  #       with:
+  #         name: coverage-data
+  #     - name: Combine coverage data
+  #       run: hatch run coverage:combine
+  #     - name: Export coverage reports
+  #       run: |
+  #         hatch run coverage:report-xml openllm-python
+  #         hatch run coverage:report-uncovered-html openllm-python
+  #     - name: Upload uncovered HTML report
+  #       uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # ratchet:actions/upload-artifact@v3
+  #       with:
+  #         name: uncovered-html-report
+  #         path: htmlcov
+  #     - name: Generate coverage summary
+  #       run: hatch run coverage:generate-summary
+  #     - name: Write coverage summary report
+  #       if: github.event_name == 'pull_request'
+  #       run: hatch run coverage:write-summary-report
+  #     - name: Update coverage pull request comment
+  #       if: github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork
+  #       uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # ratchet:marocchino/sticky-pull-request-comment@v2
+  #       with:
+  #         path: coverage-report.md
+  # cli-benchmark:
+  #   name: Check for CLI responsiveness
+  #   runs-on: ubuntu-latest
+  #   env:
+  #     HYPERFINE_VERSION: '1.12.0'
+  #   steps:
+  #     - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # ratchet:actions/checkout@v4.1.1
+  #       with:
+  #         fetch-depth: 0
+  #     - name: Install hyperfine
+  #       run: |
+  #         wget https://github.com/sharkdp/hyperfine/releases/download/v${HYPERFINE_VERSION}/hyperfine_${HYPERFINE_VERSION}_amd64.deb
+  #         sudo dpkg -i hyperfine_${HYPERFINE_VERSION}_amd64.deb
+  #     - uses: bentoml/setup-bentoml-action@862aa8fa0e0c3793fcca4bfe7a62717a497417e4 # ratchet:bentoml/setup-bentoml-action@v1
+  #       with:
+  #         bentoml-version: 'main'
+  #         python-version-file: .python-version-default
+  #     - name: Install self
+  #       run: bash local.sh
+  #     - name: Speed
+  #       run: hyperfine -m 100 --warmup 10 openllm
+  # brew-dry-run:
+  #   name: Running dry-run tests for brew
+  #   runs-on: macos-latest
+  #   steps:
+  #     - name: Install tap and dry-run
+  #       run: |
+  #         brew tap bentoml/openllm https://github.com/bentoml/openllm
+  #         brew install openllm
+  #         openllm --help
+  #         openllm models --show-available
   evergreen: # https://github.com/marketplace/actions/alls-green#why
     if: always()
     needs:
       - tests
-      - cli-benchmark
-      - brew-dry-run
+      # - cli-benchmark
+      # - brew-dry-run
     runs-on: ubuntu-latest
     steps:
       - name: Decide whether the needed jobs succeeded or failed

diff --git a/examples/openai_chat_completion_client.py b/examples/openai_chat_completion_client.py
@@ -14,7 +14,7 @@
 # Chat completion API
 stream = str(os.getenv('STREAM', False)).upper() in ['TRUE', '1', 'YES', 'Y', 'ON']
 completions = client.chat.completions.create(messages=[
-  ChatCompletionSystemMessageParam(role='system', content='You will be the writing assistant that assume the ton of Ernest Hemmingway.'),
+  ChatCompletionSystemMessageParam(role='system', content='You will be the writing assistant that assume the tone of Ernest Hemmingway.'),
   ChatCompletionUserMessageParam(role='user', content='Write an essay on Nietzsche and absurdism.'),
 ], model=model, max_tokens=1024, stream=stream)
 

diff --git a/hatch.toml b/hatch.toml
@@ -1,4 +1,6 @@
 [envs.default]
+installer = "uv"
+type = "virtual"
 dependencies = [
     "openllm-core @ {root:uri}/openllm-core",
     "openllm-client @ {root:uri}/openllm-client",
@@ -29,12 +31,12 @@ setup = [
 quality = ["bash ./all.sh", "- pre-commit run --all-files", "- pnpm format"]
 tool = ["quality", "bash ./clean.sh", 'python ./cz.py']
 [envs.tests]
+installer = "uv"
+type = "virtual"
 dependencies = [
+    "openllm[vllm] @ {root:uri}/openllm-python",
     "openllm-core @ {root:uri}/openllm-core",
     "openllm-client @ {root:uri}/openllm-client",
-    "openllm[chatglm,fine-tune] @ {root:uri}/openllm-python",
-    # NOTE: interact with docker for container tests.
-    "docker",
     # NOTE: Tests strategies with Hypothesis and pytest, and snapshot testing with syrupy
     "coverage[toml]>=6.5",
     "filelock>=3.7.1",
@@ -54,9 +56,7 @@ template = "tests"
 [envs.tests.scripts]
 _run_script = "pytest --cov --cov-report={env:COVERAGE_REPORT:term-missing} --cov-config=pyproject.toml -vv"
 distributed = "_run_script --reruns 5 --reruns-delay 3 --ignore openllm-python/tests/models -n 3 -r aR {args:openllm-python/tests}"
-models = "_run_script -s {args:openllm-python/tests/models}"
-python = "_run_script --reruns 5 --reruns-delay 3 --ignore openllm-python/tests/models -r aR {args:openllm-python/tests}"
-snapshot-models = "_run_script -s --snapshot-update {args:openllm-python/tests/models}"
+python = "_run_script -r aR {args:openllm-python/tests}"
 [envs.tests.overrides]
 env.GITHUB_ACTIONS.env-vars = "COVERAGE_REPORT="
 [envs.coverage]

diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml
@@ -150,45 +150,6 @@ only-include = ["src/openllm", "src/openllm_cli", "src/_openllm_tiny"]
 sources = ["src"]
 [tool.hatch.build.targets.sdist]
 exclude = ["/.git_archival.txt", "tests", "/.python-version-default"]
-[tool.hatch.build.targets.wheel.hooks.mypyc]
-dependencies = [
-    "hatch-mypyc==0.16.0",
-    "mypy==1.7.0",
-    # avoid https://github.com/pallets/click/issues/2558
-    "click==8.1.3",
-    "bentoml==1.1.9",
-    "transformers>=4.32.1",
-    "pandas-stubs",
-    "types-psutil",
-    "types-tabulate",
-    "types-PyYAML",
-    "types-protobuf",
-]
-enable-by-default = false
-exclude = ["src/_openllm_tiny/_service.py", "src/openllm/utils/__init__.py"]
-include = [
-    "src/openllm/__init__.py",
-    "src/openllm/_quantisation.py",
-    "src/openllm/_generation.py",
-    "src/openllm/exceptions.py",
-    "src/openllm/testing.py",
-    "src/openllm/utils",
-]
-# NOTE: This is consistent with pyproject.toml
-mypy-args = [
-    "--strict",
-    # this is because all transient library doesn't have types
-    "--follow-imports=skip",
-    "--allow-subclassing-any",
-    "--check-untyped-defs",
-    "--ignore-missing-imports",
-    "--no-warn-return-any",
-    "--warn-unreachable",
-    "--no-warn-no-return",
-    "--no-warn-unused-ignores",
-]
-options = { verbose = true, strip_asserts = true, debug_level = "2", opt_level = "3", include_runtime_files = true }
-require-runtime-dependencies = true
 [tool.hatch.metadata.hooks.fancy-pypi-readme]
 content-type = "text/markdown"
 # PyPI doesn't support the <picture> tag.

diff --git a/openllm-python/tests/__init__.py b/openllm-python/tests/__init__.py
@@ -1,9 +0,0 @@
-from __future__ import annotations
-import os
-
-from hypothesis import HealthCheck, settings
-
-settings.register_profile('CI', settings(suppress_health_check=[HealthCheck.too_slow]), deadline=None)
-
-if 'CI' in os.environ:
-  settings.load_profile('CI')

diff --git a/openllm-python/tests/_data.py b/openllm-python/tests/_data.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+import typing as t
+from openllm_core._typing_compat import TypedDict
+from datasets import load_dataset
+
+if t.TYPE_CHECKING:
+  from transformers import PreTrainedTokenizerBase
+
+FIXED_OUTPUT_LENGTH = 128
+
+
+class DatasetEntry(TypedDict):
+  # One prompt/response pair, keyed by speaker: 'human' is the prompt side, 'gpt' the reply side.
+  # NOTE(review): `transform` in prepare_sharegpt_request is annotated as returning this type but
+  # builds a *list* under each key (it runs batched) -- confirm whether these should be list[str].
+  human: str
+  gpt: str
+
+
+class SampledRequest(TypedDict):
+  # Element type of the list returned by prepare_sharegpt_request.
+  # prompt: the prompt text (taken from the 'human' column).
+  prompt: str
+  # prompt_length: number of token ids the tokenizer produced for the prompt.
+  prompt_length: int
+  # output_length: number of output tokens associated with the entry (see `process` for how it is derived).
+  output_length: int
+
+
+def prepare_sharegpt_request(
+  num_requests: int, tokenizer: PreTrainedTokenizerBase, max_output_length: int | None = None
+) -> list[SampledRequest]:
+  """Build a list of prompt entries from the ShareGPT dataset.
+
+  Loads the 'train' split of 'anon8231489123/ShareGPT_Vicuna_unfiltered', keeps conversations
+  with at least two turns, takes the first two turns as prompt and completion, tokenizes both,
+  drops entries whose lengths fall outside the bounds in ``filter_length``, shuffles with a
+  fixed seed and returns the first ``num_requests`` entries.
+
+  Args:
+    num_requests: Upper bound on the number of entries returned (the final list is sliced to it).
+    tokenizer: Called on a list of strings; its result must expose ``input_ids``.
+    max_output_length: When ``None``, ``output_length`` is the tokenized completion length;
+      otherwise ``output_length`` is ``FIXED_OUTPUT_LENGTH`` for every entry.
+
+  Returns:
+    A list of dicts with keys 'prompt', 'prompt_length' and 'output_length'.
+  """
+
+  # Batched map function: split each conversation into its first turn ('human') and second turn ('gpt').
+  def transform(examples) -> DatasetEntry:
+    human, gpt = [], []
+    for example in examples['conversations']:
+      human.append(example[0]['value'])
+      gpt.append(example[1]['value'])
+    return {'human': human, 'gpt': gpt}
+
+  # Batched map function: attach token counts to every prompt/completion pair.
+  def process(examples, tokenizer, max_output_length: t.Optional[int]):
+    # Tokenize the 'human' and 'gpt' values in batches
+    prompt_token_ids = tokenizer(examples['human']).input_ids
+    completion_token_ids = tokenizer(examples['gpt']).input_ids
+
+    # Create the transformed entries
+    return {
+      'prompt': examples['human'],
+      'prompt_length': [len(ids) for ids in prompt_token_ids],
+      # NOTE(review): the *value* of max_output_length is never used -- any non-None value yields
+      # FIXED_OUTPUT_LENGTH. Confirm whether max_output_length itself (or a cap by it) was intended.
+      'output_length': [
+        len(ids) if max_output_length is None else FIXED_OUTPUT_LENGTH for ids in completion_token_ids
+      ],
+    }
+
+  # Batched filter: returns one keep/drop flag per entry.
+  def filter_length(examples) -> list[bool]:
+    result = []
+    for prompt_length, output_length in zip(examples['prompt_length'], examples['output_length']):
+      # Drop entries where either side is shorter than 4 tokens.
+      if prompt_length < 4 or output_length < 4:
+        result.append(False)
+      # Drop entries whose prompt exceeds 1024 tokens or whose combined length exceeds 2048.
+      elif prompt_length > 1024 or prompt_length + output_length > 2048:
+        result.append(False)
+      else:
+        result.append(True)
+    return result
+
+  return (
+    (
+      # The walrus binds `dataset` so the first map() below can reference dataset.column_names.
+      dataset := load_dataset(
+        'anon8231489123/ShareGPT_Vicuna_unfiltered',
+        data_files='ShareGPT_V3_unfiltered_cleaned_split.json',
+        split='train',
+      )
+    )
+    # transform indexes turns [0] and [1], so shorter conversations must be removed first.
+    .filter(lambda example: len(example['conversations']) >= 2, num_proc=8)
+    .map(transform, remove_columns=dataset.column_names, batched=True)
+    .map(
+      process,
+      fn_kwargs={'tokenizer': tokenizer, 'max_output_length': max_output_length},
+      remove_columns=['human', 'gpt'],
+      batched=True,
+    )
+    .filter(filter_length, batched=True)
+    # Fixed seed, so the shuffle (and therefore the selected entries) uses the same seed on every call.
+    .shuffle(seed=42)
+    # The whole filtered dataset is converted to a list before it is sliced to num_requests.
+    .to_list()[:num_requests]
+  )
diff --git a/openllm-python/tests/_strategies/__init__.py b/openllm-python/tests/_strategies/__init__.py