diff --git a/benchmark/llava_bench/README.md b/benchmark/llava_bench/README.md index 854b57eba5..ba38021152 100644 --- a/benchmark/llava_bench/README.md +++ b/benchmark/llava_bench/README.md @@ -17,7 +17,7 @@ pip3 install "torch>=2.1.2" "transformers>=4.36" pillow ### Benchmark sglang Launch a server ``` -python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000 +python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.6-vicuna-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000 ``` Run benchmark diff --git a/benchmark/llava_bench/bench_sglang.py b/benchmark/llava_bench/bench_sglang.py index 69dc1c56af..f84c8a90fb 100644 --- a/benchmark/llava_bench/bench_sglang.py +++ b/benchmark/llava_bench/bench_sglang.py @@ -20,7 +20,7 @@ def image_qa(s, image_file, question): def main(args): - lines = read_jsonl(args.question_file)[: args.num_questions] + lines = list(read_jsonl(args.question_file))[: args.num_questions] arguments = [ { "image_file": os.path.abspath(args.image_folder + "/" + l["image"]), diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 9afae99f91..5716815e08 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -706,6 +706,10 @@ def __del__(self): self.shutdown() +STREAM_END_SYMBOL = b"data: [DONE]" +STREAM_CHUNK_START_SYMBOL = b"data:" + + class Engine: """ SRT Engine without an HTTP server layer. @@ -749,8 +753,6 @@ def generate( ret = loop.run_until_complete(generate_request(obj, None)) if stream is True: - STREAM_END_SYMBOL = "data: [DONE]" - STREAM_CHUNK_START_SYMBOL = "data:" def generator_wrapper(): offset = 0 @@ -796,9 +798,6 @@ async def async_generate( ret = await generate_request(obj, None) if stream is True: - STREAM_END_SYMBOL = "data: [DONE]" - STREAM_CHUNK_START_SYMBOL = "data:" - generator = ret.body_iterator async def generator_wrapper(): diff --git a/test/srt/test_srt_engine.py b/test/srt/test_srt_engine.py index 5219ef90fe..8743e0ef9b 100644 --- a/test/srt/test_srt_engine.py +++ b/test/srt/test_srt_engine.py @@ -1,3 +1,8 @@ +""" +Usage: +python3 -m unittest test_srt_engine.TestSRTEngine.test_3_sync_streaming_combination +""" + import asyncio import json import unittest @@ -8,7 +13,7 @@ from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST -class TestSRTBackend(unittest.TestCase): +class TestSRTEngine(unittest.TestCase): def test_1_engine_runtime_consistency(self): prompt = "Today is a sunny day and I like"