From 854ab21baee490fa33cc7ceb5dca06d568a64012 Mon Sep 17 00:00:00 2001 From: Cade Daniel Date: Tue, 30 Jul 2024 15:36:14 -0700 Subject: [PATCH 1/2] spec decode serving benchmark --- .../tests/serving-tests.json | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests.json b/.buildkite/nightly-benchmarks/tests/serving-tests.json index 86a0fefa339f..6a57e5154d21 100644 --- a/.buildkite/nightly-benchmarks/tests/serving-tests.json +++ b/.buildkite/nightly-benchmarks/tests/serving-tests.json @@ -55,5 +55,26 @@ "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json", "num_prompts": 200 } + }, + { + "test_name": "serving_llama70B_tp4_sharegpt_specdecode", + "qps_list": [2], + "server_parameters": { + "model": "meta-llama/Meta-Llama-3-70B-Instruct", + "disable_log_requests": "", + "tensor_parallel_size": 4, + "swap_space": 16, + "speculative_model": "turboderp/Qwama-0.5B-Instruct", + "num_speculative_tokens": 4, + "speculative_draft_tensor_parallel_size": 1, + "use_v2_block_manager": "" + }, + "client_parameters": { + "model": "meta-llama/Meta-Llama-3-70B-Instruct", + "backend": "vllm", + "dataset_name": "sharegpt", + "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json", + "num_prompts": 200 + } } -] \ No newline at end of file +] From 9b27e7305ac086ad8fefaf14636542a8f65b3a24 Mon Sep 17 00:00:00 2001 From: Cade Daniel Date: Tue, 30 Jul 2024 15:41:47 -0700 Subject: [PATCH 2/2] fix --- .buildkite/nightly-benchmarks/tests/serving-tests.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/nightly-benchmarks/tests/serving-tests.json b/.buildkite/nightly-benchmarks/tests/serving-tests.json index 6a57e5154d21..300af0524d7c 100644 --- a/.buildkite/nightly-benchmarks/tests/serving-tests.json +++ b/.buildkite/nightly-benchmarks/tests/serving-tests.json @@ -56,7 +56,7 @@ "num_prompts": 200 } }, - { + { "test_name": "serving_llama70B_tp4_sharegpt_specdecode", "qps_list": [2], "server_parameters": {