Skip to content

Commit 489231e

Browse files
mcalman authored and Alvant committed
[Bugfix] fix missing last itl in openai completions benchmark (vllm-project#5926)
Signed-off-by: Alvant <alvasian@yandex.ru>
1 parent e4a0a9c commit 489231e

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

benchmarks/backend_request_func.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@ async def async_request_openai_completions(
265265
else:
266266
data = json.loads(chunk)
267267

268+
# NOTE: Some completion API might have a last
269+
# usage summary response without a token so we
270+
# want to check a token was generated
268271
if data["choices"][0]["text"]:
269272
timestamp = time.perf_counter()
270273
# First token
@@ -273,12 +276,8 @@ async def async_request_openai_completions(
273276
output.ttft = ttft
274277

275278
# Decoding phase
276-
# NOTE: Some completion API might have a last
277-
# usage summary response without a token so we
278-
# do not want to include as inter-token-latency
279-
elif data.get("usage", None) is None:
280-
output.itl.append(timestamp -
281-
most_recent_timestamp)
279+
output.itl.append(timestamp -
280+
most_recent_timestamp)
282281

283282
most_recent_timestamp = timestamp
284283
generated_text += data["choices"][0]["text"]

0 commit comments

Comments (0)