[Bugfix][Frontend] Cleanup "fix chat logprobs" #5026

Merged: 21 commits, Jun 11, 2024
Commits
2e19b90
Fix logprobs for chat completion API
DarkLight1337 May 24, 2024
08e41d7
Update and fix tests
DarkLight1337 May 24, 2024
bbd4415
Fix and refine tests
DarkLight1337 May 24, 2024
504dd49
Fix incorrect parameters to `_create_chat_logprobs`
DarkLight1337 May 24, 2024
390e93d
Allow `logprobs=True` when `top_logprobs=0` or `top_logprobs=None` (#…
DarkLight1337 May 24, 2024
cbed5ec
Refine tests and fix them
DarkLight1337 May 24, 2024
518ff5f
Merge branch 'upstream' into openai-logprobs
DarkLight1337 May 25, 2024
a72b33c
Use stricter test for Chat Completions API
DarkLight1337 May 28, 2024
d18287a
Merge branch 'upstream' into openai-logprobs
DarkLight1337 May 30, 2024
4cb9068
Merge branch 'upstream' into openai-logprobs
DarkLight1337 May 30, 2024
5ed37cd
Update tests
DarkLight1337 May 30, 2024
edeb3f6
Apply formatter
DarkLight1337 May 30, 2024
6584a51
Remove unused typevar
DarkLight1337 May 30, 2024
2b6b3d8
Remove unnecessary disable
DarkLight1337 May 30, 2024
fcf4d6f
Update `test_single_chat_session`
DarkLight1337 May 30, 2024
fed335b
Use strict equality tests for length, and remove unnecessary non-null…
DarkLight1337 Jun 1, 2024
ecef584
Revert use strict equality tests
DarkLight1337 Jun 1, 2024
72d58e1
Fix bad types caused by reassignment of same variable
DarkLight1337 Jun 3, 2024
8226295
Merge branch 'upstream' into openai-logprobs
DarkLight1337 Jun 4, 2024
908cac4
Fix confusing assertion and variable name
DarkLight1337 Jun 4, 2024
7de6cf7
Merge branch 'upstream' into openai-logprobs
DarkLight1337 Jun 10, 2024
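
One of the commits above allows `logprobs=True` together with `top_logprobs=0` or `top_logprobs=None` on the Chat Completions API. A minimal sketch of that request shape, following the pattern the tests below exercise (the base URL, API key, and model name are assumptions for illustration, not values taken from this PR):

import asyncio

import openai


async def main() -> None:
    client = openai.AsyncOpenAI(
        base_url="http://localhost:8000/v1",  # assumed vLLM server address
        api_key="EMPTY",  # placeholder; only needed if the server requires a key
    )
    chat_completion = await client.chat.completions.create(
        model="HuggingFaceH4/zephyr-7b-beta",  # assumed model name
        messages=[{"role": "user", "content": "what is 1+1?"}],
        max_tokens=10,
        logprobs=True,
        top_logprobs=0,
    )
    choice = chat_completion.choices[0]
    # Per-token logprobs are returned, but the list of alternatives is empty
    # because top_logprobs is 0 (mirroring test_zero_logprobs_chat below).
    assert choice.logprobs is not None
    assert choice.logprobs.content is not None
    assert len(choice.logprobs.content[0].top_logprobs) == 0


asyncio.run(main())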
25 changes: 11 additions & 14 deletions tests/async_engine/test_openapi_server_ray.py
@@ -55,9 +55,8 @@ async def test_single_completion(server, client: openai.AsyncOpenAI):
temperature=0.0)

assert completion.id is not None
assert completion.choices is not None and len(completion.choices) == 1
assert completion.choices[0].text is not None and len(
completion.choices[0].text) >= 5
assert len(completion.choices) == 1
assert len(completion.choices[0].text) >= 5
assert completion.choices[0].finish_reason == "length"
assert completion.usage == openai.types.CompletionUsage(
completion_tokens=5, prompt_tokens=6, total_tokens=11)
@@ -69,8 +68,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI):
max_tokens=5,
temperature=0.0,
)
assert completion.choices[0].text is not None and len(
completion.choices[0].text) >= 5
assert len(completion.choices[0].text) >= 5


@pytest.mark.asyncio
@@ -90,15 +88,14 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI):
logprobs=True,
top_logprobs=5)
assert chat_completion.id is not None
assert chat_completion.choices is not None and len(
chat_completion.choices) == 1
assert chat_completion.choices[0].message is not None
assert chat_completion.choices[0].logprobs is not None
assert chat_completion.choices[0].logprobs.content[
0].top_logprobs is not None
assert len(
chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
message = chat_completion.choices[0].message
assert len(chat_completion.choices) == 1

choice = chat_completion.choices[0]
assert choice.finish_reason == "length"
assert chat_completion.usage == openai.types.CompletionUsage(
completion_tokens=10, prompt_tokens=13, total_tokens=23)

message = choice.message
assert message.content is not None and len(message.content) >= 10
assert message.role == "assistant"
messages.append({"role": "assistant", "content": message.content})
169 changes: 84 additions & 85 deletions tests/entrypoints/test_openai_server.py
@@ -167,9 +167,10 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,

assert completion.id is not None
assert completion.choices is not None and len(completion.choices) == 1
assert completion.choices[0].text is not None and len(
completion.choices[0].text) >= 5
assert completion.choices[0].finish_reason == "length"

choice = completion.choices[0]
assert len(choice.text) >= 5
Contributor:
nit: since we are doing a stricter check on completion.usage (L:174), I wonder if we can have a strict equality check on len(choice.text) for consistency here and in other places?

Member Author:
Sure!

Member Author:
It seems that we can't check the length of the string using strict equality as each token may correspond to multiple characters. I'll keep the range check then.
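
A minimal sketch of the point above (the tokenizer name is an assumption taken from the tests' MODEL_NAME; any Hugging Face tokenizer shows the same effect), illustrating why the tests keep the range check `len(choice.text) >= 5` rather than strict equality:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

token_ids = tokenizer("The chef prepared a delicious meal.",
                      add_special_tokens=False)["input_ids"]
text = tokenizer.decode(token_ids)

# Each token usually decodes to several characters, so a completion capped at
# max_tokens=5 is only guaranteed to be at least 5 characters long.
print(f"{len(token_ids)} tokens -> {len(text)} characters")
assert len(text) >= len(token_ids)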

assert choice.finish_reason == "length"
assert completion.usage == openai.types.CompletionUsage(
completion_tokens=5, prompt_tokens=6, total_tokens=11)

@@ -180,8 +181,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
max_tokens=5,
temperature=0.0,
)
assert completion.choices[0].text is not None and len(
completion.choices[0].text) >= 5
assert len(completion.choices[0].text) >= 5


@pytest.mark.asyncio
@@ -206,9 +206,9 @@ async def test_no_logprobs(server, client: openai.AsyncOpenAI,

@pytest.mark.asyncio
@pytest.mark.parametrize(
# first test base model, then test loras
# just test 1 lora hereafter
"model_name",
[MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
[MODEL_NAME, "zephyr-lora"],
)
async def test_zero_logprobs(server, client: openai.AsyncOpenAI,
model_name: str):
@@ -287,55 +287,7 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
max_tokens=5,
temperature=0.0,
)
completion = completion.choices[0].text
assert completion is not None and len(completion) >= 0


@pytest.mark.asyncio
@pytest.mark.parametrize(
# just test 1 lora hereafter
"model_name",
[MODEL_NAME, "zephyr-lora"],
)
async def test_single_chat_session(server, client: openai.AsyncOpenAI,
model_name: str):
messages = [{
"role": "system",
"content": "you are a helpful assistant"
}, {
"role": "user",
"content": "what is 1+1?"
}]

# test single completion
chat_completion = await client.chat.completions.create(model=model_name,
messages=messages,
max_tokens=10,
logprobs=True,
top_logprobs=5)
assert chat_completion.id is not None
assert chat_completion.choices is not None and len(
chat_completion.choices) == 1
assert chat_completion.choices[0].message is not None
assert chat_completion.choices[0].logprobs is not None
assert chat_completion.choices[0].logprobs.content[
0].top_logprobs is not None
assert len(
chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
message = chat_completion.choices[0].message
assert message.content is not None and len(message.content) >= 10
assert message.role == "assistant"
messages.append({"role": "assistant", "content": message.content})

# test multi-turn dialogue
messages.append({"role": "user", "content": "express your result in json"})
chat_completion = await client.chat.completions.create(
model=model_name,
messages=messages,
max_tokens=10,
)
message = chat_completion.choices[0].message
assert message.content is not None and len(message.content) >= 0
assert len(completion.choices[0].text) >= 0


@pytest.mark.asyncio
@@ -390,7 +342,7 @@ async def test_zero_logprobs_chat(server, client: openai.AsyncOpenAI,
choice = chat_completion.choices[0]
assert choice.logprobs is not None
assert choice.logprobs.content is not None
assert len(choice.logprobs.content[0].top_logprobs) <= 1
assert len(choice.logprobs.content[0].top_logprobs) == 0


@pytest.mark.asyncio
@@ -418,11 +370,14 @@ async def test_some_logprobs_chat(server, client: openai.AsyncOpenAI,
choice = chat_completion.choices[0]
assert choice.logprobs is not None
assert choice.logprobs.content is not None
assert len(choice.logprobs.content[0].top_logprobs) <= 6
assert len(choice.logprobs.content[0].top_logprobs) == 5


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
"model_name",
[MODEL_NAME, "zephyr-lora"],
)
async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
model_name: str):
messages = [{
@@ -463,7 +418,51 @@ async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,

@pytest.mark.asyncio
@pytest.mark.parametrize(
# just test 1 lora hereafter
"model_name",
[MODEL_NAME, "zephyr-lora"],
)
async def test_single_chat_session(server, client: openai.AsyncOpenAI,
model_name: str):
messages = [{
"role": "system",
"content": "you are a helpful assistant"
}, {
"role": "user",
"content": "what is 1+1?"
}]

# test single completion
chat_completion = await client.chat.completions.create(model=model_name,
messages=messages,
max_tokens=10,
logprobs=True,
top_logprobs=5)
assert chat_completion.id is not None
assert len(chat_completion.choices) == 1

choice = chat_completion.choices[0]
assert choice.finish_reason == "length"
assert chat_completion.usage == openai.types.CompletionUsage(
completion_tokens=10, prompt_tokens=37, total_tokens=47)

message = choice.message
assert message.content is not None and len(message.content) >= 10
assert message.role == "assistant"
messages.append({"role": "assistant", "content": message.content})

# test multi-turn dialogue
messages.append({"role": "user", "content": "express your result in json"})
chat_completion = await client.chat.completions.create(
model=model_name,
messages=messages,
max_tokens=10,
)
message = chat_completion.choices[0].message
assert message.content is not None and len(message.content) >= 0


@pytest.mark.asyncio
@pytest.mark.parametrize(
"model_name",
[MODEL_NAME, "zephyr-lora"],
)
@@ -749,8 +748,7 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI):
logit_bias={str(token_id): 100},
seed=42,
)
assert completion.choices[0].text is not None and len(
completion.choices[0].text) >= 5
assert len(completion.choices[0].text) >= 5
response_tokens = tokenizer(completion.choices[0].text,
add_special_tokens=False)["input_ids"]
expected_tokens = tokenizer(tokenizer.decode([token_id] * 5),
@@ -797,9 +795,8 @@ async def test_guided_json_completion(server, client: openai.AsyncOpenAI,
guided_decoding_backend=guided_decoding_backend))

assert completion.id is not None
assert completion.choices is not None and len(completion.choices) == 3
assert len(completion.choices) == 3
for i in range(3):
assert completion.choices[i].text is not None
output_json = json.loads(completion.choices[i].text)
jsonschema.validate(instance=output_json, schema=TEST_SCHEMA)

@@ -866,9 +863,8 @@ async def test_guided_regex_completion(server, client: openai.AsyncOpenAI,
guided_decoding_backend=guided_decoding_backend))

assert completion.id is not None
assert completion.choices is not None and len(completion.choices) == 3
assert len(completion.choices) == 3
for i in range(3):
assert completion.choices[i].text is not None
assert re.fullmatch(TEST_REGEX, completion.choices[i].text) is not None


@@ -925,7 +921,7 @@ async def test_guided_choice_completion(server, client: openai.AsyncOpenAI,
guided_decoding_backend=guided_decoding_backend))

assert completion.id is not None
assert completion.choices is not None and len(completion.choices) == 2
assert len(completion.choices) == 2
for i in range(2):
assert completion.choices[i].text in TEST_CHOICE

@@ -1027,12 +1023,14 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
top_logprobs=5,
extra_body=dict(guided_choice=TEST_CHOICE,
guided_decoding_backend=guided_decoding_backend))

assert chat_completion.choices[0].logprobs is not None
assert chat_completion.choices[0].logprobs.content is not None
top_logprobs = chat_completion.choices[0].logprobs.content[0].top_logprobs

# -9999.0 is the minimum logprob returned by OpenAI
assert all(
isinstance(token.logprob, float) and token.logprob >= -9999.0
for token in top_logprobs)
for item in top_logprobs:
assert item.logprob >= -9999.0, f"Failed (top_logprobs={top_logprobs})"


@pytest.mark.asyncio
@@ -1234,6 +1232,8 @@ async def test_response_format_json_object(server, client: openai.AsyncOpenAI):
response_format={"type": "json_object"})

content = resp.choices[0].message.content
assert content is not None

loaded = json.loads(content)
assert loaded == {"result": 2}, loaded

@@ -1361,8 +1361,7 @@ async def test_echo_logprob_completion(server, client: openai.AsyncOpenAI,

prompt_text = tokenizer.decode(prompt) if isinstance(prompt,
list) else prompt
assert (completion.choices[0].text is not None
and re.search(r"^" + prompt_text, completion.choices[0].text))
assert re.search(r"^" + prompt_text, completion.choices[0].text)
logprobs = completion.choices[0].logprobs
assert logprobs is not None
assert len(logprobs.text_offset) > 5
@@ -1403,32 +1402,32 @@ async def test_long_seed(server, client: openai.AsyncOpenAI):
)
async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
model_name: str):
input = [
input_texts = [
"The chef prepared a delicious meal.",
]

# test single embedding
embeddings = await client.embeddings.create(
model=model_name,
input=input,
input=input_texts,
encoding_format="float",
)
assert embeddings.id is not None
assert embeddings.data is not None and len(embeddings.data) == 1
assert len(embeddings.data) == 1
assert len(embeddings.data[0].embedding) == 4096
assert embeddings.usage.completion_tokens == 0
assert embeddings.usage.prompt_tokens == 9
assert embeddings.usage.total_tokens == 9

# test using token IDs
input = [1, 1, 1, 1, 1]
input_tokens = [1, 1, 1, 1, 1]
embeddings = await client.embeddings.create(
model=model_name,
input=input,
input=input_tokens,
encoding_format="float",
)
assert embeddings.id is not None
assert embeddings.data is not None and len(embeddings.data) == 1
assert len(embeddings.data) == 1
assert len(embeddings.data[0].embedding) == 4096
assert embeddings.usage.completion_tokens == 0
assert embeddings.usage.prompt_tokens == 5
@@ -1443,29 +1442,29 @@ async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
async def test_batch_embedding(embedding_server, client: openai.AsyncOpenAI,
model_name: str):
# test List[str]
inputs = [
input_texts = [
"The cat sat on the mat.", "A feline was resting on a rug.",
"Stars twinkle brightly in the night sky."
]
embeddings = await client.embeddings.create(
model=model_name,
input=inputs,
input=input_texts,
encoding_format="float",
)
assert embeddings.id is not None
assert embeddings.data is not None and len(embeddings.data) == 3
assert len(embeddings.data) == 3
assert len(embeddings.data[0].embedding) == 4096

# test List[List[int]]
inputs = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
[25, 32, 64, 77]]
input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
[25, 32, 64, 77]]
embeddings = await client.embeddings.create(
model=model_name,
input=inputs,
input=input_tokens,
encoding_format="float",
)
assert embeddings.id is not None
assert embeddings.data is not None and len(embeddings.data) == 4
assert len(embeddings.data) == 4
assert len(embeddings.data[0].embedding) == 4096
assert embeddings.usage.completion_tokens == 0
assert embeddings.usage.prompt_tokens == 17
5 changes: 2 additions & 3 deletions tests/tensorizer_loader/test_tensorizer.py
@@ -209,9 +209,8 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
temperature=0.0)

assert completion.id is not None
assert completion.choices is not None and len(completion.choices) == 1
assert completion.choices[0].text is not None and len(
completion.choices[0].text) >= 5
assert len(completion.choices) == 1
assert len(completion.choices[0].text) >= 5
assert completion.choices[0].finish_reason == "length"
assert completion.usage == openai.types.CompletionUsage(
completion_tokens=5, prompt_tokens=6, total_tokens=11)