chore:improve thinking display for llm from xinference and ollama pro… #13318

Merged (2 commits) on Feb 7, 2025
10 changes: 1 addition & 9 deletions in api/core/model_runtime/model_providers/ollama/llm/llm.py

```diff
@@ -314,7 +314,6 @@ def _handle_generate_stream_response(
         """
         full_text = ""
         chunk_index = 0
-        is_reasoning_started = False
 
         def create_final_llm_result_chunk(
             index: int, message: AssistantPromptMessage, finish_reason: str
@@ -368,14 +367,7 @@ def create_final_llm_result_chunk(
 
             # transform assistant message to prompt message
             text = chunk_json["response"]
-            if "<think>" in text:
-                is_reasoning_started = True
-                text = text.replace("<think>", "> 💭 ")
-            elif "</think>" in text:
-                is_reasoning_started = False
-                text = text.replace("</think>", "") + "\n\n"
-            elif is_reasoning_started:
-                text = text.replace("\n", "\n> ")
+            text = self._wrap_thinking_by_tag(text)
 
             assistant_prompt_message = AssistantPromptMessage(content=text)
 
```
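Both providers previously carried the same inline logic for turning streamed `<think>…</think>` spans into a Markdown blockquote; this PR replaces that duplication with a call to a shared `_wrap_thinking_by_tag` helper. The helper's body is not part of this diff, so the following is only a minimal sketch of what such a helper could look like, assuming it keeps per-stream state and mirrors the inline logic removed above (the class name and attribute are placeholders, not the actual base-class API):

```python
# Hypothetical sketch only: the real _wrap_thinking_by_tag is defined in the shared
# model-runtime code and is not shown in this diff. This mirrors the removed inline logic.
class ThinkingTagWrapper:
    def __init__(self) -> None:
        self._is_thinking = False  # whether we are currently inside a <think> span

    def _wrap_thinking_by_tag(self, text: str) -> str:
        """Render streamed <think>...</think> content as a Markdown blockquote."""
        if "<think>" in text:
            self._is_thinking = True
            text = text.replace("<think>", "> 💭 ")
        elif "</think>" in text:
            self._is_thinking = False
            text = text.replace("</think>", "") + "\n\n"
        elif self._is_thinking:
            # keep subsequent reasoning lines inside the blockquote
            text = text.replace("\n", "\n> ")
        return text
```

In the real codebase the state would live on the model instance handling the stream rather than on a standalone wrapper; the xinference diff below calls the same helper.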
16 changes: 2 additions & 14 deletions in api/core/model_runtime/model_providers/xinference/llm/llm.py

```diff
@@ -1,4 +1,3 @@
-import re
 from collections.abc import Generator, Iterator
 from typing import Optional, cast
 
@@ -636,16 +635,13 @@ def _handle_chat_stream_response(
         handle stream chat generate response
         """
         full_response = ""
-        is_reasoning_started_tag = False
         for chunk in resp:
             if len(chunk.choices) == 0:
                 continue
             delta = chunk.choices[0]
             if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""):
                 continue
-            delta_content = delta.delta.content
-            if not delta_content:
-                delta_content = ""
+            delta_content = delta.delta.content or ""
             # check if there is a tool call in the response
             function_call = None
             tool_calls = []
@@ -658,15 +654,7 @@
             if function_call:
                 assistant_message_tool_calls += [self._extract_response_function_call(function_call)]
 
-            if not is_reasoning_started_tag and "<think>" in delta_content:
-                is_reasoning_started_tag = True
-                delta_content = "> 💭 " + delta_content.replace("<think>", "")
-            elif is_reasoning_started_tag and "</think>" in delta_content:
-                delta_content = delta_content.replace("</think>", "") + "\n\n"
-                is_reasoning_started_tag = False
-            elif is_reasoning_started_tag:
-                if "\n" in delta_content:
-                    delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
+            delta_content = self._wrap_thinking_by_tag(delta_content)
             # transform assistant message to prompt message
             assistant_prompt_message = AssistantPromptMessage(
                 content=delta_content or "", tool_calls=assistant_message_tool_calls
```
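For a sense of the intended rendering, here is how the hypothetical wrapper sketched above would transform a small stream of chunks; the chunk contents are made up for illustration:

```python
# Illustrative only: feeds fabricated streaming chunks through the hypothetical wrapper.
chunks = ["<think>check the units", "\nconvert inches to meters", "</think>", "Answer: 0.9 m"]

wrapper = ThinkingTagWrapper()
print("".join(wrapper._wrap_thinking_by_tag(chunk) for chunk in chunks))
# Expected output:
# > 💭 check the units
# > convert inches to meters
#
# Answer: 0.9 m
```

Note that the removed xinference code used a regex, `re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)`, to avoid quoting lines that already start with `>`; whether the shared helper keeps that refinement is not visible in this diff.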