From 3f42fabff837b06a167ca2d0c5f864a6de9a4752 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=86=E8=90=8C=E9=97=B7=E6=B2=B9=E7=93=B6?= <253605712@qq.com>
Date: Fri, 7 Feb 2025 14:29:29 +0800
Subject: [PATCH] =?UTF-8?q?chore:improve=20thinking=20display=20for=20llm?=
 =?UTF-8?q?=20from=20xinference=20and=20ollama=20pro=E2=80=A6=20(#13318)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../model_providers/ollama/llm/llm.py     | 10 +---------
 .../model_providers/xinference/llm/llm.py | 16 ++--------------
 2 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index 0377731175ebbe..b640914b394993 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -314,7 +314,6 @@ def _handle_generate_stream_response(
         """
         full_text = ""
         chunk_index = 0
-        is_reasoning_started = False

         def create_final_llm_result_chunk(
             index: int, message: AssistantPromptMessage, finish_reason: str
@@ -368,14 +367,7 @@ def create_final_llm_result_chunk(

                 # transform assistant message to prompt message
                 text = chunk_json["response"]
-                if "<think>" in text:
-                    is_reasoning_started = True
-                    text = text.replace("<think>", "> 💭 ")
-                elif "</think>" in text:
-                    is_reasoning_started = False
-                    text = text.replace("</think>", "") + "\n\n"
-                elif is_reasoning_started:
-                    text = text.replace("\n", "\n> ")
+                text = self._wrap_thinking_by_tag(text)

                 assistant_prompt_message = AssistantPromptMessage(content=text)

diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py
index 87f89ed061210f..fcf452d62729f3 100644
--- a/api/core/model_runtime/model_providers/xinference/llm/llm.py
+++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py
@@ -1,4 +1,3 @@
-import re
 from collections.abc import Generator, Iterator
 from typing import Optional, cast

@@ -636,16 +635,13 @@ def _handle_chat_stream_response(
         handle stream chat generate response
         """
         full_response = ""
-        is_reasoning_started_tag = False
         for chunk in resp:
             if len(chunk.choices) == 0:
                 continue
             delta = chunk.choices[0]
             if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""):
                 continue
-            delta_content = delta.delta.content
-            if not delta_content:
-                delta_content = ""
+            delta_content = delta.delta.content or ""
             # check if there is a tool call in the response
             function_call = None
             tool_calls = []
@@ -658,15 +654,7 @@ def _handle_chat_stream_response(
             if function_call:
                 assistant_message_tool_calls += [self._extract_response_function_call(function_call)]

-            if not is_reasoning_started_tag and "<think>" in delta_content:
-                is_reasoning_started_tag = True
-                delta_content = "> 💭 " + delta_content.replace("<think>", "")
-            elif is_reasoning_started_tag and "</think>" in delta_content:
-                delta_content = delta_content.replace("</think>", "") + "\n\n"
-                is_reasoning_started_tag = False
-            elif is_reasoning_started_tag:
-                if "\n" in delta_content:
-                    delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
+            delta_content = self._wrap_thinking_by_tag(delta_content)
             # transform assistant message to prompt message
             assistant_prompt_message = AssistantPromptMessage(
                 content=delta_content or "", tool_calls=assistant_message_tool_calls
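
Both call sites above now delegate the per-chunk `<think>` handling to a shared `_wrap_thinking_by_tag` helper that is defined outside this diff (presumably on the common model-runtime base class). As a rough illustration of what such a helper could look like, reconstructed only from the inline logic this patch removes, here is a minimal sketch; the class name, the `_is_reasoning` attribute, and the method body are illustrative assumptions, not the actual Dify implementation:

```python
# Hypothetical sketch: not part of this diff. The real _wrap_thinking_by_tag
# lives elsewhere in the codebase; this only mirrors the inline logic the
# patch removes from the ollama and xinference handlers.
import re


class ThinkingWrapperSketch:
    """Renders streamed <think>...</think> segments as markdown blockquotes."""

    def __init__(self) -> None:
        # Tracks whether we are currently inside a <think> block across chunks.
        self._is_reasoning = False

    def _wrap_thinking_by_tag(self, content: str) -> str:
        if not content:
            return content
        if not self._is_reasoning and "<think>" in content:
            # Opening tag: start a blockquote with a thought marker.
            self._is_reasoning = True
            content = content.replace("<think>", "> 💭 ")
        elif self._is_reasoning and "</think>" in content:
            # Closing tag: drop it and end the blockquote with a paragraph break.
            self._is_reasoning = False
            content = content.replace("</think>", "") + "\n\n"
        elif self._is_reasoning:
            # Inside the block: keep every new line quoted.
            content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
        return content
```

Whatever the real implementation looks like, the point of the change is the same: the reasoning state no longer lives as a provider-local `is_reasoning_started` flag duplicated in each streaming handler, so both providers reduce to a single helper call per chunk.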