From 3f42fabff837b06a167ca2d0c5f864a6de9a4752 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=86=E8=90=8C=E9=97=B7=E6=B2=B9=E7=93=B6?= <253605712@qq.com>
Date: Fri, 7 Feb 2025 14:29:29 +0800
Subject: [PATCH] =?UTF-8?q?chore:improve=20thinking=20display=20for=20llm?=
 =?UTF-8?q?=20from=20xinference=20and=20ollama=20pro=E2=80=A6=20(#13318)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../model_providers/ollama/llm/llm.py     | 10 +---------
 .../model_providers/xinference/llm/llm.py | 16 ++--------------
 2 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index 0377731175ebbe..b640914b394993 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -314,7 +314,6 @@ def _handle_generate_stream_response(
         """
         full_text = ""
         chunk_index = 0
-        is_reasoning_started = False

         def create_final_llm_result_chunk(
             index: int, message: AssistantPromptMessage, finish_reason: str
@@ -368,14 +367,7 @@ def create_final_llm_result_chunk(

                 # transform assistant message to prompt message
                 text = chunk_json["response"]
-                if "<think>" in text:
-                    is_reasoning_started = True
-                    text = text.replace("<think>", "> 💭 ")
-                elif "</think>" in text:
-                    is_reasoning_started = False
-                    text = text.replace("</think>", "") + "\n\n"
-                elif is_reasoning_started:
-                    text = text.replace("\n", "\n> ")
+                text = self._wrap_thinking_by_tag(text)

                 assistant_prompt_message = AssistantPromptMessage(content=text)

diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py
index 87f89ed061210f..fcf452d62729f3 100644
--- a/api/core/model_runtime/model_providers/xinference/llm/llm.py
+++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py
@@ -1,4 +1,3 @@
-import re
 from collections.abc import Generator, Iterator
 from typing import Optional, cast

@@ -636,16 +635,13 @@ def _handle_chat_stream_response(
         handle stream chat generate response
         """
         full_response = ""
-        is_reasoning_started_tag = False
         for chunk in resp:
             if len(chunk.choices) == 0:
                 continue
             delta = chunk.choices[0]
             if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""):
                 continue
-            delta_content = delta.delta.content
-            if not delta_content:
-                delta_content = ""
+            delta_content = delta.delta.content or ""
             # check if there is a tool call in the response
             function_call = None
             tool_calls = []
@@ -658,15 +654,7 @@ def _handle_chat_stream_response(
             if function_call:
                 assistant_message_tool_calls += [self._extract_response_function_call(function_call)]

-            if not is_reasoning_started_tag and "<think>" in delta_content:
-                is_reasoning_started_tag = True
-                delta_content = "> 💭 " + delta_content.replace("<think>", "")
-            elif is_reasoning_started_tag and "</think>" in delta_content:
-                delta_content = delta_content.replace("</think>", "") + "\n\n"
-                is_reasoning_started_tag = False
-            elif is_reasoning_started_tag:
-                if "\n" in delta_content:
-                    delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
+            delta_content = self._wrap_thinking_by_tag(delta_content)
             # transform assistant message to prompt message
             assistant_prompt_message = AssistantPromptMessage(
                 content=delta_content or "", tool_calls=assistant_message_tool_calls
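
Both call sites above now delegate the per-chunk `<think>` handling to a shared `_wrap_thinking_by_tag` helper that is defined outside this diff (presumably on the common model-runtime base class). As a rough illustration of what such a helper could look like, reconstructed only from the inline logic this patch removes, here is a minimal sketch; the class name, the `_is_reasoning` attribute, and the method body are illustrative assumptions, not the actual Dify implementation:

```python
# Hypothetical sketch: not part of this diff. The real _wrap_thinking_by_tag
# lives elsewhere in the codebase; this only mirrors the inline logic the
# patch removes from the ollama and xinference handlers.
import re


class ThinkingWrapperSketch:
    """Renders streamed <think>...</think> segments as markdown blockquotes."""

    def __init__(self) -> None:
        # Tracks whether we are currently inside a <think> block across chunks.
        self._is_reasoning = False

    def _wrap_thinking_by_tag(self, content: str) -> str:
        if not content:
            return content
        if not self._is_reasoning and "<think>" in content:
            # Opening tag: start a blockquote with a thought marker.
            self._is_reasoning = True
            content = content.replace("<think>", "> 💭 ")
        elif self._is_reasoning and "</think>" in content:
            # Closing tag: drop it and end the blockquote with a paragraph break.
            self._is_reasoning = False
            content = content.replace("</think>", "") + "\n\n"
        elif self._is_reasoning:
            # Inside the block: keep every new line quoted.
            content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
        return content
```

Whatever the real implementation looks like, the point of the change is the same: the reasoning state no longer lives as a provider-local `is_reasoning_started` flag duplicated in each streaming handler, so both providers reduce to a single helper call per chunk.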