diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index 0377731175ebbe..b640914b394993 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -314,7 +314,6 @@ def _handle_generate_stream_response(
"""
full_text = ""
chunk_index = 0
- is_reasoning_started = False
def create_final_llm_result_chunk(
index: int, message: AssistantPromptMessage, finish_reason: str
@@ -368,14 +367,7 @@ def create_final_llm_result_chunk(
# transform assistant message to prompt message
text = chunk_json["response"]
- if "" in text:
- is_reasoning_started = True
- text = text.replace("", "> 💠")
- elif "" in text:
- is_reasoning_started = False
- text = text.replace("", "") + "\n\n"
- elif is_reasoning_started:
- text = text.replace("\n", "\n> ")
+ text = self._wrap_thinking_by_tag(text)
assistant_prompt_message = AssistantPromptMessage(content=text)
diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py
index 87f89ed061210f..fcf452d62729f3 100644
--- a/api/core/model_runtime/model_providers/xinference/llm/llm.py
+++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py
@@ -1,4 +1,3 @@
-import re
from collections.abc import Generator, Iterator
from typing import Optional, cast
@@ -636,16 +635,13 @@ def _handle_chat_stream_response(
handle stream chat generate response
"""
full_response = ""
- is_reasoning_started_tag = False
for chunk in resp:
if len(chunk.choices) == 0:
continue
delta = chunk.choices[0]
if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""):
continue
- delta_content = delta.delta.content
- if not delta_content:
- delta_content = ""
+ delta_content = delta.delta.content or ""
# check if there is a tool call in the response
function_call = None
tool_calls = []
@@ -658,15 +654,7 @@ def _handle_chat_stream_response(
if function_call:
assistant_message_tool_calls += [self._extract_response_function_call(function_call)]
- if not is_reasoning_started_tag and "<think>" in delta_content:
- is_reasoning_started_tag = True
- delta_content = "> 💠" + delta_content.replace("<think>", "")
- elif is_reasoning_started_tag and "</think>" in delta_content:
- delta_content = delta_content.replace("</think>", "") + "\n\n"
- is_reasoning_started_tag = False
- elif is_reasoning_started_tag:
- if "\n" in delta_content:
- delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
+ delta_content = self._wrap_thinking_by_tag(delta_content)
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(
content=delta_content or "", tool_calls=assistant_message_tool_calls
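
Both hunks replace duplicated inline handling of `<think>`/`</think>` reasoning tags with a single call to `self._wrap_thinking_by_tag`. The helper's body is not part of this diff; the sketch below is a minimal reconstruction of what it would have to do, assuming it keeps the reasoning flag as instance state so it survives across streamed chunks. The `ThinkingWrapperSketch` class and the sample chunks are illustrative, not the repository's actual code.

```python
import re


class ThinkingWrapperSketch:
    """Hypothetical stand-in for the shared base class; the real
    _wrap_thinking_by_tag body lives outside this diff."""

    def __init__(self) -> None:
        self._is_reasoning = False  # must survive across streamed chunks

    def _wrap_thinking_by_tag(self, content: str) -> str:
        if not self._is_reasoning and "<think>" in content:
            # Opening tag: start a markdown blockquote for the reasoning text.
            self._is_reasoning = True
            return "> 💠" + content.replace("<think>", "")
        if self._is_reasoning and "</think>" in content:
            # Closing tag: drop it and terminate the blockquote.
            self._is_reasoning = False
            return content.replace("</think>", "") + "\n\n"
        if self._is_reasoning:
            # Mid-reasoning chunk: quote every fresh line, following the
            # regex variant removed from the xinference provider above.
            return re.sub(r"\n(?!(>|\n))", "\n> ", content)
        return content


wrapper = ThinkingWrapperSketch()
for chunk in ["<think>first step", "\nsecond step", "</think>", "final answer"]:
    print(wrapper._wrap_thinking_by_tag(chunk), end="")
# prints "> 💠first step\n> second step\n\nfinal answer",
# which renders the reasoning as a markdown blockquote.
```

Moving the flag onto the instance is what lets both call sites collapse to a one-line call: neither provider has to thread its own `is_reasoning_started` local through its stream loop anymore.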