Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add think tag display for xinference deepseek r1 #13291

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions api/core/model_runtime/model_providers/xinference/llm/llm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from collections.abc import Generator, Iterator
from typing import Optional, cast

Expand Down Expand Up @@ -635,16 +636,16 @@ def _handle_chat_stream_response(
handle stream chat generate response
"""
full_response = ""

is_reasoning_started_tag = False
for chunk in resp:
if len(chunk.choices) == 0:
continue

delta = chunk.choices[0]

if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""):
continue

delta_content = delta.delta.content
if not delta_content:
delta_content = ""
# check if there is a tool call in the response
function_call = None
tool_calls = []
Expand All @@ -657,9 +658,18 @@ def _handle_chat_stream_response(
if function_call:
assistant_message_tool_calls += [self._extract_response_function_call(function_call)]

if not is_reasoning_started_tag and "<think>" in delta_content:
is_reasoning_started_tag = True
delta_content = "> 💭 " + delta_content.replace("<think>", "")
elif is_reasoning_started_tag and "</think>" in delta_content:
delta_content = delta_content.replace("</think>", "") + "\n\n"
is_reasoning_started_tag = False
elif is_reasoning_started_tag:
if "\n" in delta_content:
delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(
content=delta.delta.content or "", tool_calls=assistant_message_tool_calls
content=delta_content or "", tool_calls=assistant_message_tool_calls
)

if delta.finish_reason is not None:
Expand Down Expand Up @@ -697,7 +707,7 @@ def _handle_chat_stream_response(
),
)

full_response += delta.delta.content
full_response += delta_content

def _handle_completion_generate_response(
self,
Expand Down