From 0a85e53e27d89f61b53962df3d51effb9fc640f9 Mon Sep 17 00:00:00 2001
From: Zhedong Cen <cenzhedong2@126.com>
Date: Mon, 2 Sep 2024 18:18:14 +0800
Subject: [PATCH 1/3] Fix TTS interface error: stream audio through a dedicated tts() method

---
 api/apps/conversation_app.py   |  6 +++---
 api/db/services/llm_service.py | 13 ++++++++++++-
 rag/llm/tts_model.py           |  9 ++++++---
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py
index bc4ec163355..c4529704e57 100644
--- a/api/apps/conversation_app.py
+++ b/api/apps/conversation_app.py
@@ -196,12 +196,12 @@ def tts():
     tts_mdl = LLMBundle(tenants[0]["tenant_id"], LLMType.TTS, tts_id)
     def stream_audio():
         try:
-            for chunk in tts_mdl(text):  
+            for chunk in tts_mdl.tts(text):  
                 yield chunk  
         except Exception as e:
-            yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e),
+            yield ("data:" + json.dumps({"retcode": 500, "retmsg": str(e),
                             "data": {"answer": "**ERROR**: "+str(e)}},
-                            ensure_ascii=False).encode('utf-8')
+                            ensure_ascii=False)).encode('utf-8')
 
     resp = Response(stream_audio(), mimetype="audio/mpeg")  
     resp.headers.add_header("Cache-Control", "no-cache")
diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
index c49669f80e4..9a548e728ab 100644
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -194,7 +194,7 @@ def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"):
         for lm in LLMService.query(llm_name=llm_name):
             self.max_length = lm.max_tokens
             break
-
+    
     def encode(self, texts: list, batch_size=32):
         emd, used_tokens = self.mdl.encode(texts, batch_size)
         if not TenantLLMService.increase_usage(
@@ -235,6 +235,17 @@ def transcription(self, audio):
                 "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
         return txt
 
+    def tts(self, text):
+        for chunk in self.mdl.tts(text):
+            if isinstance(chunk,int):
+                if not TenantLLMService.increase_usage(
+                    self.tenant_id, self.llm_type, chunk, self.llm_name):
+                        database_logger.error(
+                            "Can't update token usage for {}/TTS".format(self.tenant_id))
+                return
+            yield chunk     
+
+    
     def chat(self, system, history, gen_conf):
         txt, used_tokens = self.mdl.chat(system, history, gen_conf)
         if not TenantLLMService.increase_usage(
diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py
index 731725e5475..0e6205893ec 100644
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@@ -21,7 +21,7 @@
 from pydantic import BaseModel, conint
 from rag.utils import num_tokens_from_string
 import json
-
+import re
 
 class ServeReferenceAudio(BaseModel):
     audio: bytes
@@ -52,7 +52,9 @@ def __init__(self, key, model_name, base_url):
 
     def transcription(self, audio):
         pass
-
+    
+    def normalize_text(text):
+        return re.sub(r'(\*\*|##\d+\$\$|#)', '', text)
 
 class FishAudioTTS(Base):
     def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"):
@@ -66,9 +68,10 @@ def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"):
         self.ref_id = key.get("fish_audio_refid")
         self.base_url = base_url
 
-    def transcription(self, text):
+    def tts(self, text):
         from http import HTTPStatus
 
+        text = self.normalize_text(text)
         request = request = ServeTTSRequest(text=text, reference_id=self.ref_id)
 
         with httpx.Client() as client:

From ade2b238fbd4cee4f0fc3efd62cba4bb8318a807 Mon Sep 17 00:00:00 2001
From: Kevin Hu <kevinhu.sh@gmail.com>
Date: Mon, 2 Sep 2024 18:32:35 +0800
Subject: [PATCH 2/3] Rename Base.transcription to tts in rag/llm/tts_model.py

---
 rag/llm/tts_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py
index 0e6205893ec..f77c3784b9e 100644
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@@ -50,7 +50,7 @@ class Base(ABC):
     def __init__(self, key, model_name, base_url):
         pass
 
-    def transcription(self, audio):
+    def tts(self, audio):
         pass
     
     def normalize_text(text):

From 2f247e769fc7448cbb474215d480285cccbce936 Mon Sep 17 00:00:00 2001
From: Kevin Hu <kevinhu.sh@gmail.com>
Date: Mon, 2 Sep 2024 18:32:56 +0800
Subject: [PATCH 3/3] Remove duplicated `request =` assignment in rag/llm/tts_model.py

---
 rag/llm/tts_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py
index f77c3784b9e..3ae170ee8e0 100644
--- a/rag/llm/tts_model.py
+++ b/rag/llm/tts_model.py
@@ -72,7 +72,7 @@ def tts(self, text):
         from http import HTTPStatus
 
         text = self.normalize_text(text)
-        request = request = ServeTTSRequest(text=text, reference_id=self.ref_id)
+        request = ServeTTSRequest(text=text, reference_id=self.ref_id)
 
         with httpx.Client() as client:
             try: