From 0a85e53e27d89f61b53962df3d51effb9fc640f9 Mon Sep 17 00:00:00 2001 From: Zhedong Cen Date: Mon, 2 Sep 2024 18:18:14 +0800 Subject: [PATCH 1/3] fix tts interface error --- api/apps/conversation_app.py | 6 +++--- api/db/services/llm_service.py | 13 ++++++++++++- rag/llm/tts_model.py | 9 ++++++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index bc4ec163355..c4529704e57 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -196,12 +196,12 @@ def tts(): tts_mdl = LLMBundle(tenants[0]["tenant_id"], LLMType.TTS, tts_id) def stream_audio(): try: - for chunk in tts_mdl(text): + for chunk in tts_mdl.tts(text): yield chunk except Exception as e: - yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e), + yield ("data:" + json.dumps({"retcode": 500, "retmsg": str(e), "data": {"answer": "**ERROR**: "+str(e)}}, - ensure_ascii=False).encode('utf-8') + ensure_ascii=False)).encode('utf-8') resp = Response(stream_audio(), mimetype="audio/mpeg") resp.headers.add_header("Cache-Control", "no-cache") diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index c49669f80e4..9a548e728ab 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -194,7 +194,7 @@ def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"): for lm in LLMService.query(llm_name=llm_name): self.max_length = lm.max_tokens break - + def encode(self, texts: list, batch_size=32): emd, used_tokens = self.mdl.encode(texts, batch_size) if not TenantLLMService.increase_usage( @@ -235,6 +235,17 @@ def transcription(self, audio): "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id)) return txt + def tts(self, text): + for chunk in self.mdl.tts(text): + if isinstance(chunk,int): + if not TenantLLMService.increase_usage( + self.tenant_id, self.llm_type, chunk, self.llm_name): + database_logger.error( + "Can't update token usage for {}/TTS".format(self.tenant_id)) + return + yield chunk + + def chat(self, system, history, gen_conf): txt, used_tokens = self.mdl.chat(system, history, gen_conf) if not TenantLLMService.increase_usage( diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py index 731725e5475..0e6205893ec 100644 --- a/rag/llm/tts_model.py +++ b/rag/llm/tts_model.py @@ -21,7 +21,7 @@ from pydantic import BaseModel, conint from rag.utils import num_tokens_from_string import json - +import re class ServeReferenceAudio(BaseModel): audio: bytes @@ -52,7 +52,9 @@ def __init__(self, key, model_name, base_url): def transcription(self, audio): pass - + + def normalize_text(text): + return re.sub(r'(\*\*|##\d+\$\$|#)', '', text) class FishAudioTTS(Base): def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"): @@ -66,9 +68,10 @@ def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"): self.ref_id = key.get("fish_audio_refid") self.base_url = base_url - def transcription(self, text): + def tts(self, text): from http import HTTPStatus + text = self.normalize_text(text) request = request = ServeTTSRequest(text=text, reference_id=self.ref_id) with httpx.Client() as client: From ade2b238fbd4cee4f0fc3efd62cba4bb8318a807 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 2 Sep 2024 18:32:35 +0800 Subject: [PATCH 2/3] Update rag/llm/tts_model.py --- rag/llm/tts_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py index 0e6205893ec..f77c3784b9e 100644 --- a/rag/llm/tts_model.py +++ b/rag/llm/tts_model.py @@ -50,7 +50,7 @@ class Base(ABC): def __init__(self, key, model_name, base_url): pass - def transcription(self, audio): + def tts(self, audio): pass def normalize_text(text): From 2f247e769fc7448cbb474215d480285cccbce936 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 2 Sep 2024 18:32:56 +0800 Subject: [PATCH 3/3] Update rag/llm/tts_model.py --- rag/llm/tts_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rag/llm/tts_model.py b/rag/llm/tts_model.py index f77c3784b9e..3ae170ee8e0 100644 --- a/rag/llm/tts_model.py +++ b/rag/llm/tts_model.py @@ -72,7 +72,7 @@ def tts(self, text): from http import HTTPStatus text = self.normalize_text(text) - request = request = ServeTTSRequest(text=text, reference_id=self.ref_id) + request = ServeTTSRequest(text=text, reference_id=self.ref_id) with httpx.Client() as client: try: