From d5cb51a91598a60a566edcaa92a3da82b1258e5d Mon Sep 17 00:00:00 2001 From: Zhedong Cen Date: Tue, 27 Aug 2024 09:41:16 +0800 Subject: [PATCH 1/7] update groq llm --- conf/llm_factories.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 289dcffa1a2..0082a3f752b 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -906,6 +906,18 @@ "max_tokens": 8192, "model_type": "chat" }, + { + "llm_name": "llama-3.1-70b-versatile", + "tags": "LLM,CHAT,128k", + "max_tokens": 131072, + "model_type": "chat" + }, + { + "llm_name": "llama-3.1-8b-instant", + "tags": "LLM,CHAT,128k", + "max_tokens": 131072, + "model_type": "chat" + }, { "llm_name": "mixtral-8x7b-32768", "tags": "LLM,CHAT,5k", From f691d86b8e2523511444e33b9a5d9ccd36c67242 Mon Sep 17 00:00:00 2001 From: Zhedong Cen Date: Tue, 27 Aug 2024 11:47:14 +0800 Subject: [PATCH 2/7] add tts api --- api/apps/conversation_app.py | 39 +++++++++++++++++++++++++++++++-- api/db/services/user_service.py | 1 + 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index a1da4156bb5..3d5d6289e2a 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -15,8 +15,9 @@ # from copy import deepcopy from flask import request, Response -from flask_login import login_required +from flask_login import login_required,current_user from api.db.services.dialog_service import DialogService, ConversationService, chat +from api.db.services.llm_service import LLMBundle, TenantService from api.utils.api_utils import server_error_response, get_data_error_result, validate_request from api.utils import get_uuid from api.utils.api_utils import get_json_result @@ -176,6 +177,40 @@ def stream(): return server_error_response(e) +@manager.route('/tts', methods=['POST']) +@login_required +def tts(): + req = request.json + text = req["text"] + + tenants = 
TenantService.get_by_user_id(current_user.id) + if not tenants: + return get_data_error_result(retmsg="Tenant not found!") + + tts_id = tenants[0]["tts_id"] + if not tts_id: + return get_data_error_result(retmsg="No default TTS model is set") + + tts_mdl = LLMBundle(tenants[0]["tenant_id"],"tts",tts_id) + def stream_audio(): + try: + for chunk in tts_mdl(text): + yield chunk + except Exception as e: + yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e), + "data": {"answer": "**ERROR**: "+str(e)}}, + ensure_ascii=False).encode('utf-8') + + resp = Response(stream_audio(), mimetype="audio/mpeg") + resp.headers.add_header("Cache-Control", "no-cache") + resp.headers.add_header("Connection", "keep-alive") + resp.headers.add_header("X-Accel-Buffering", "no") + + return resp + + + + @manager.route('/delete_msg', methods=['POST']) @login_required @validate_request("conversation_id", "message_id") @@ -221,4 +256,4 @@ def thumbup(): break ConversationService.update_by_id(conv["id"], conv) - return get_json_result(data=conv) \ No newline at end of file + return get_json_result(data=conv) diff --git a/api/db/services/user_service.py b/api/db/services/user_service.py index 07468b814bb..07e20d47a3e 100644 --- a/api/db/services/user_service.py +++ b/api/db/services/user_service.py @@ -96,6 +96,7 @@ def get_by_user_id(cls, user_id): cls.model.rerank_id, cls.model.asr_id, cls.model.img2txt_id, + cls.model.tts_id, cls.model.parser_ids, UserTenant.role] return list(cls.model.select(*fields) From 16cb05f95e7b66a358482ca7bd577da7ac7e2a71 Mon Sep 17 00:00:00 2001 From: Zhedong Cen Date: Tue, 27 Aug 2024 11:49:51 +0800 Subject: [PATCH 3/7] update --- api/apps/conversation_app.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index 3d5d6289e2a..06f108b0cd1 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -209,8 +209,6 @@ def stream_audio(): return resp - - @manager.route('/delete_msg', 
methods=['POST']) @login_required @validate_request("conversation_id", "message_id") From 08aff569fb391817453d3c9168c3ab302bd81912 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Aug 2024 13:10:56 +0800 Subject: [PATCH 4/7] Update api/apps/conversation_app.py --- api/apps/conversation_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index 06f108b0cd1..15a40ab726f 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -191,7 +191,7 @@ def tts(): if not tts_id: return get_data_error_result(retmsg="No default TTS model is set") - tts_mdl = LLMBundle(tenants[0]["tenant_id"],"tts",tts_id) + tts_mdl = LLMBundle(tenants[0]["tenant_id"], LLMType.TTS, tts_id) def stream_audio(): try: for chunk in tts_mdl(text): From 1c7a990f6685175a8b7b6e7b263324dea9448030 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 27 Aug 2024 13:11:09 +0800 Subject: [PATCH 5/7] Update api/apps/conversation_app.py --- api/apps/conversation_app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index 15a40ab726f..fbff768508c 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -18,6 +18,7 @@ from flask_login import login_required,current_user from api.db.services.dialog_service import DialogService, ConversationService, chat from api.db.services.llm_service import LLMBundle, TenantService +from api.db import LLMType from api.utils.api_utils import server_error_response, get_data_error_result, validate_request from api.utils import get_uuid from api.utils.api_utils import get_json_result From 805c0e9ea5d5c291a68608ff1a80c4217f8dea5c Mon Sep 17 00:00:00 2001 From: Zhedong Cen Date: Tue, 27 Aug 2024 18:35:05 +0800 Subject: [PATCH 6/7] add support for Anthropic --- conf/llm_factories.json | 50 +++++++++++++ rag/llm/__init__.py | 3 +- rag/llm/chat_model.py | 72 +++++++++++++++++-- 
web/src/assets/svg/llm/anthropic.svg | 1 + .../user-setting/setting-model/constant.ts | 1 + 5 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 web/src/assets/svg/llm/anthropic.svg diff --git a/conf/llm_factories.json b/conf/llm_factories.json index dd09833a613..5d2c7b10c9f 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -3240,6 +3240,56 @@ "tags": "SPEECH2TEXT", "status": "1", "llm": [] + }, + { + "name": "Anthropic", + "logo": "", + "tags": "LLM", + "status": "1", + "llm": [ + { + "llm_name": "claude-3-5-sonnet-20240620", + "tags": "LLM,CHAT,200k", + "max_tokens": 204800, + "model_type": "chat" + }, + { + "llm_name": "claude-3-opus-20240229", + "tags": "LLM,CHAT,200k", + "max_tokens": 204800, + "model_type": "chat" + }, + { + "llm_name": "claude-3-sonnet-20240229", + "tags": "LLM,CHAT,200k", + "max_tokens": 204800, + "model_type": "chat" + }, + { + "llm_name": "claude-3-haiku-20240307", + "tags": "LLM,CHAT,200k", + "max_tokens": 204800, + "model_type": "chat" + }, + { + "llm_name": "claude-2.1", + "tags": "LLM,CHAT,200k", + "max_tokens": 204800, + "model_type": "chat" + }, + { + "llm_name": "claude-2.0", + "tags": "LLM,CHAT,100k", + "max_tokens": 102400, + "model_type": "chat" + }, + { + "llm_name": "claude-instant-1.2", + "tags": "LLM,CHAT,100k", + "max_tokens": 102400, + "model_type": "chat" + } + ] } ] } diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py index adcb53f1ca9..ef37d6446fa 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -104,7 +104,8 @@ "Replicate": ReplicateChat, "Tencent Hunyuan": HunyuanChat, "XunFei Spark": SparkChat, - "BaiduYiyan": BaiduYiyanChat + "BaiduYiyan": BaiduYiyanChat, + "Anthropic": AnthropicChat } diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 64c39912ffa..3af0e0257a4 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -1132,7 +1132,7 @@ def __init__( class BaiduYiyanChat(Base): def __init__(self, key, model_name, base_url=None): import 
qianfan - + key = json.loads(key) ak = key.get("yiyan_ak","") sk = key.get("yiyan_sk","") @@ -1149,7 +1149,7 @@ def chat(self, system, history, gen_conf): if "max_tokens" in gen_conf: gen_conf["max_output_tokens"] = gen_conf["max_tokens"] ans = "" - + try: response = self.client.do( model=self.model_name, @@ -1159,7 +1159,7 @@ def chat(self, system, history, gen_conf): ).body ans = response['result'] return ans, response["usage"]["total_tokens"] - + except Exception as e: return ans + "\n**ERROR**: " + str(e), 0 @@ -1173,7 +1173,7 @@ def chat_streamly(self, system, history, gen_conf): gen_conf["max_output_tokens"] = gen_conf["max_tokens"] ans = "" total_tokens = 0 - + try: response = self.client.do( model=self.model_name, @@ -1193,3 +1193,67 @@ def chat_streamly(self, system, history, gen_conf): return ans + "\n**ERROR**: " + str(e), 0 yield total_tokens + + +class AnthropicChat(Base): + def __init__(self, key, model_name, base_url=None): + import anthropic + + self.client = anthropic.Anthropic(api_key=key) + self.model_name = model_name + self.system = "" + + def chat(self, system, history, gen_conf): + if system: + self.system = system + if "max_tokens" not in gen_conf: + gen_conf["max_tokens"] = 4096 + + try: + response = self.client.messages.create( + model=self.model_name, + messages=history, + system=self.system, + stream=False, + **gen_conf, + ).json() + ans = response["content"][0]["text"] + if response["stop_reason"] == "max_tokens": + ans += ( + "...\nFor the content length reason, it stopped, continue?" + if is_english([ans]) + else "······\n由于长度的原因,回答被截断了,要继续吗?" 
+ ) + return ( + ans, + response["usage"]["input_tokens"] + response["usage"]["output_tokens"], + ) + except Exception as e: + return ans + "\n**ERROR**: " + str(e), 0 + + def chat_streamly(self, system, history, gen_conf): + if system: + self.system = system + if "max_tokens" not in gen_conf: + gen_conf["max_tokens"] = 4096 + + ans = "" + total_tokens = 0 + try: + response = self.client.messages.create( + model=self.model_name, + messages=history, + system=self.system, + stream=True, + **gen_conf, + ) + for res in response.iter_lines(): + res = res.decode("utf-8") + if "content_block_delta" in res and "data" in res: + text = json.loads(res[6:])["delta"]["text"] + ans += text + total_tokens += num_tokens_from_string(text) + except Exception as e: + yield ans + "\n**ERROR**: " + str(e) + + yield total_tokens diff --git a/web/src/assets/svg/llm/anthropic.svg b/web/src/assets/svg/llm/anthropic.svg new file mode 100644 index 00000000000..249c9503cb8 --- /dev/null +++ b/web/src/assets/svg/llm/anthropic.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/web/src/pages/user-setting/setting-model/constant.ts b/web/src/pages/user-setting/setting-model/constant.ts index dae68f74672..33cf76d86a9 100644 --- a/web/src/pages/user-setting/setting-model/constant.ts +++ b/web/src/pages/user-setting/setting-model/constant.ts @@ -37,6 +37,7 @@ export const IconMap = { BaiduYiyan: 'yiyan', 'Fish Audio': 'fish-audio', 'Tencent Cloud': 'tencent-cloud', + Anthropic: 'anthropic', }; export const BedrockRegionList = [ From b02da83b43e2eba9b655f3f795a4093b1c98f845 Mon Sep 17 00:00:00 2001 From: Zhedong Cen Date: Thu, 29 Aug 2024 13:21:06 +0800 Subject: [PATCH 7/7] update --- requirements.txt | 1 + requirements_arm.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 094e20151fe..0bcd697af4b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +anthropic===0.34.1 arxiv==2.1.3 Aspose.Slides==24.2.0 BCEmbedding==0.1.3 diff 
--git a/requirements_arm.txt b/requirements_arm.txt index d064ee40ace..1207d6d8d21 100644 --- a/requirements_arm.txt +++ b/requirements_arm.txt @@ -2,6 +2,7 @@ accelerate==0.27.2 aiohttp==3.9.4 aiosignal==1.3.1 annotated-types==0.6.0 +anthropic==0.34.1 anyio==4.3.0 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0