
Merge pull request #1 from binary-husky/master
Fork Sync: Update from parent repository
github-actions[bot] authored Apr 7, 2024
2 parents fa930d6 + ae51a0e commit 018a37c
Showing 94 changed files with 1,573 additions and 835 deletions.
5 changes: 2 additions & 3 deletions README.md
@@ -1,7 +1,6 @@
> [!IMPORTANT]
> [!IMPORTANT]
> 2024.3.11: Welcoming Claude3 and Moonshot, with full support for Chinese LLMs such as Qwen, GLM, and DeepseekCoder!
> 2024.1.18: Version 3.70 released, adding support for the Mermaid diagram library (letting the LLM draw mind maps)
> 2024.1.17: Welcoming GLM4, with full support for domestic Chinese foundation LLMs such as Qwen, GLM, and DeepseekCoder!
> 2024.1.17: Some dependencies are not yet compatible with Python 3.12; Python 3.11 is recommended.
> 2024.1.17: When installing dependencies, please use the versions **pinned** in `requirements.txt`. Install command: `pip install -r requirements.txt`. This project is fully open source and free; you can support its development by subscribing to the [online service](https://github.com/binary-husky/gpt_academic/wiki/online).
<br>
10 changes: 5 additions & 5 deletions check_proxy.py
@@ -47,7 +47,7 @@ def backup_and_download(current_version, remote_version):
shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
proxies = get_conf('proxies')
try: r = requests.get('https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
except: r = requests.get('https://public.gpt-academic.top/publish/master.zip', proxies=proxies, stream=True)
except: r = requests.get('https://public.agent-matrix.com/publish/master.zip', proxies=proxies, stream=True)
zip_file_path = backup_dir+'/master.zip'
with open(zip_file_path, 'wb+') as f:
f.write(r.content)
@@ -81,7 +81,7 @@ def patch_and_restart(path):
dir_util.copy_tree(path_new_version, './')
print亮绿('Code updated; pip package dependencies will be upgraded next ...')
for i in reversed(range(5)): time.sleep(1); print(i)
try:
try:
import subprocess
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'])
except:
@@ -113,7 +113,7 @@ def auto_update(raise_error=False):
import json
proxies = get_conf('proxies')
try: response = requests.get("https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
except: response = requests.get("https://public.gpt-academic.top/publish/version", proxies=proxies, timeout=5)
except: response = requests.get("https://public.agent-matrix.com/publish/version", proxies=proxies, timeout=5)
remote_json_data = json.loads(response.text)
remote_version = remote_json_data['version']
if remote_json_data["show_feature"]:
@@ -159,15 +159,15 @@ def warm_up_modules():
enc.encode("模块预热", disallowed_special=())
enc = model_info["gpt-4"]['tokenizer']
enc.encode("模块预热", disallowed_special=())

def warm_up_vectordb():
print('Warming up some modules ...')
from toolbox import ProxyNetworkActivate
with ProxyNetworkActivate("Warmup_Modules"):
import nltk
with ProxyNetworkActivate("Warmup_Modules"): nltk.download("punkt")


if __name__ == '__main__':
import os
os.environ['no_proxy'] = '*' # avoid unexpected contamination from proxy networks
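Both hunks in this file follow the same primary-plus-mirror pattern: a GitHub request wrapped in try, with the self-hosted mirror (now public.agent-matrix.com, previously public.gpt-academic.top) as the except fallback. A minimal sketch of that pattern, assuming a hypothetical helper name and an explicit timeout that the original bare except does not spell out:

```python
import requests

PRIMARY = 'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip'
MIRROR = 'https://public.agent-matrix.com/publish/master.zip'

def fetch_with_fallback(proxies=None, timeout=30):
    """Try the GitHub origin first; fall back to the self-hosted mirror."""
    try:
        return requests.get(PRIMARY, proxies=proxies, stream=True, timeout=timeout)
    except requests.RequestException:
        # e.g. GitHub unreachable from behind a restricted network
        return requests.get(MIRROR, proxies=proxies, stream=True, timeout=timeout)
```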
2 changes: 1 addition & 1 deletion colorful.py
@@ -3,7 +3,7 @@

if platform.system()=="Linux":
pass
else:
else:
from colorama import init
init()

85 changes: 49 additions & 36 deletions config.py
@@ -30,7 +30,33 @@
else:
proxies = None

# ------------------------------------ The following settings can improve the experience, but in most cases do not need to be modified ------------------------------------
# [step 3]>> Model selection (note: LLM_MODEL is the model selected by default; it *must* be included in the AVAIL_LLM_MODELS list)
LLM_MODEL = "gpt-3.5-turbo-16k" # options ↓↓↓
AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-preview",
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
"gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-3-turbo",
"gemini-pro", "chatglm3"
]
# --- --- --- ---
# P.S. Other available models include
# AVAIL_LLM_MODELS = [
# "qianfan", "deepseekcoder",
# "spark", "sparkv2", "sparkv3", "sparkv3.5",
# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local",
# "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k",
# "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125"
# "claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2",
# "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama",
# "yi-34b-chat-0205", "yi-34b-chat-200k"
# ]
# --- --- --- ---
# In addition, for more flexible access through the one-api multi-model management interface, you can,
# when connecting via one-api, use the "one-api-*" prefix for models integrated in non-standard ways, for example
# AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)"]
# --- --- --- ---


# --------------- The following settings can improve the experience ---------------

# URL redirection, which swaps out the API_URL (HIGH-RISK setting! Do not modify under normal circumstances! By changing it, you fully expose your API-KEY and conversation privacy to the middleman you designate!)
# Format: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "fill in the redirect URL for api.openai.com here"}
@@ -85,22 +111,6 @@
DEFAULT_FN_GROUPS = ['对话', '编程', '学术', '智能体']


# Model selection (note: LLM_MODEL is the model selected by default; it *must* be included in the AVAIL_LLM_MODELS list)
LLM_MODEL = "gpt-3.5-turbo-16k" # options ↓↓↓
AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-preview",
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
"gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-3-turbo",
"gemini-pro", "chatglm3", "claude-2"]
# P.S. Other available models include [
# "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k",
# "qwen-turbo", "qwen-plus", "qwen-max",
# "zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613", "moss",
# "gpt-3.5-turbo-16k-0613", "gpt-3.5-random", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
# "spark", "sparkv2", "sparkv3", "sparkv3.5",
# "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
# ]


# Define which models the "query multiple GPT models" plugin in the UI should use; choose from AVAIL_LLM_MODELS and separate different models with `&`, e.g. "gpt-3.5-turbo&chatglm3&azure-gpt-4"
MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"

@@ -129,6 +139,7 @@
LOCAL_MODEL_DEVICE = "cpu" # options: "cuda"
LOCAL_MODEL_QUANT = "FP16" # default "FP16"; "INT4" enables the INT4 quantized version, "INT8" the INT8 quantized version


# Number of parallel threads for gradio (no need to modify)
CONCURRENT_COUNT = 100

@@ -174,14 +185,8 @@
AZURE_CFG_ARRAY = {}


# Use Newbing (not recommended; will be removed in the future)
NEWBING_STYLE = "creative" # ["creative", "balanced", "precise"]
NEWBING_COOKIES = """
put your new bing cookies here
"""


# Alibaba Cloud real-time speech recognition: fairly difficult to configure, recommended for advanced users only; see https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md
# Alibaba Cloud real-time speech recognition: fairly difficult to configure
# See https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md
ENABLE_AUDIO = False
ALIYUN_TOKEN="" # e.g. f37f30e0f9934c34a992f6f64f7eba4f
ALIYUN_APPKEY="" # e.g. RoPlZrM88DnAFkZK
@@ -200,18 +205,18 @@
ZHIPUAI_MODEL = "" # deprecated; no longer needs to be filled in


# # Volcano Engine YUNQUE LLM
# YUNQUE_SECRET_KEY = ""
# YUNQUE_ACCESS_KEY = ""
# YUNQUE_MODEL = ""


# Claude API KEY
ANTHROPIC_API_KEY = ""


# Moonshot AI API KEY
MOONSHOT_API_KEY = ""


# 01.AI (Yi Model) API KEY
YIMODEL_API_KEY = ""


# Mathpix can perform OCR on PDFs, but requires a registered account
MATHPIX_APPID = ""
MATHPIX_APPKEY = ""
@@ -270,7 +275,11 @@
# Maximum number of custom buttons
NUM_CUSTOM_BASIC_BTN = 4



"""
--------------- Configuration dependency notes ---------------
Dependency diagram for online LLM configuration
├── "gpt-3.5-turbo" and other OpenAI models
@@ -294,7 +303,7 @@
│ ├── XFYUN_API_SECRET
│ └── XFYUN_API_KEY
├── "claude-1-100k" 等claude模型
├── "claude-3-opus-20240229" 等claude模型
│ └── ANTHROPIC_API_KEY
├── "stack-claude"
@@ -309,15 +318,19 @@
├── "glm-4", "glm-3-turbo", "zhipuai" 智谱AI大模型
│ └── ZHIPUAI_API_KEY
├── "yi-34b-chat-0205", "yi-34b-chat-200k" 等零一万物(Yi Model)大模型
│ └── YIMODEL_API_KEY
├── "qwen-turbo" 等通义千问大模型
│ └── DASHSCOPE_API_KEY
├── "Gemini"
│ └── GEMINI_API_KEY
└── "newbing" Newbing接口不再稳定,不推荐使用
├── NEWBING_STYLE
└── NEWBING_COOKIES
└── "one-api-...(max_token=...)" 用一种更方便的方式接入one-api多模型管理界面
├── AVAIL_LLM_MODELS
├── API_KEY
└── API_URL_REDIRECT
Diagram for local LLMs
@@ -364,4 +377,4 @@
└── MATHPIX_APPKEY
"""
"""
40 changes: 20 additions & 20 deletions core_functional.py
@@ -34,16 +34,16 @@ def get_core_functions():
# [6] Text preprocessing (optional parameter, default None; example: write a function that removes all line breaks)
"PreProcess": None,
},


"总结绘制脑图": {
# Prefix, added before your input. For example, used to state your request, such as translating, explaining code, polishing, etc.
"Prefix": r"",
"Prefix": '''"""\n\n''',
# Suffix, added after your input. For example, combined with the prefix it can wrap your input in quotes
"Suffix":
# The dedent() function removes the indentation of a multi-line string
dedent("\n"+r'''
==============================
dedent("\n\n"+r'''
"""
使用mermaid flowchart对以上文本进行总结,概括上述段落的内容以及内在逻辑关系,例如:
@@ -57,15 +57,15 @@ def get_core_functions():
C --> |"箭头名2"| F["节点名6"]
```
警告
注意
(1)使用中文
(2)节点名字使用引号包裹,如["Laptop"]
(3)`|` 和 `"`之间不要存在空格
(4)根据情况选择flowchart LR(从左到右)或者flowchart TD(从上到下)
'''),
},


"查找语法错误": {
"Prefix": r"Help me ensure that the grammar and the spelling is correct. "
r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good. "
@@ -85,14 +85,14 @@ def get_core_functions():
"Suffix": r"",
"PreProcess": clear_line_break, # 预处理:清除换行符
},


"中译英": {
"Prefix": r"Please translate following sentence to English:" + "\n\n",
"Suffix": r"",
},


"学术英中互译": {
"Prefix": build_gpt_academic_masked_string_langbased(
text_show_chinese=
@@ -112,29 +112,29 @@ def get_core_functions():
) + "\n\n",
"Suffix": r"",
},


"英译中": {
"Prefix": r"翻译成地道的中文:" + "\n\n",
"Suffix": r"",
"Visible": False,
},


"找图片": {
"Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL,"
r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n",
"Suffix": r"",
"Visible": False,
},


"解释代码": {
"Prefix": r"请解释以下代码:" + "\n```\n",
"Suffix": "\n```\n",
},


"参考文献转Bib": {
"Prefix": r"Here are some bibliography items, please transform them into bibtex style."
r"Note that, reference styles maybe more than one kind, you should transform each item correctly."
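The substantive change in the mind-map entry of this file is that the user's text is now sandwiched in triple quotes: the Prefix contributes the opening quote block and the dedent()-ed Suffix closes it before the mermaid instructions, so the model can separate quoted source text from the task description. A sketch of how such a Prefix/Suffix pair composes the final prompt (compose_prompt is an illustrative stand-in, not the project's actual plumbing):

```python
from textwrap import dedent

def compose_prompt(user_text: str) -> str:
    # Illustrative only: shows how a core-function entry's Prefix and
    # Suffix sandwich the user's input inside a triple-quoted block.
    prefix = '"""\n\n'
    suffix = dedent('''
        """
        Summarize the text above with a mermaid flowchart ...
        ''')
    return prefix + user_text + suffix

print(compose_prompt("A paragraph to be summarized."))
```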
28 changes: 14 additions & 14 deletions crazy_functions/Latex全文润色.py
@@ -46,7 +46,7 @@ def write_result(self):
manifest.append(path + '.polish.tex')
f.write(res)
return manifest

def zip_result(self):
import os, time
folder = os.path.dirname(self.file_paths[0])
@@ -59,7 +59,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency


# <-------- Read the LaTeX files and delete all comments in them ---------->
# <-------- Read the LaTeX files and delete all comments in them ---------->
pfg = PaperFileGroup()

for index, fp in enumerate(file_manifest):
@@ -73,31 +73,31 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
pfg.file_paths.append(fp)
pfg.file_contents.append(clean_tex_content)

# <-------- Split overly long LaTeX files ---------->
# <-------- Split overly long LaTeX files ---------->
pfg.run_file_split(max_token_limit=1024)
n_split = len(pfg.sp_file_contents)


# <-------- Start multi-threaded polishing ---------->
# <-------- Start multi-threaded polishing ---------->
if language == 'en':
if mode == 'polish':
inputs_array = ["Below is a section from an academic paper, polish this section to meet the academic standard, " +
"improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
inputs_array = [r"Below is a section from an academic paper, polish this section to meet the academic standard, " +
r"improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
else:
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the revised text:" +
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the revised text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
elif language == 'zh':
if mode == 'polish':
inputs_array = [f"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
inputs_array = [r"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
else:
inputs_array = [f"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_array = [r"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
sys_prompt_array=["你是一位专业的中文学术论文作家。" for _ in range(n_split)]

@@ -113,7 +113,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
scroller_max_len = 80
)

# <-------- Reassemble the text fragments into complete tex files and pack the results into an archive ---------->
# <-------- Reassemble the text fragments into complete tex files and pack the results into an archive ---------->
try:
pfg.sp_file_result = []
for i_say, gpt_say in zip(gpt_response_collection[0::2], gpt_response_collection[1::2]):
Expand All @@ -124,7 +124,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
except:
print(trimmed_format_exc())

# <-------- Collect the results and exit ---------->
# <-------- Collect the results and exit ---------->
create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
res = write_history_to_file(gpt_response_collection, file_basename=create_report_file_name)
promote_file_to_downloadzone(res, chatbot=chatbot)
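The recurring change in this file converts prompt literals to raw strings (r"...") so that LaTeX commands inside the prompts survive Python's escape processing; \section happens to pass through because \s is not a recognized escape, but \begin would silently lose a character to the backspace escape \b. A quick self-contained illustration of the hazard:

```python
# Why the hunks above switch to raw strings: in a normal literal,
# recognized escape sequences silently corrupt LaTeX commands.
# (\c is merely an unrecognized escape and survives with a warning.)
plain = "do not modify \begin and \cite"  # \b is parsed as a backspace (\x08)
raw = r"do not modify \begin and \cite"   # backslashes are preserved verbatim

assert "\x08egin" in plain   # the LaTeX command was mangled
assert r"\begin" in raw      # the raw string kept it intact
print(repr(plain), repr(raw), sep="\n")
```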