Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fork Sync: Update from parent repository #1

Merged
merged 18 commits into from
Apr 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
> [!IMPORTANT]
> [!IMPORTANT]
> 2024.3.11: 恭迎Claude3和Moonshot,全力支持Qwen、GLM、DeepseekCoder等中文大语言模型!
> 2024.1.18: 更新3.70版本,支持Mermaid绘图库(让大模型绘制脑图)
> 2024.1.17: 恭迎GLM4,全力支持Qwen、GLM、DeepseekCoder等国内中文大语言基座模型!
> 2024.1.17: 某些依赖包尚不兼容python 3.12,推荐python 3.11。
> 2024.1.17: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目完全开源免费,您可通过订阅[在线服务](https://github.com/binary-husky/gpt_academic/wiki/online)的方式鼓励本项目的发展。

<br>
Expand Down
10 changes: 5 additions & 5 deletions check_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def backup_and_download(current_version, remote_version):
shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
proxies = get_conf('proxies')
try: r = requests.get('https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
except: r = requests.get('https://public.gpt-academic.top/publish/master.zip', proxies=proxies, stream=True)
except: r = requests.get('https://public.agent-matrix.com/publish/master.zip', proxies=proxies, stream=True)
zip_file_path = backup_dir+'/master.zip'
with open(zip_file_path, 'wb+') as f:
f.write(r.content)
Expand Down Expand Up @@ -81,7 +81,7 @@ def patch_and_restart(path):
dir_util.copy_tree(path_new_version, './')
print亮绿('代码已经更新,即将更新pip包依赖……')
for i in reversed(range(5)): time.sleep(1); print(i)
try:
try:
import subprocess
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'])
except:
Expand Down Expand Up @@ -113,7 +113,7 @@ def auto_update(raise_error=False):
import json
proxies = get_conf('proxies')
try: response = requests.get("https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
except: response = requests.get("https://public.gpt-academic.top/publish/version", proxies=proxies, timeout=5)
except: response = requests.get("https://public.agent-matrix.com/publish/version", proxies=proxies, timeout=5)
remote_json_data = json.loads(response.text)
remote_version = remote_json_data['version']
if remote_json_data["show_feature"]:
Expand Down Expand Up @@ -159,15 +159,15 @@ def warm_up_modules():
enc.encode("模块预热", disallowed_special=())
enc = model_info["gpt-4"]['tokenizer']
enc.encode("模块预热", disallowed_special=())

def warm_up_vectordb():
print('正在执行一些模块的预热 ...')
from toolbox import ProxyNetworkActivate
with ProxyNetworkActivate("Warmup_Modules"):
import nltk
with ProxyNetworkActivate("Warmup_Modules"): nltk.download("punkt")


if __name__ == '__main__':
import os
os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
Expand Down
2 changes: 1 addition & 1 deletion colorful.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

if platform.system()=="Linux":
pass
else:
else:
from colorama import init
init()

Expand Down
85 changes: 49 additions & 36 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,33 @@
else:
proxies = None

# ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------
# [step 3]>> 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
LLM_MODEL = "gpt-3.5-turbo-16k" # 可选 ↓↓↓
AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-preview",
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
"gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-3-turbo",
"gemini-pro", "chatglm3"
]
# --- --- --- ---
# P.S. 其他可用的模型还包括
# AVAIL_LLM_MODELS = [
# "qianfan", "deepseekcoder",
# "spark", "sparkv2", "sparkv3", "sparkv3.5",
# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local",
# "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k",
# "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125"
# "claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2",
# "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama",
# "yi-34b-chat-0205", "yi-34b-chat-200k"
# ]
# --- --- --- ---
# 此外,为了更灵活地接入one-api多模型管理界面,您还可以在接入one-api时,
# 使用"one-api-*"前缀直接使用非标准方式接入的模型,例如
# AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)"]
# --- --- --- ---


# --------------- 以下配置可以优化体验 ---------------

# 重新URL重新定向,实现更换API_URL的作用(高危设置! 常规情况下不要修改! 通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!)
# 格式: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
Expand Down Expand Up @@ -85,22 +111,6 @@
DEFAULT_FN_GROUPS = ['对话', '编程', '学术', '智能体']


# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
LLM_MODEL = "gpt-3.5-turbo-16k" # 可选 ↓↓↓
AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-preview",
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
"gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-3-turbo",
"gemini-pro", "chatglm3", "claude-2"]
# P.S. 其他可用的模型还包括 [
# "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k",
# "qwen-turbo", "qwen-plus", "qwen-max",
# "zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613", "moss",
# "gpt-3.5-turbo-16k-0613", "gpt-3.5-random", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
# "spark", "sparkv2", "sparkv3", "sparkv3.5",
# "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
# ]


# 定义界面上“询问多个GPT模型”插件应该使用哪些模型,请从AVAIL_LLM_MODELS中选择,并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4"
MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"

Expand Down Expand Up @@ -129,6 +139,7 @@
LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本


# 设置gradio的并行线程数(不需要修改)
CONCURRENT_COUNT = 100

Expand Down Expand Up @@ -174,14 +185,8 @@
AZURE_CFG_ARRAY = {}


# 使用Newbing (不推荐使用,未来将删除)
NEWBING_STYLE = "creative" # ["creative", "balanced", "precise"]
NEWBING_COOKIES = """
put your new bing cookies here
"""


# 阿里云实时语音识别 配置难度较高 仅建议高手用户使用 参考 https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md
# 阿里云实时语音识别 配置难度较高
# 参考 https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md
ENABLE_AUDIO = False
ALIYUN_TOKEN="" # 例如 f37f30e0f9934c34a992f6f64f7eba4f
ALIYUN_APPKEY="" # 例如 RoPlZrM88DnAFkZK
Expand All @@ -200,18 +205,18 @@
ZHIPUAI_MODEL = "" # 此选项已废弃,不再需要填写


# # 火山引擎YUNQUE大模型
# YUNQUE_SECRET_KEY = ""
# YUNQUE_ACCESS_KEY = ""
# YUNQUE_MODEL = ""


# Claude API KEY
ANTHROPIC_API_KEY = ""


# 月之暗面 API KEY
MOONSHOT_API_KEY = ""


# 零一万物(Yi Model) API KEY
YIMODEL_API_KEY = ""


# Mathpix 拥有执行PDF的OCR功能,但是需要注册账号
MATHPIX_APPID = ""
MATHPIX_APPKEY = ""
Expand Down Expand Up @@ -270,7 +275,11 @@
# 自定义按钮的最大数量限制
NUM_CUSTOM_BASIC_BTN = 4



"""
--------------- 配置关联关系说明 ---------------

在线大模型配置关联关系示意图
├── "gpt-3.5-turbo" 等openai模型
Expand All @@ -294,7 +303,7 @@
│ ├── XFYUN_API_SECRET
│ └── XFYUN_API_KEY
├── "claude-1-100k" 等claude模型
├── "claude-3-opus-20240229" 等claude模型
│ └── ANTHROPIC_API_KEY
├── "stack-claude"
Expand All @@ -309,15 +318,19 @@
├── "glm-4", "glm-3-turbo", "zhipuai" 智谱AI大模型
│ └── ZHIPUAI_API_KEY
├── "yi-34b-chat-0205", "yi-34b-chat-200k" 等零一万物(Yi Model)大模型
│ └── YIMODEL_API_KEY
├── "qwen-turbo" 等通义千问大模型
│ └── DASHSCOPE_API_KEY
├── "Gemini"
│ └── GEMINI_API_KEY
└── "newbing" Newbing接口不再稳定,不推荐使用
├── NEWBING_STYLE
└── NEWBING_COOKIES
└── "one-api-...(max_token=...)" 用一种更方便的方式接入one-api多模型管理界面
├── AVAIL_LLM_MODELS
├── API_KEY
└── API_URL_REDIRECT


本地大模型示意图
Expand Down Expand Up @@ -364,4 +377,4 @@
└── MATHPIX_APPKEY


"""
"""
40 changes: 20 additions & 20 deletions core_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ def get_core_functions():
# [6] 文本预处理 (可选参数,默认 None,举例:写个函数移除所有的换行符)
"PreProcess": None,
},


"总结绘制脑图": {
# 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
"Prefix": r"",
"Prefix": '''"""\n\n''',
# 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
"Suffix":
# dedent() 函数用于去除多行字符串的缩进
dedent("\n"+r'''
==============================
dedent("\n\n"+r'''
"""

使用mermaid flowchart对以上文本进行总结,概括上述段落的内容以及内在逻辑关系,例如:

Expand All @@ -57,15 +57,15 @@ def get_core_functions():
C --> |"箭头名2"| F["节点名6"]
```

警告
注意
(1)使用中文
(2)节点名字使用引号包裹,如["Laptop"]
(3)`|` 和 `"`之间不要存在空格
(4)根据情况选择flowchart LR(从左到右)或者flowchart TD(从上到下)
'''),
},


"查找语法错误": {
"Prefix": r"Help me ensure that the grammar and the spelling is correct. "
r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good. "
Expand All @@ -85,14 +85,14 @@ def get_core_functions():
"Suffix": r"",
"PreProcess": clear_line_break, # 预处理:清除换行符
},


"中译英": {
"Prefix": r"Please translate following sentence to English:" + "\n\n",
"Suffix": r"",
},


"学术英中互译": {
"Prefix": build_gpt_academic_masked_string_langbased(
text_show_chinese=
Expand All @@ -112,29 +112,29 @@ def get_core_functions():
) + "\n\n",
"Suffix": r"",
},


"英译中": {
"Prefix": r"翻译成地道的中文:" + "\n\n",
"Suffix": r"",
"Visible": False,
},


"找图片": {
"Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL,"
r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n",
"Suffix": r"",
"Visible": False,
},


"解释代码": {
"Prefix": r"请解释以下代码:" + "\n```\n",
"Suffix": "\n```\n",
},


"参考文献转Bib": {
"Prefix": r"Here are some bibliography items, please transform them into bibtex style."
r"Note that, reference styles maybe more than one kind, you should transform each item correctly."
Expand Down
28 changes: 14 additions & 14 deletions crazy_functions/Latex全文润色.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def write_result(self):
manifest.append(path + '.polish.tex')
f.write(res)
return manifest

def zip_result(self):
import os, time
folder = os.path.dirname(self.file_paths[0])
Expand All @@ -59,7 +59,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency


# <-------- 读取Latex文件,删除其中的所有注释 ---------->
# <-------- 读取Latex文件,删除其中的所有注释 ---------->
pfg = PaperFileGroup()

for index, fp in enumerate(file_manifest):
Expand All @@ -73,31 +73,31 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
pfg.file_paths.append(fp)
pfg.file_contents.append(clean_tex_content)

# <-------- 拆分过长的latex文件 ---------->
# <-------- 拆分过长的latex文件 ---------->
pfg.run_file_split(max_token_limit=1024)
n_split = len(pfg.sp_file_contents)


# <-------- 多线程润色开始 ---------->
# <-------- 多线程润色开始 ---------->
if language == 'en':
if mode == 'polish':
inputs_array = ["Below is a section from an academic paper, polish this section to meet the academic standard, " +
"improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
inputs_array = [r"Below is a section from an academic paper, polish this section to meet the academic standard, " +
r"improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
else:
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the revised text:" +
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the revised text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
elif language == 'zh':
if mode == 'polish':
inputs_array = [f"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
inputs_array = [r"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
else:
inputs_array = [f"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_array = [r"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
sys_prompt_array=["你是一位专业的中文学术论文作家。" for _ in range(n_split)]

Expand All @@ -113,7 +113,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
scroller_max_len = 80
)

# <-------- 文本碎片重组为完整的tex文件,整理结果为压缩包 ---------->
# <-------- 文本碎片重组为完整的tex文件,整理结果为压缩包 ---------->
try:
pfg.sp_file_result = []
for i_say, gpt_say in zip(gpt_response_collection[0::2], gpt_response_collection[1::2]):
Expand All @@ -124,7 +124,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
except:
print(trimmed_format_exc())

# <-------- 整理结果,退出 ---------->
# <-------- 整理结果,退出 ---------->
create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
res = write_history_to_file(gpt_response_collection, file_basename=create_report_file_name)
promote_file_to_downloadzone(res, chatbot=chatbot)
Expand Down
Loading