Skip to content

Commit

Permalink
collect source file
Browse files Browse the repository at this point in the history
  • Loading branch information
withoutabc committed Feb 12, 2024
1 parent a7ce233 commit 9a374d9
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.idea
/data_base
/knowledge_db
/.env
/.env
30 changes: 30 additions & 0 deletions docs/sources.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
药理学
https://zhuanlan.zhihu.com/p/164961617
https://zhuanlan.zhihu.com/p/165216521
医疗器械
http://www.nxgy.gov.cn/zwgk/zfxxgkml/spyp/spypjg3/201708/t20170808_386803.html
http://www.nxgy.gov.cn/zwgk/zfxxgkml/spyp/spypjg3/201708/t20170808_386804.html
心内科
https://www.sohu.com/a/342578283_456057
癌症
https://wjw.hunan.gov.cn/wjw/ztzl/knowledge/zthd/jljh/201605/t20160530_4044693.html
http://www.caca.org.cn/system/2021/04/30/030005405.shtml
https://www.cancer.org/content/dam/cancer-org/cancer-control/zh/booklets-flyers/what-is-cancer.pdf
https://blog.dana-farber.org/insight/zh-hans/%E7%99%8C%E7%97%87%E5%92%8C%E8%82%BF%E7%98%A4%E7%9A%84%E5%8C%BA%E5%88%AB/
眼科
http://cnszgx.cn/read.asp?ID=86
https://zhuanlan.zhihu.com/p/359339233
https://www.sohu.com/a/654565938_120960539
https://www.sohu.com/a/415154851_487694
妇产科
https://www.medsci.cn/article/show_article.do?id=5fbb11600e0a
https://zhuanlan.zhihu.com/p/267976684
http://wsjkw.jl.gov.cn/zdzt/jkkp/202108/t20210805_8170018.html
疾病
https://hospital.nwu.edu.cn/info/1036/2087.htm
http://www.ynlf.gov.cn/info/3132/68031.htm
https://www.kepuchina.cn/qykj/yxqy/201712/t20171208_291650.shtml
http://jk.anhuinews.com/newjklm/jkdjt/202310/t20231003_7104539.html
中医体质
http://www.sdgwlc.com/health_detail/879.html
https://finance.sina.cn/2021-07-27/detail-ikqciyzk7865390.d.html
Binary file modified server/api/__pycache__/hello.cpython-39.pyc
Binary file not shown.
Binary file modified server/service/__pycache__/chain.cpython-39.pyc
Binary file not shown.
Binary file modified server/service/__pycache__/load.cpython-39.pyc
Binary file not shown.
Binary file modified server/service/__pycache__/split.cpython-39.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion server/service/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def load_docs():


def load_db(category: str):
base_directory = '../../data_base/chroma/'
base_directory = '../../data_base/'
persist_directory = os.path.join(base_directory, category)
# 定义 Embeddings
embedding = QianfanEmbeddingsEndpoint(
Expand Down
8 changes: 7 additions & 1 deletion server/service/persist_cmd.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import os
import time

from langchain_community.embeddings import QianfanEmbeddingsEndpoint
from langchain_community.vectorstores.chroma import Chroma
from server.service.split import split_docs

from server.service.load import load_docs
from dotenv import load_dotenv, find_dotenv


def persist_vector_db(category: str, s_docs):
# 定义 Embeddings
embedding = QianfanEmbeddingsEndpoint(
streaming=True,
model="Embedding-V1",
chunk_size=16,
)
base_directory = '../../data_base/chroma/'
base_directory = '../../data_base/'
# 定义持久化路径
persist_directory = os.path.join(base_directory, category)

Expand All @@ -29,6 +32,9 @@ def persist_vector_db(category: str, s_docs):


if __name__ == '__main__':
    # Load variables from the .env file located by find_dotenv() before any
    # embedding work starts.
    load_dotenv(find_dotenv())

    # load_docs() provides a category -> documents mapping; each category is
    # persisted under its own sub-directory by persist_vector_db().
    # To rebuild only some categories, filter on `category` inside this loop.
    for category, docs in load_docs().items():
        persist_vector_db(category, docs)

0 comments on commit 9a374d9

Please sign in to comment.