-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathst_outline_search_func.py
96 lines (82 loc) · 3.42 KB
/
st_outline_search_func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from apiclient.discovery import build
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
# Example outline structure handed to the model as the `outline_format`
# prompt variable. It must be a balanced, parseable dictionary literal so the
# model is shown a well-formed target shape: one top-level header mapping to
# sections, each section holding a list of numbered discussion points.
outline_format = """{
"I. Header": {
"A. Section": [
{"1": "to discuss"},
{"2": "to discuss"},
{"3": "to discuss"}
],
"B. Section": [
{"1": "to discuss"},
{"2": "to discuss"},
{"3": "to discuss"}
]
}
}
"""
# Blank template of the fields collected for each journal article; every
# field starts out as an empty string.
journal_info_format = dict.fromkeys(
    (
        "title",
        "author(s)",
        "publishing date",
        "journal",
        "abstract",
        "link",
    ),
    "",
)
def generate_outline_prompt():
    """Build the chat prompt that asks for a narrative-review outline.

    The system message expects an ``expertise_areas`` variable; the human
    message expects ``subject``, ``elaborate_user`` and ``outline_format``.

    Returns:
        The prompt object produced by ``ChatPromptTemplate.from_messages``,
        holding the system message followed by the human message.
    """
    system_text = "You have exceptional proficiency in the area(s) of {expertise_areas}, also you are specialized in creating outlines for narrative review papers that meet the rigorous standards of top academic journals"
    human_text = """Create an outline for narrative review papers on the topic of "{subject}". Please ensure you integrate these specific criteria "{elaborate_user}" into your outline formation process. Follow this format: {outline_format}. For the final output, please structure the outline as a Python dictionary, also just give the python dictionary without anything else"""

    # Order matters: the system message comes first, then the human request.
    messages = [
        SystemMessagePromptTemplate.from_template(system_text),
        HumanMessagePromptTemplate.from_template(human_text),
    ]
    return ChatPromptTemplate.from_messages(messages)
def google_search(search_term, api_key, cse_id, total_results=10, dateRestrict=None):
    """Perform a Google search using the Custom Search JSON API.

    Results are fetched page by page, requesting at most 10 items per call,
    until ``total_results`` items have been collected or a page comes back
    without any items.

    Args:
        search_term (str): The search term to query.
        api_key (str): The API key for accessing the Google API.
        cse_id (str): The ID for the Custom Search Engine (CSE).
        total_results (int, optional): The maximum number of results to return. Default is 10.
        dateRestrict (str, optional): Limits results to a date range specified as [unit][time] (e.g., 'y5' for past 5 years). Default is None.

    Returns:
        list: The search result items obtained from the Google API, at most
        ``total_results`` of them (fewer when the search runs out of results).
    """
    page_size = 10  # largest page requested per call
    service = build("customsearch", "v1", developerKey=api_key)
    results = []
    for offset in range(0, total_results, page_size):
        # Only ask for what is still missing so the final page does not
        # overshoot ``total_results``.
        wanted = min(page_size, total_results - offset)
        res = (
            service.cse()
            .list(
                q=search_term,
                cx=cse_id,
                start=offset + 1,  # the API's ``start`` index is 1-based
                num=wanted,
                dateRestrict=dateRestrict,
            )
            .execute()
        )
        # The response carries no "items" key when a page has no results.
        items = res.get("items", [])
        if not items:
            break
        results.extend(items)
    return results
def search_parsing_prompt():
    """Build the chat prompt that asks for journal details from a search item.

    The human message expects the ``paper_html`` and ``journal_info_format``
    variables; the system message takes none.

    Returns:
        The prompt object produced by ``ChatPromptTemplate.from_messages``,
        holding the system message followed by the human message.
    """
    system_text = "You are an academic researcher specializing in data extraction with advanced skills in JSON and HTML parsing."
    human_text = """Given this JSON data "{paper_html}", extract and organize the information according to the following format "{journal_info_format}". If certain information isn't clear or is unavailable, insert "None". For the final output, make it as a python dictionary without anything else"""

    # Order matters: the system message comes first, then the human request.
    messages = [
        SystemMessagePromptTemplate.from_template(system_text),
        HumanMessagePromptTemplate.from_template(human_text),
    ]
    return ChatPromptTemplate.from_messages(messages)