-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwebpage_summarizer.py
95 lines (85 loc) · 4.19 KB
/
webpage_summarizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
from dotenv import load_dotenv
from langchain.agents import Tool, load_tools
from crewai import Agent, Task, Process, Crew
from crewai_tools import WebsiteSearchTool, SerperDevTool
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
class WebpageSummarizerAI:
    """Two-agent CrewAI pipeline that extracts and then summarizes a web page.

    Constructing an instance wires up everything in order: environment,
    tools, agents, tasks and finally the crew. Call :meth:`run` to execute
    the crew.
    """

    def __init__(self, web_site: str = None):
        """Build the full pipeline for ``web_site``.

        Args:
            web_site: URL handed to the website search tool. May be ``None``;
                in that case the summary file gets a generic name (see
                :meth:`_output_filename`).
        """
        self.website = web_site
        print(self.website)
        self.setup_environment()
        self.setup_tools()
        self.setup_agents()
        self.setup_tasks()
        self.setup_crew()

    def setup_environment(self):
        """Load ``.env`` settings, read the API keys and build the OpenAI LLM."""
        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.serper_api_key = os.getenv("SERPER_API_KEY")
        self.gemini_api_key = os.getenv("GEMINI_API_KEY")
        # os.environ["OPENAI_MODEL_NAME"]="gpt-3.5-turbo-instruct"
        # NOTE(review): this LLM is constructed but the agents below do not
        # receive it (their ``llm=`` argument is commented out) -- confirm
        # whether that is intentional.
        self.openai_llm = ChatOpenAI(
            model="gpt-3.5-turbo",
            verbose=False,
            api_key=self.openai_api_key,
        )
        # self.gemini_llm = ChatGoogleGenerativeAI(
        #     model="gemini-1.5-flash",
        #     verbose=False,
        #     temperature=0.5,
        #     google_api_key=self.gemini_api_key
        # )

    def setup_tools(self):
        """Create the website search tool (bound to ``self.website``) and the Serper tool."""
        self.web_extractor = WebsiteSearchTool(website=self.website)
        self.serper_tool = SerperDevTool()

    def setup_agents(self):
        """Create the extractor agent (with the web tool) and the summarizer agent."""
        self.extractor = Agent(
            role="Content Extractor",
            goal="Extract and summarize the most useful information from a given webpage URL",
            backstory=(
                "You are an expert at analyzing and extracting key information from any webpage. You know how to identify and summarize the most\n"
                "relevant and useful content, ensuring that the summary is comprehensive and accurate."
            ),
            verbose=False,
            # llm=self.openai_llm,
            allow_delegation=False,
            tools=[self.web_extractor],
        )
        self.summarizer = Agent(
            role="Content Summarizer",
            goal="Provide a concise and useful summary of the extracted content from the webpage",
            backstory=(
                "You are skilled at summarizing large amounts of information into concise, clear, and useful content. You know how to highlight\n"
                "the most important points and present them in a way that is easy to understand for the user."
            ),
            verbose=False,
            # llm=self.openai_llm,
            allow_delegation=True,
        )

    def _output_filename(self) -> str:
        """Return the file name the final summary task writes to.

        The name is ``summary_<len(url)>.txt``. When no URL was supplied
        (``self.website`` is ``None``) ``len()`` would raise ``TypeError``,
        so a plain ``summary.txt`` is used instead.
        """
        if self.website is None:
            return "summary.txt"
        return f"summary_{len(self.website)}.txt"

    def setup_tasks(self):
        """Create the extraction task and the summarization task.

        The summarization task writes its output to the file named by
        :meth:`_output_filename`.
        """
        self.task_extract = Task(
            description=(
                "Extract content from the provided webpage URL and summarize the key points. The summary should be comprehensive and cover the main\n"
                "topics and useful information presented on the page. Your final output should be a text summary that captures the essence of the webpage content."
            ),
            agent=self.extractor,
            expected_output="""A comprehensive summary covering the main topics and useful information from the webpage. The summary should be text-only and well-structured.""",
        )
        self.task_summarize = Task(
            description=(
                "Provide a detailed and concise summary of the extracted webpage content. Ensure that the summary is easy to understand and includes\n"
                "the most important and useful information. The summary should be well-organized and present the key points clearly."
            ),
            agent=self.summarizer,
            expected_output="""A concise and well-organized summary that highlights the most important and useful information from the extracted content. The summary should be clear and easy to understand.""",
            output_file=self._output_filename(),
        )

    def setup_crew(self):
        """Assemble both agents and both tasks into a sequential crew."""
        self.crew = Crew(
            agents=[self.extractor, self.summarizer],
            tasks=[self.task_extract, self.task_summarize],
            verbose=0,
            process=Process.sequential,
        )

    def run(self):
        """Kick off the crew and return whatever ``Crew.kickoff()`` returns."""
        return self.crew.kickoff()
if __name__ == "__main__":
    # Script entry point: ask for a URL, build the pipeline, run it.
    # Strip surrounding whitespace so a stray space does not end up in the URL.
    url = input("Enter the webpage URL: ").strip()
    if not url:
        # Fail early with a clear message instead of building tools for an empty URL.
        raise SystemExit("No URL provided.")
    webpage_summarizer_ai = WebpageSummarizerAI(url)
    # Print the crew's result so the summary is visible on the console as well.
    print(webpage_summarizer_ai.run())