-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyoutube.py
107 lines (79 loc) · 3.61 KB
/
youtube.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from langchain_community.document_loaders import YoutubeLoader
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from dotenv import find_dotenv, load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
import textwrap
import os
# Load environment variables (including OPENAI_API_KEY) from the nearest .env
# file. The key is deliberately NOT assigned in code: a placeholder written to
# os.environ before this call would win over the .env value, because
# load_dotenv does not override variables that are already set, and every
# OpenAI request would then be sent with a bogus key.
load_dotenv(find_dotenv())
# Initialize OpenAI embeddings (shared by the vector-store builder below)
embeddings = OpenAIEmbeddings()
def create_db_from_youtube_video_url(video_url):
    """Build a FAISS vector store from a YouTube video's transcript.

    The transcript is fetched with ``YoutubeLoader``, split into overlapping
    chunks, and embedded with the module-level ``embeddings`` object.

    Args:
        video_url: URL of the YouTube video whose transcript is loaded.

    Returns:
        A ``(db, transcript)`` tuple: the FAISS store built from the chunks,
        and the unsplit documents exactly as the loader returned them.
    """
    transcript = YoutubeLoader.from_youtube_url(video_url).load()

    # Split with chunk_size=2000 and chunk_overlap=100 so neighbouring chunks
    # share a little context.
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    chunks = splitter.split_documents(transcript)

    return FAISS.from_documents(chunks, embeddings), transcript
def summarize_video(transcript):
    """Summarize a video transcript with the ``gpt-4o-mini`` chat model.

    Args:
        transcript: Sequence of loaded documents, each exposing a
            ``page_content`` string (the second value returned by
            ``create_db_from_youtube_video_url``).

    Returns:
        The model's summary with leading and trailing whitespace removed.

    Raises:
        ValueError: If ``transcript`` is empty or ``None``.
    """
    # Fail early with a clear message instead of an IndexError further down,
    # and before any model client is constructed.
    if not transcript:
        raise ValueError("Cannot summarize an empty transcript.")

    # Summarize every loaded document, not just the first one, so a transcript
    # that arrives in several pieces is not silently truncated. With a single
    # document this is exactly that document's text.
    transcript_text = " ".join(doc.page_content for doc in transcript)

    chat = ChatOpenAI(model="gpt-4o-mini")
    # The prompt text is kept flush-left so no indentation leaks into the
    # string sent to the model.
    summary_prompt = """
You are a helpful assistant that summarizes the content of YouTube video transcripts.
Please provide a concise summary of the following transcript:
{transcript}
"""
    system_message_prompt = SystemMessagePromptTemplate.from_template(summary_prompt)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    chain = LLMChain(llm=chat, prompt=chat_prompt)
    summary = chain.run(transcript=transcript_text)
    return summary.strip()
def get_response_from_query(db, query, k=4):
    """Answer a question about a video from its most relevant transcript chunks.

    Args:
        db: Vector store exposing ``similarity_search(query, k=...)``.
        query: The user's question.
        k: Number of transcript chunks to retrieve as context.

    Returns:
        A ``(response, docs)`` tuple: the model's answer flattened to a single
        line, and the documents retrieved from ``db``.
    """
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)
    chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2)
    # Template to use for the system message prompt (kept flush-left so no
    # indentation leaks into the string sent to the model)
    template = """
You are a helpful assistant that can answer questions about YouTube videos
based on the video's transcript: {docs}
Only use the factual information from the transcript to answer the question.
If you feel like you don't have enough information to answer the question, say "I don't know".
"""
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
    # Human question prompt
    human_template = "Answer the following question: {question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )
    chain = LLMChain(llm=chat, prompt=chat_prompt)
    response = chain.run(question=query, docs=docs_page_content)
    # Flatten the answer to one line. Joining on a space (rather than deleting
    # newlines outright) keeps the last word of one line from being fused with
    # the first word of the next; runs of whitespace collapse to one space.
    response = " ".join(response.split())
    return response, docs
# Main Program
if __name__ == "__main__":
    # Ask the user for the YouTube video URL; strip stray whitespace that
    # often comes along with a pasted link.
    video_url = input("Please enter the YouTube video URL: ").strip()
    # Create the database and get the transcript
    db, transcript = create_db_from_youtube_video_url(video_url)
    # Summarize the video
    print("Summarizing the video...")
    summary = summarize_video(transcript)
    print("\nVideo Summary:\n")
    print(textwrap.fill(summary, width=50))
    # Allow the user to ask questions about the video
    while True:
        query = input(
            "\nAsk any question about the video (or type 'exit' to quit): "
        ).strip()
        # A blank line is not a question; re-prompt instead of spending an
        # API call on it.
        if not query:
            continue
        # Input is stripped above, so "exit " or " EXIT" also quits.
        if query.lower() == "exit":
            break
        # The retrieved documents are not shown to the user, only the answer.
        response, _ = get_response_from_query(db, query)
        print("\nAnswer:\n")
        print(textwrap.fill(response, width=50))