-
Notifications
You must be signed in to change notification settings - Fork 4
/
main.py
139 lines (113 loc) · 4.75 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import re
import sys
from typing import Optional

import pyperclip
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound
# Upper bound on the size of one chunk handed to create_chunks(). Despite the
# name, the budget is counted in whitespace-separated words, not model tokens.
MAX_TOKENS_PER_PART = 1400
class InvalidModeError(Exception):
    """Raised when the mode argument is not 'separate', 'full', or 'auto'."""
def get_video_ID(entry: str) -> Optional[str]:
    """
    Extracts and returns the YouTube video ID from the given input string.

    The input may be a URL (the ID is taken from the first "v=" or "/" that is
    directly followed by an 11-character ID) or a bare 11-character video ID,
    optionally surrounded by whitespace.

    Returns the 11-character ID, or None if no ID can be found.
    """
    candidate = entry.strip()

    # A bare ID has no "v=" or "/" in front of it, so the URL pattern below
    # would reject it; accept it explicitly when the whole input is an ID.
    bare_id_pattern = r'[0-9A-Za-z_-]{10}[048AEIMQUYcgkosw]'
    if re.fullmatch(bare_id_pattern, candidate):
        return candidate

    youtube_id_pattern = r'(?:v=|\/)([0-9A-Za-z_-]{10}[048AEIMQUYcgkosw])'
    match = re.search(youtube_id_pattern, candidate)
    return match.group(1) if match else None
def get_transcript(video_id: str) -> str:
    """
    Downloads the transcript for the given video ID and returns it as one string.

    The "text" field of every transcript entry is joined with single spaces.
    If the API raises NoTranscriptFound, an empty string is returned instead.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
    except NoTranscriptFound:
        return ""
    return " ".join(segment["text"] for segment in segments)
def create_chunks(text: str, max_tokens_per_part: int, mode: str) -> list:
    """
    Splits the given text into prompt-wrapped chunks of at most
    max_tokens_per_part whitespace-separated words each.

    In 'auto' mode, a text that already fits into a single part is returned
    unchanged as a one-element list. In every other case each chunk is wrapped
    in instructions: the first chunk announces the total number of parts,
    intermediate chunks ask the reader to wait for the next part, and the last
    chunk ends with the summary request.

    Every word of the input ends up in exactly one chunk.

    Returns a list of chunk strings (always at least one element).
    Raises ValueError if max_tokens_per_part is smaller than 1.
    """
    if max_tokens_per_part < 1:
        raise ValueError("max_tokens_per_part must be a positive integer")

    words = text.split()

    # Short transcripts need no wrapping in 'auto' mode.
    if mode == "auto" and len(words) <= max_tokens_per_part:
        return [text]

    # Ceiling division; an empty text still produces one (empty) part so the
    # caller always receives the summary prompt.
    total_parts = max(1, (len(words) + max_tokens_per_part - 1) // max_tokens_per_part)

    chunks = []
    for index in range(total_parts):
        number = index + 1
        start = index * max_tokens_per_part
        part_words = words[start:start + max_tokens_per_part]
        # Each word is followed by a single space, including the last one.
        body = "".join(word + " " for word in part_words)

        if number == 1:
            header = (
                "I have a text that I would like to summarize. "
                f"It consists of {total_parts} parts and these parts are separated by '-----'. "
                "Here is the 1. part: ---- "
            )
        else:
            header = f"Here is the {number} part: ---- "

        if number < total_parts:
            footer = (
                "-----\n\nPlease take note of this paragraph carefully and refrain "
                "from responding to it. Kindly wait for the next part. "
            )
        else:
            footer = "-----\n\ntl;dr Create a very verbose summary.\n\nSummary:\n"

        chunks.append(header + body + footer)

    return chunks
def process_args(args) -> tuple:
    """
    Validates the command-line arguments and returns them as (mode, input string).

    Expects exactly three items: the script name, the mode, and the video link.
    With any other count, a usage line is printed and the process exits with
    status 1.
    Raises an InvalidModeError if the mode is not 'separate', 'full', or 'auto'.
    """
    if len(args) != 3:
        print("Usage: python3 main.py [separate/full/auto] [Video-Link]")
        sys.exit(1)

    # The first item is the script name and is not needed.
    _, mode, input_str = args

    allowed_modes = ('separate', 'full', 'auto')
    if mode in allowed_modes:
        return mode, input_str

    raise InvalidModeError("Invalid mode. Please choose either 'separate', 'full', or 'auto'.")
def process_transcript(mode: str, transcript: str) -> list:
    """
    Handles the fetched transcript according to the selected mode.

    For any mode other than 'full', the transcript is split with create_chunks()
    using MAX_TOKENS_PER_PART, the number of chunks is printed, and the list of
    chunks is returned. For 'full', the whole transcript is copied to the
    clipboard, a confirmation is printed, and None is returned.
    """
    if mode != "full":
        parts = create_chunks(transcript, MAX_TOKENS_PER_PART, mode)
        print(f"There are {len(parts)} chunks to be copied.\n")
        return parts

    pyperclip.copy(transcript)
    print("Transcript successfully fetched and copied to clipboard.")
    return None
def copy_chunks_to_clipboard(chunks: list):
    """
    Walks through the chunks interactively, copying one chunk to the clipboard per prompt.

    Before each chunk the user is prompted; entering exactly 'q' stops the loop,
    any other input (including just Enter) copies the chunk and prints a confirmation.
    """
    chunk_count = len(chunks)
    for position, text in enumerate(chunks, start=1):
        answer = input(f"Press Enter to copy chunk {position} of {chunk_count} or type 'q' to quit: ")
        if answer == 'q':
            return
        pyperclip.copy(text)
        print(f"Chunk {position} of {chunk_count} copied to clipboard.")
def main():
    """
    Entry point of the script: parses the arguments, resolves the video ID, fetches the transcript,
    and hands it to the clipboard either in full or chunk by chunk.

    Any failure along the way (invalid mode, unrecognized input, missing transcript) prints a
    message and exits with status 1.
    """
    def abort(message):
        # Every failure path reports the problem and exits with the same status.
        print(message)
        sys.exit(1)

    try:
        mode, input_str = process_args(sys.argv)
    except InvalidModeError as error:
        abort(error)

    video_id = get_video_ID(input_str)
    if video_id is None:
        abort("Invalid input. Please enter a valid YouTube-Link or YouTube-Video-ID.")

    transcript = get_transcript(video_id)
    if not transcript:
        abort("No transcript found for the given video.")

    # In 'full' mode process_transcript returns None, so there is nothing left to copy.
    parts = process_transcript(mode, transcript)
    if parts:
        copy_chunks_to_clipboard(parts)
# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()