-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtoSpeach.py
72 lines (55 loc) · 2.16 KB
/
toSpeach.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from openai import AsyncOpenAI
import asyncio
from pathlib import Path
import argparse
import os
import fitz
from pydub import AudioSegment
import re
def sort_key(filename):
    """Return the numeric sort key embedded in a file name.

    Only the base name without its extension is inspected, so digits in
    parent directories or in the extension (the "3" of ".mp3") are ignored.
    The first run of digits found anywhere in that stem is used, which makes
    names such as "part_12.mp3" sort numerically instead of all mapping to 0.

    Args:
        filename: File name or path (str or os.PathLike).

    Returns:
        The first integer found in the file's stem, or 0 if it has none.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    # re.search, not re.match: the number need not be at the very start.
    match = re.search(r"\d+", stem)
    return int(match.group()) if match else 0
def sort_fileList(fileList):
    """Return a new list holding the entries of fileList ordered by sort_key.

    The input is left unmodified; entries whose keys compare equal keep
    their original relative order.
    """
    ordered = list(fileList)
    ordered.sort(key=sort_key)
    return ordered
def combine_audio_files(audio_files, output_file):
    """Concatenate audio files in the given order and export them as one MP3.

    Args:
        audio_files: Iterable of paths, each loaded with AudioSegment.from_file.
        output_file: Destination handed to AudioSegment.export.
    """
    merged = AudioSegment.empty()
    for path in audio_files:
        segment = AudioSegment.from_file(path)
        merged += segment
    merged.export(output_file, format="mp3")
def pdf_to_text(pdf_file):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_file: Path of the PDF, passed to fitz.open.

    Returns:
        The result of page.get_text() for each page, concatenated in page
        order with no separator added between pages.
    """
    # The context manager closes the document even if extraction raises;
    # previously the handle was never closed.
    with fitz.open(pdf_file) as pdf_document:
        # join avoids rebuilding the growing string once per page.
        return "".join(page.get_text() for page in pdf_document)
def split_text(text, max_length=4000):
    """Split text into consecutive chunks of at most max_length characters.

    Chunks are cut at whitespace where possible so that a word is not torn
    in half between two chunks (each chunk is synthesized separately, so a
    split word would be spoken as two fragments). If a window contains no
    usable whitespace, the chunk is cut at exactly max_length characters.
    No characters are dropped: "".join(split_text(text)) == text.

    Args:
        text: The string to split.
        max_length: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of non-empty strings, in order; empty if text is empty.

    Raises:
        ValueError: If max_length is not positive.
    """
    if max_length <= 0:
        raise ValueError("max_length must be a positive integer")

    chunks = []
    total = len(text)
    start = 0
    while start < total:
        end = min(start + max_length, total)
        # Only adjust when the cut would land between two non-space
        # characters, i.e. in the middle of a word.
        if end < total and not text[end].isspace() and not text[end - 1].isspace():
            # Walk back to the last whitespace in the window and cut just
            # after it. Index `start` itself is excluded so a chunk never
            # degenerates to a lone leading whitespace character.
            for i in range(end - 1, start, -1):
                if text[i].isspace():
                    end = i + 1
                    break
        chunks.append(text[start:end])
        start = end
    return chunks
async def text_to_speech(text, fileName, apikey):
    """Synthesize text with the OpenAI speech API and save it as an MP3.

    The audio is written to "<fileName>.mp3" in the directory containing
    this script.

    Args:
        text: The text to synthesize.
        fileName: Base name (without extension) of the output file.
        apikey: OpenAI API key used to create the client.

    Returns:
        The path of the written MP3 file, as a string.
    """
    speech_file_path = Path(__file__).parent / f"{fileName}.mp3"
    # A client is created per call; the context manager closes its HTTP
    # connections deterministically instead of leaving that to garbage
    # collection after the event loop may already be gone.
    async with AsyncOpenAI(api_key=apikey) as client:
        response = await client.audio.speech.create(
            model="tts-1", voice="nova", input=text)
        # stream_to_file is a blocking call, so run it in a worker thread
        # to keep the event loop free for the other requests.
        # NOTE(review): newer openai SDK releases appear to mark
        # stream_to_file as deprecated - confirm against the pinned version.
        await asyncio.to_thread(response.stream_to_file, speech_file_path)
    return str(speech_file_path)
async def main(pdf_file, output_file, apikey):
    """Convert a PDF to a single MP3 file.

    The PDF text is split into chunks, each chunk is synthesized
    concurrently into a temporary "part_<i>" audio file, the parts are
    combined in order into output_file, and the temporary files are removed.

    Args:
        pdf_file: Path of the PDF to convert.
        output_file: Path of the MP3 file to write.
        apikey: OpenAI API key forwarded to text_to_speech.

    Raises:
        Exception: The first error raised by any text_to_speech call, or
            any error raised while combining the parts. Temporary files
            from the calls that succeeded are removed before it propagates.
    """
    text_parts = split_text(pdf_to_text(pdf_file))
    pending = [
        text_to_speech(text_part, f"part_{i}", apikey)
        for i, text_part in enumerate(text_parts)
    ]
    # return_exceptions=True lets every request finish, so the paths written
    # by the successful ones are known and can be cleaned up on failure.
    results = await asyncio.gather(*pending, return_exceptions=True)
    audio_files = [r for r in results if not isinstance(r, BaseException)]
    failures = [r for r in results if isinstance(r, BaseException)]
    try:
        if failures:
            raise failures[0]
        combine_audio_files(sort_fileList(audio_files), output_file)
    finally:
        # Previously the part files were left behind whenever a request or
        # the export failed.
        for audio_file in audio_files:
            os.remove(audio_file)
if __name__ == "__main__":
    # Command-line entry point: three required positional arguments,
    # registered in the order they must be given.
    cli = argparse.ArgumentParser(description="Convert a PDF to an MP3 file")
    for arg_name, arg_help in (
        ("pdf_file", "The PDF file to convert"),
        ("output_file", "The output MP3 file"),
        ("apikey", "OpenAI API key"),
    ):
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()
    asyncio.run(main(options.pdf_file, options.output_file, options.apikey))