-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
81 lines (60 loc) · 2.32 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import utils.epub as epub
import openai
import json
from typing import List
from pydub import AudioSegment
from pathlib import Path
from gtts import gTTS
# try speaking hello world with gTTS
def text_to_speech(text: str) -> None:
    """Render *text* as English speech with gTTS and save it to data/gtts.mp3.

    Args:
        text: The text to synthesize.
    """
    speech = gTTS(text=text, lang='en')
    speech.save("data/gtts.mp3")
# call openai tts api to convert text to audio
def text_to_audio(json_file: str, output_file: str, shitty: bool = False,
                  start_chunk: int = 2) -> None:
    """Convert the text of the first JSON entry to speech, one MP3 per chunk.

    The JSON file must hold a list whose first element has a ``'text'`` key.
    That text is first passed to ``text_to_speech`` (gTTS), then split with
    ``chunk_text`` and each chunk is sent to the OpenAI speech endpoint. The
    audio for chunk ``i`` is written to ``f"{output_file}_{i}.mp3"``.

    Args:
        json_file: Path of the JSON file to read.
        output_file: Prefix for the generated MP3 files (chunk index and
            ``.mp3`` are appended).
        shitty: Currently unused; kept so existing callers keep working.
        start_chunk: Index of the first chunk to synthesize. Chunks with a
            lower index are skipped. Defaults to 2, which preserves the
            previous hard-coded behavior; pass 0 to synthesize every chunk.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)
    text = data[0]['text']

    # gTTS audio
    text_to_speech(text)

    # openai audio
    for i, chunk in enumerate(chunk_text(text)):
        # Skip chunks below start_chunk; file names keep the original index
        # so the outputs still line up with the chunk positions.
        if i < start_chunk:
            continue
        response = openai.audio.speech.create(
            model="tts-1",
            voice="shimmer",
            input=chunk,
            speed=1.1,
        )
        response.stream_to_file(f"{output_file}_{i}.mp3")
MAX_CHUNK_SIZE = 4000

# Two-character sequences treated as the end of a sentence.
_SENTENCE_BREAKS = ('. ', '! ', '? ')


def chunk_text(text: str, max_chunk_size: int = MAX_CHUNK_SIZE) -> List[str]:
    """Split *text* into chunks of at most *max_chunk_size* characters.

    Each chunk ends at the last sentence break ('. ', '! ' or '? ') found
    inside the size limit. When no sentence break exists in the window the
    text is cut exactly at the limit. Chunks are stripped of surrounding
    whitespace and are never empty.

    Args:
        text: The text to split.
        max_chunk_size: Maximum number of characters per chunk.

    Returns:
        The chunks, in order.

    Raises:
        ValueError: If *text* is empty or whitespace-only, or if
            *max_chunk_size* is smaller than 1.
    """
    if max_chunk_size < 1:
        # A zero-length window would never consume any text.
        raise ValueError("max_chunk_size must be a positive integer")

    remaining = text.strip() if text else ""
    if not remaining:
        raise ValueError("Input text cannot be empty")

    chunks: List[str] = []
    while remaining:
        chunk_end = min(max_chunk_size, len(remaining))
        if chunk_end < len(remaining):
            window = remaining[:chunk_end]
            # Use the latest break of ANY kind; stopping at the first
            # punctuation type that occurs can pick a much earlier position.
            last_break = max(window.rfind(mark) for mark in _SENTENCE_BREAKS)
            if last_break != -1:
                # Keep the punctuation mark, drop the space that follows it.
                chunk_end = last_break + 1
        chunks.append(remaining[:chunk_end].strip())
        remaining = remaining[chunk_end:].strip()
    return chunks
def splice_audio(path_to_folder: str):
    """Concatenate the numbered MP3 files in a folder into ``output.mp3``.

    Files matching ``*_[0-9]*.mp3`` are ordered by the integer formed from
    all digits in their stem, appended one after another, and exported to
    ``<path_to_folder>/output.mp3``.

    Args:
        path_to_folder: Folder that holds the numbered MP3 files.

    Raises:
        FileNotFoundError: If no matching MP3 file exists in the folder.
    """
    def _digits_as_int(mp3_path):
        # Every digit of the stem, in order, read as one integer.
        return int(''.join(ch for ch in mp3_path.stem if ch.isdigit()))

    folder = Path(path_to_folder)
    numbered = sorted(folder.glob("*_[0-9]*.mp3"), key=_digits_as_int)
    if not numbered:
        raise FileNotFoundError(f"No MP3 files found in {path_to_folder}")

    # Start from an empty segment and append each file in order.
    combined = AudioSegment.silent(duration=0)
    for mp3_path in numbered:
        segment = AudioSegment.from_mp3(mp3_path)
        combined += segment

    combined.export(f"{path_to_folder}/output.mp3", format="mp3")
if __name__ == "__main__":
    # Manual smoke test: speak the text of the first entry in data/test.json.
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open("data/test.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    text_to_speech(data[0]['text'])