-
Notifications
You must be signed in to change notification settings - Fork 4
/
tts.py
76 lines (67 loc) · 2.34 KB
/
tts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
TTS idea for ALPACA
"""
from pathlib import Path
import subprocess
from google.cloud import texttospeech
import json
# Dialog file (JSON) whose "localization" table holds the lines to synthesize.
SRC_PATH = "data-src/dialog/2080.schnack"

# Instantiates a client
client = texttospeech.TextToSpeechClient()

# Build the voice request: German ("de-DE"), female, pinned to the
# "de-DE-Neural2-A" voice.
voice = texttospeech.VoiceSelectionParams(
    language_code="de-DE",
    ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    name="de-DE-Neural2-A",
)

# Select the type of audio file you want returned: MP3, spoken slightly
# faster than normal and post-processed with the "handset" effects profile.
audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.MP3,
    speaking_rate=1.23,
    effects_profile_id=["handset-class-device"],
)

# Decode explicitly as UTF-8 so non-ASCII dialog text (e.g. umlauts) is read
# correctly regardless of the platform's default encoding.
with Path(SRC_PATH).open(encoding="utf-8") as f:
    parsed = json.load(f)

localizations = parsed["localization"]
for localization in localizations:
    out_path_temp: Path = Path(f"./data-src/audio/voice/de_{localization}.ogg_temp")
    out_path: Path = Path(f"./data-src/audio/voice/de_{localization}.ogg")

    # NOTE(review): this guard tests the *temp* file, which is removed at the
    # end of every iteration, so it only skips entries whose temp file was
    # left behind by another/aborted run; existing .ogg files are always
    # regenerated. Confirm whether `out_path` was meant here.
    if out_path_temp.exists():
        continue

    print(f"{localization}.ogg")

    # Set the text input to be synthesized (first element of the entry).
    synthesis_input = texttospeech.SynthesisInput(
        text=localizations[localization][0]
    )

    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )

    # Make sure the target directory exists before writing into it.
    out_path_temp.parent.mkdir(parents=True, exist_ok=True)

    try:
        # The response's audio_content is binary (MP3 data, despite the
        # temp file's name).
        with out_path_temp.open("wb") as out:
            out.write(response.audio_content)
        print(f'Audio content written to file "{out_path_temp}"')

        # Re-encode the MP3 temp file as stereo Ogg Vorbis.
        # "-strict experimental" is presumably needed because "vorbis" selects
        # ffmpeg's built-in (experimental) encoder - TODO confirm; "-ac 2"
        # forces two output channels.
        command = [
            "ffmpeg",
            "-y",  # overwrite the output; global option, so placed up front
            "-i",
            str(out_path_temp),
            "-c:a",
            "vorbis",
            "-ac",
            "2",
            "-strict",
            "experimental",
            str(out_path),
        ]
        result = subprocess.run(
            command,
            stdin=subprocess.DEVNULL,  # never let ffmpeg wait for input
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            check=False,
        )
        # Report a failed conversion instead of silently discarding it, but
        # keep going so one bad line does not abort the whole batch.
        if result.returncode != 0:
            print(f'ffmpeg failed for "{out_path}" (exit code {result.returncode}):')
            print(result.stdout.decode(errors="replace"))
    finally:
        # Always remove the temp file - even on errors or Ctrl-C - so a
        # leftover cannot make the guard above skip this entry next run.
        out_path_temp.unlink(missing_ok=True)