Skip to content

Commit d784ec1

Browse files
committed
Add support for translating vtt files
1 parent 3207aac commit d784ec1

File tree

5 files changed

+301
-89
lines changed

5 files changed

+301
-89
lines changed

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ repos:
2323
hooks:
2424
- id: black
2525
- repo: https://github.com/astral-sh/ruff-pre-commit
26-
rev: v0.9.6
26+
rev: v0.9.7
2727
hooks:
2828
- id: ruff
2929
types: [file]
@@ -40,7 +40,7 @@ repos:
4040
additional_dependencies:
4141
- tomli
4242
- repo: https://github.com/crate-ci/typos
43-
rev: typos-dict-v0.12.5
43+
rev: v1.29.9
4444
hooks:
4545
- id: typos
4646
- repo: https://github.com/woodruffw/zizmor-pre-commit

src/subtitle_translate/main.py

+103-18
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,17 @@
2727

2828

2929
import argparse
30+
import sys
31+
from typing import TYPE_CHECKING
3032

3133
import httpx
3234
import trio
3335

3436
from subtitle_translate import extricate, subtitle_parser, translate
3537

38+
if TYPE_CHECKING:
39+
from collections.abc import Iterable
40+
3641

3742
async def translate_texts(
3843
texts: dict[int, tuple[str, ...]],
@@ -61,16 +66,12 @@ async def translate_texts(
6166

6267

6368
async def translate_subtitles(
64-
source_file: str,
65-
dest_file: str,
69+
generator: Iterable[tuple[int, subtitle_parser.Subtitle]],
6670
source_language: str = "auto",
6771
dest_language: str = "en",
68-
) -> None:
72+
) -> tuple[dict[int, subtitle_parser.Subtitle], dict[int, tuple[str, ...]]]:
6973
"""Translate subtitles file asynchronously."""
70-
print(f"Loading subtitles file {source_file!r}...")
71-
subs, texts = subtitle_parser.convert_text(
72-
subtitle_parser.parse_file(source_file),
73-
)
74+
subs, texts = subtitle_parser.convert_text(generator)
7475

7576
print(f"Parsed {len(subs)} subtitles")
7677

@@ -80,11 +81,76 @@ async def translate_subtitles(
8081
sentence_count = sum(map(len, texts.values()))
8182
print(f"Translated {sentence_count} sentences.")
8283

84+
return subs, new_texts
85+
86+
87+
async def translate_subtitles_srt(
    source_file: str,
    dest_file: str | None = None,
    source_language: str = "auto",
    dest_language: str = "en",
) -> None:
    """Translate an SRT subtitles file asynchronously and save the result.

    Args:
        source_file: Path of the SRT file to read.
        dest_file: Path to write the translated subtitles to. When ``None``,
            it is derived from ``source_file`` by inserting ``dest_language``
            before the extension (``name.ext`` -> ``name.<dest_language>.ext``).
        source_language: Language code of the source text, or ``"auto"``.
        dest_language: Language code to translate into.

    Raises:
        ValueError: If ``dest_file`` is ``None`` and ``source_file`` contains
            no ``"."`` to split an extension from.
    """
    # Set destination if not provided
    if dest_file is None:
        name, ext = source_file.rsplit(".", 1)
        dest_file = f"{name}.{dest_language}.{ext}"

    print(f"Loading subtitles file {source_file!r}...")
    # Forward the requested languages; without them the translation would
    # silently fall back to translate_subtitles' defaults ("auto" -> "en")
    # even though the output file name advertises `dest_language`.
    subs, new_texts = await translate_subtitles(
        subtitle_parser.parse_file_srt(source_file),
        source_language,
        dest_language,
    )

    print("Updating subtitle texts...")
    subs = subtitle_parser.modify_subtitles(subs, new_texts)

    print("Saving...")
    subtitle_parser.write_subtitles_srt_file(dest_file, subs)
    print("Save complete.")
    print(f"Saved to {dest_file!r}")
111+
112+
113+
async def translate_subtitles_vtt(
    source_file: str,
    dest_file: str | None = None,
    source_language: str = "auto",
    dest_language: str = "en",
) -> None:
    """Translate a WebVTT subtitles file asynchronously and save the result.

    The first item produced by ``subtitle_parser.parse_file_vtt`` is treated
    as the header (an iterable of text lines); the remaining items are
    filtered down to ``subtitle_parser.Subtitle`` instances and translated.

    Args:
        source_file: Path of the VTT file to read.
        dest_file: Path to write the translated subtitles to. When ``None``,
            it is derived from ``source_file``: a trailing
            ``.<source_language>`` on the base name is dropped and
            ``.<dest_language>`` is inserted before the extension.
        source_language: Language code of the source text, or ``"auto"``.
            When ``"auto"``, a ``Language: xx`` header line (if present)
            supplies the source language.
        dest_language: Language code to translate into.

    Raises:
        ValueError: If ``dest_file`` is ``None`` and ``source_file`` contains
            no ``"."`` to split an extension from.
    """
    print(f"Loading subtitles file {source_file!r}...")
    gen = subtitle_parser.parse_file_vtt(source_file)
    header = next(gen)

    new_header = []
    for line in header:
        # Narrow the header item type for the type checker.
        assert isinstance(line, str)
        if line.startswith("Language: "):
            key, value = line.split(" ", 1)
            if source_language == "auto":
                # Adopt the language declared by the file itself; strip so
                # stray whitespace never ends up in the language code.
                source_language = value.strip()
            # The saved file holds translated text, so its header must
            # declare the destination language regardless of how the
            # source language was determined.
            line = f"{key} {dest_language}"
        new_header.append(line)

    # Set destination if not provided
    if dest_file is None:
        name, ext = source_file.rsplit(".", 1)
        # removesuffix is a no-op when the base name does not end with the
        # source language tag, so no separate membership check is needed.
        name = name.removesuffix(f".{source_language}")
        dest_file = f"{name}.{dest_language}.{ext}"

    # Forward the requested languages; without them the translation would
    # silently fall back to translate_subtitles' defaults ("auto" -> "en").
    subs, new_texts = await translate_subtitles(
        enumerate(x for x in gen if isinstance(x, subtitle_parser.Subtitle)),
        source_language,
        dest_language,
    )

    print("Updating subtitle texts...")
    subs = subtitle_parser.modify_subtitles_plain(subs, new_texts)

    print("Saving...")
    subtitle_parser.write_subtitles_vtt_file(
        dest_file,
        new_header,
        subs.values(),
    )
    print("Save complete.")
    print(f"Saved to {dest_file!r}")
90156

@@ -119,6 +185,15 @@ async def run_async() -> None:
119185
"Must be a ISO 639-1:2002 language code or 'auto' to guess."
120186
),
121187
)
188+
parser.add_argument(
189+
"--source-type",
190+
type=str,
191+
default="auto",
192+
help=(
193+
"Subtitle source type (default: 'auto').\n"
194+
"Must be either 'srt' or 'vtt', or 'auto' to guess from filename."
195+
),
196+
)
122197
parser.add_argument(
123198
"--dest-lang",
124199
type=str,
@@ -136,17 +211,27 @@ async def run_async() -> None:
136211

137212
args = parser.parse_args()
138213

139-
# Set destination if not provided
140-
if args.dest_file is None:
214+
if args.source_type == "auto":
141215
name, ext = args.source_file.rsplit(".", 1)
142-
args.dest_file = f"{name}.{args.dest_lang}.{ext}"
143-
144-
await translate_subtitles(
145-
args.source_file,
146-
args.dest_file,
147-
args.source_lang,
148-
args.dest_lang,
149-
)
216+
args.source_type = ext
217+
218+
if args.source_type == "srt":
219+
await translate_subtitles_srt(
220+
args.source_file,
221+
args.dest_file,
222+
args.source_lang,
223+
args.dest_lang,
224+
)
225+
elif args.source_type == "vtt":
226+
await translate_subtitles_vtt(
227+
args.source_file,
228+
args.dest_file,
229+
args.source_lang,
230+
args.dest_lang,
231+
)
232+
else:
233+
print(f"Unhandled source type {args.source_type!r}.")
234+
sys.exit(1)
150235

151236

152237
def cli_run() -> None:

0 commit comments

Comments
 (0)