Skip to content

Commit 402ca1b

Browse files
committed
Upgrade translation module to use better endpoint
See ssut/py-googletrans#268 (comment)
1 parent 7c0f466 commit 402ca1b

File tree

2 files changed

+94
-51
lines changed

2 files changed

+94
-51
lines changed

.pre-commit-config.yaml

+5-5
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,11 @@ repos:
1919
- id: sort-simple-yaml
2020
files: .pre-commit-config.yaml
2121
- repo: https://github.com/psf/black-pre-commit-mirror
22-
rev: 24.10.0
22+
rev: 25.1.0
2323
hooks:
2424
- id: black
2525
- repo: https://github.com/astral-sh/ruff-pre-commit
26-
rev: v0.8.6
26+
rev: v0.9.6
2727
hooks:
2828
- id: ruff
2929
types: [file]
@@ -33,17 +33,17 @@ repos:
3333
hooks:
3434
- id: badgie
3535
- repo: https://github.com/codespell-project/codespell
36-
rev: v2.3.0
36+
rev: v2.4.1
3737
hooks:
3838
- id: codespell
3939
additional_dependencies:
4040
- tomli
4141
- repo: https://github.com/crate-ci/typos
42-
rev: dictgen-v0.3.1
42+
rev: typos-dict-v0.12.4
4343
hooks:
4444
- id: typos
4545
- repo: https://github.com/woodruffw/zizmor-pre-commit
46-
rev: v1.0.0
46+
rev: v1.3.1
4747
hooks:
4848
- id: zizmor
4949
- repo: local

src/subtitle_translate/translate.py

+89-46
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,11 @@
2525

2626
import json
2727
import random
28-
import urllib.request
2928
from functools import partial
3029
from typing import TYPE_CHECKING, Any, Final, TypeVar, cast
31-
from urllib.parse import urlencode
3230

3331
import httpx
32+
import orjson
3433
import trio
3534

3635
from subtitle_translate import agents
@@ -70,22 +69,22 @@ async def collect(
7069
## return 'http://clients5.google.com/translate_a/t?'+urlencode(query)
7170

7271

73-
def get_translation_url(
74-
sentence: str,
75-
to_language: str,
76-
source_language: str = "auto",
77-
) -> str:
78-
"""Return the URL you should visit to get query translated to language to_language."""
79-
query = {
80-
"client": "gtx",
81-
"dt": "t",
82-
"sl": source_language,
83-
"tl": to_language,
84-
"q": sentence,
85-
}
86-
return "https://translate.googleapis.com/translate_a/single?" + urlencode(
87-
query,
88-
)
72+
##def get_translation_url(
73+
## sentence: str,
74+
## to_language: str,
75+
## source_language: str = "auto",
76+
##) -> str:
77+
## """Return the URL you should visit to get query translated to language to_language."""
78+
## query = {
79+
## "client": "gtx",
80+
## "dt": "t",
81+
## "sl": source_language,
82+
## "tl": to_language,
83+
## "q": sentence,
84+
## }
85+
## return "https://translate.googleapis.com/translate_a/single?" + urlencode(
86+
## query,
87+
## )
8988

9089

9190
def process_response(result: list[str] | list[list[Any]]) -> str:
@@ -99,7 +98,7 @@ def process_response(result: list[str] | list[list[Any]]) -> str:
9998
part = next_
10099
else:
101100
raise ValueError(
102-
f"Unexpected type {type(part)!r}, expected list or str",
101+
f"Unexpected type {type(part)!r}, expected list or str\n{part = }",
103102
)
104103
raise RuntimeError("Unreachable")
105104

@@ -114,23 +113,65 @@ def is_url(text: str) -> bool:
114113
)
115114

116115

117-
def translate_sync(
118-
sentence: str,
119-
to_lang: str,
120-
source_lang: str = "auto",
121-
) -> str:
122-
"""Return sentence translated from source_lang to to_lang."""
123-
if is_url(sentence):
124-
# skip URLs
125-
return sentence
126-
127-
# Get URL from function, which uses urllib to generate proper query
128-
url = get_translation_url(sentence, to_lang, source_lang)
129-
if not url.startswith("http"):
130-
raise ValueError("URL not http(s), is this intended?")
131-
with urllib.request.urlopen(url, timeout=0.5) as file: # noqa: S310
132-
request_result = json.loads(file.read())
133-
return process_response(request_result)
116+
##def translate_sync(
117+
## sentence: str,
118+
## to_lang: str,
119+
## source_lang: str = "auto",
120+
##) -> str:
121+
## """Return sentence translated from source_lang to to_lang."""
122+
## if is_url(sentence):
123+
## # skip URLs
124+
## return sentence
125+
##
126+
## # Get URL from function, which uses urllib to generate proper query
127+
## url = get_translation_url(sentence, to_lang, source_lang)
128+
## if not url.startswith("http"):
129+
## raise ValueError("URL not http(s), is this intended?")
130+
## with urllib.request.urlopen(url, timeout=0.5) as file:
131+
## request_result = json.loads(file.read())
132+
## return process_response(request_result)
133+
134+
135+
##async def get_translated_coroutine(
136+
## client: httpx.AsyncClient,
137+
## sentence: str,
138+
## to_lang: str,
139+
## source_lang: str = "auto",
140+
##) -> str:
141+
## """Return the sentence translated, asynchronously."""
142+
## global AGENT # pylint: disable=global-statement
143+
##
144+
## if is_url(sentence):
145+
## # skip URLs
146+
## return sentence
147+
## # Make sure we have a timeout, so that in the event of network failures
148+
## # or something code doesn't get stuck
149+
## # Get URL from function, which uses urllib to generate proper query
150+
## url = get_translation_url(sentence, to_lang, source_lang)
151+
##
152+
## headers = {
153+
## "User-Agent": "",
154+
## "Accept": "*/*",
155+
## "Accept-Language": "en-US,en-GB; q=0.5",
156+
## "Accept-Encoding": "gzip, deflate",
157+
## "Connection": "keep-alive",
158+
## }
159+
##
160+
## while True:
161+
## AGENT = (AGENT + 1) % len(agents.USER_AGENTS)
162+
## headers["User-Agent"] = agents.USER_AGENTS[AGENT]
163+
##
164+
## try:
165+
## # Go to that URL and get our translated response
166+
## response = await client.get(url, headers=headers)
167+
## # Wait for our response and make it json so we can look at
168+
## # it like a dictionary
169+
## return process_response(response.json())
170+
## except httpx.ConnectTimeout:
171+
## pass
172+
## except json.decoder.JSONDecodeError:
173+
## print(f"{type(response) = }\n{response = }")
174+
## raise
134175

135176

136177
async def get_translated_coroutine(
@@ -148,31 +189,33 @@ async def get_translated_coroutine(
148189
# Make sure we have a timeout, so that in the event of network failures
149190
# or something code doesn't get stuck
150191
# Get URL from function, which uses urllib to generate proper query
151-
url = get_translation_url(sentence, to_lang, source_lang)
192+
url = "https://translate-pa.googleapis.com/v1/translateHtml"
152193

153194
headers = {
154-
"User-Agent": "",
155195
"Accept": "*/*",
156-
"Accept-Language": "en-US,en-GB; q=0.5",
157-
"Accept-Encoding": "gzip, deflate",
158-
"Connection": "keep-alive",
196+
"X-Goog-API-Key": "AIzaSyATBXajvzQLTDHEQbcpq0Ihe0vWDHmO520",
197+
"Content-Type": "application/json+protobuf",
159198
}
160199

161-
while True:
162-
AGENT = (AGENT + 1) % len(agents.USER_AGENTS)
163-
headers["User-Agent"] = agents.USER_AGENTS[AGENT]
200+
sentence = sentence.replace("\n", "<br>")
201+
202+
raw_content = [[[sentence], source_lang, to_lang], "wt_lib"]
203+
content = orjson.dumps(raw_content)
164204

205+
while True:
165206
try:
166207
# Go to that URL and get our translated response
167-
response = await client.get(url, headers=headers)
208+
response = await client.post(url, content=content, headers=headers)
168209
# Wait for our response and make it json so we can look at
169210
# it like a dictionary
170-
return process_response(response.json())
211+
return process_response(response.json()).replace("<br>", "\n")
171212
except httpx.ConnectTimeout:
172213
pass
173214
except json.decoder.JSONDecodeError:
174215
print(f"{type(response) = }\n{response = }")
175216
raise
217+
AGENT = (AGENT + 1) % len(agents.USER_AGENTS)
218+
headers["User-Agent"] = agents.USER_AGENTS[AGENT]
176219

177220

178221
async def translate_async(

0 commit comments

Comments
 (0)