Skip to content

Commit

Permalink
Merge pull request #475 from awwaawwa/yadt
Browse files Browse the repository at this point in the history
  • Loading branch information
Byaidu authored Jan 20, 2025
2 parents e44fc83 + e53fb51 commit c7a3cbd
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.x'
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,5 @@ cython_debug/
.vscode
.DS_Store
uv.lock
*.pdf
*.docx
115 changes: 114 additions & 1 deletion pdf2zh/pdf2zh.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
from typing import List, Optional

from pdf2zh import __version__, log
from pdf2zh.high_level import translate
from pdf2zh.high_level import translate, download_remote_fonts
from pdf2zh.doclayout import OnnxModel, ModelInstance
import os

from pdf2zh.config import ConfigManager
from yadt.translation_config import TranslationConfig as YadtConfig
from yadt.high_level import translate as yadt_translate


def create_parser() -> argparse.ArgumentParser:
Expand Down Expand Up @@ -164,6 +166,13 @@ def create_parser() -> argparse.ArgumentParser:
help="config file.",
)

parse_params.add_argument(
"--yadt",
default=False,
action="store_true",
help="Use experimental backend yadt.",
)

return parser


Expand All @@ -178,6 +187,7 @@ def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
pages.extend(range(int(start) - 1, int(end)))
else:
pages.append(int(p) - 1)
parsed_args.raw_pages = parsed_args.pages
parsed_args.pages = pages

return parsed_args
Expand Down Expand Up @@ -255,6 +265,8 @@ def main(args: Optional[List[str]] = None) -> int:
raise ValueError("prompt error.")

print(parsed_args)
if parsed_args.yadt:
return yadt_main(parsed_args)
if parsed_args.dir:
untranlate_file = find_all_files_in_directory(parsed_args.files[0])
parsed_args.files = untranlate_file
Expand All @@ -265,5 +277,106 @@ def main(args: Optional[List[str]] = None) -> int:
return 0


def yadt_main(parsed_args) -> int:
    """Translate the requested files with the experimental yadt backend.

    Args:
        parsed_args: Namespace produced by ``parse_args``. The attributes read
            here are ``dir``, ``files``, ``lang_in``, ``lang_out``, ``output``,
            ``service``, ``prompt``, ``debug``, ``thread`` and, when present,
            ``raw_pages``.

    Returns:
        ``0`` once every input file has been handed to ``yadt_translate``.

    Raises:
        ValueError: If the prompt file cannot be read or turned into a
            template, or if the service name matches no known translator.
    """
    if parsed_args.dir:
        pending_files = find_all_files_in_directory(parsed_args.files[0])
    else:
        pending_files = parsed_args.files
    lang_in = parsed_args.lang_in
    lang_out = parsed_args.lang_out
    outputdir = parsed_args.output if parsed_args.output else None
    font_path = download_remote_fonts(lang_out.lower())

    # "service" may carry an optional model suffix: "<name>:<model>".
    service_name, _, model_part = parsed_args.service.partition(":")
    service_model = model_part if ":" in parsed_args.service else None

    envs = {}
    prompt = []

    if parsed_args.prompt:
        try:
            with open(parsed_args.prompt, "r", encoding="utf-8") as prompt_file:
                prompt = Template(prompt_file.read())
        except Exception as err:
            # Keep the original failure attached so the real cause is visible.
            raise ValueError("prompt error.") from err

    from pdf2zh.translator import (
        AzureOpenAITranslator,
        GoogleTranslator,
        BingTranslator,
        DeepLTranslator,
        DeepLXTranslator,
        OllamaTranslator,
        OpenAITranslator,
        ZhipuTranslator,
        ModelScopeTranslator,
        SiliconTranslator,
        GeminiTranslator,
        AzureTranslator,
        TencentTranslator,
        DifyTranslator,
        AnythingLLMTranslator,
        XinferenceTranslator,
        ArgosTranslator,
        GorkTranslator,
        GroqTranslator,
        DeepseekTranslator,
        OpenAIlikedTranslator,
    )

    # Lookup order is preserved from the original loop; first match wins.
    translator_classes = (
        GoogleTranslator,
        BingTranslator,
        DeepLTranslator,
        DeepLXTranslator,
        OllamaTranslator,
        XinferenceTranslator,
        AzureOpenAITranslator,
        OpenAITranslator,
        ZhipuTranslator,
        ModelScopeTranslator,
        SiliconTranslator,
        GeminiTranslator,
        AzureTranslator,
        TencentTranslator,
        DifyTranslator,
        AnythingLLMTranslator,
        ArgosTranslator,
        GorkTranslator,
        GroqTranslator,
        DeepseekTranslator,
        OpenAIlikedTranslator,
    )
    translator_cls = next(
        (cls for cls in translator_classes if cls.name == service_name), None
    )
    if translator_cls is None:
        raise ValueError("Unsupported translation service")
    translator = translator_cls(
        lang_in, lang_out, service_model, envs=envs, prompt=prompt
    )

    # raw_pages is the page spec as given before parsing. Joining a string
    # character by character would turn "1-3" into "1,-,3", so a string is
    # forwarded untouched; only a non-string iterable is joined with commas.
    # NOTE(review): raw_pages looks to be set only when --pages is given;
    # this assumes YadtConfig accepts pages=None for "all pages" - confirm.
    raw_pages = getattr(parsed_args, "raw_pages", None)
    if raw_pages is None or isinstance(raw_pages, str):
        pages = raw_pages
    else:
        pages = ",".join(str(page) for page in raw_pages)

    for input_path in pending_files:
        # Drop quotes that may surround a path.
        input_path = input_path.strip("\"'")
        yadt_config = YadtConfig(
            input_file=input_path,
            font=font_path,
            pages=pages,
            output_dir=outputdir,
            translator=translator,
            debug=parsed_args.debug,
            lang_in=lang_in,
            lang_out=lang_out,
            no_dual=False,
            no_mono=False,
            qps=parsed_args.thread,
        )
        yadt_translate(yadt_config)
    return 0


if __name__ == "__main__":
sys.exit(main())
20 changes: 20 additions & 0 deletions pdf2zh/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,17 @@ def prompt(self, text, prompt):
def __str__(self):
    """Return name, source language, target language and model, space-separated."""
    return "{} {} {} {}".format(
        self.name, self.lang_in, self.lang_out, self.model
    )

def get_rich_text_left_placeholder(self, id: int):
    """Return the opening placeholder tag for *id*, e.g. ``<b3>`` for 3."""
    return "<b{}>".format(id)

def get_rich_text_right_placeholder(self, id: int):
    """Return the closing placeholder tag for *id*, e.g. ``</b3>`` for 3."""
    return "</b{}>".format(id)

def get_formular_placeholder(self, id: int):
    """Return the formula placeholder for *id*.

    The result is the left rich-text placeholder immediately followed by the
    right rich-text placeholder, both obtained from ``self`` for the same id.
    """
    opening = self.get_rich_text_left_placeholder(id)
    closing = self.get_rich_text_right_placeholder(id)
    return opening + closing


class GoogleTranslator(BaseTranslator):
name = "google"
Expand Down Expand Up @@ -384,6 +395,15 @@ def do_translate(self, text) -> str:
)
return response.choices[0].message.content.strip()

def get_formular_placeholder(self, id: int):
    """Return the placeholder for *id* in double-brace form, e.g. ``{{v3}}``."""
    return f"{{{{v{id}}}}}"

def get_rich_text_left_placeholder(self, id: int):
    """Return the left rich-text placeholder for *id*.

    Delegates to ``self.get_formular_placeholder`` with the same id.
    """
    left_placeholder = self.get_formular_placeholder(id)
    return left_placeholder

def get_rich_text_right_placeholder(self, id: int):
    """Return the right rich-text placeholder for *id*.

    Delegates to ``self.get_formular_placeholder`` with ``id + 1``.
    """
    following_id = id + 1
    return self.get_formular_placeholder(following_id)


class AzureOpenAITranslator(BaseTranslator):
name = "azure-openai"
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description = "Latex PDF Translator"
authors = [{ name = "Byaidu", email = "byaidux@gmail.com" }]
license = "AGPL-3.0"
readme = "README.md"
requires-python = ">=3.9,<3.13"
requires-python = ">=3.10,<3.13"
classifiers = [
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
Expand All @@ -32,7 +32,8 @@ dependencies = [
"pikepdf",
"peewee>=3.17.8",
"argostranslate",
"fontTools"
"fontTools",
"yadt>=0.0.1a20, <0.0.2",
]

[project.optional-dependencies]
Expand Down

0 comments on commit c7a3cbd

Please sign in to comment.