diff --git a/openlibrary/i18n/__init__.py b/openlibrary/i18n/__init__.py
index 24488345c58..672e54020e0 100644
--- a/openlibrary/i18n/__init__.py
+++ b/openlibrary/i18n/__init__.py
@@ -12,6 +12,7 @@
 from babel.messages import Catalog, Message
 from babel.messages.extract import (
     extract_from_dir,
+    extract_from_file,
     extract_python,
 )
 from babel.messages.mofile import write_mo
@@ -38,7 +39,9 @@ def warning_color_fn(text: str) -> str:
     return '\033[93m' + text + '\033[0m'
 
 
-def get_untracked_files(dirs: list[str], extensions: tuple[str, str] | str) -> set:
+def get_untracked_files(
+    dirs: list[str], extensions: tuple[str, str] | str
+) -> set[Path]:
     """Returns a set of all currently untracked files with specified extension(s)."""
     untracked_files = {
         Path(line)
@@ -162,7 +165,7 @@ def extract_templetor(fileobj, keywords, comment_tags, options):
     return extract_python(f, keywords, comment_tags, options)
 
 
-def extract_messages(dirs: list[str], verbose: bool, skip_untracked: bool):
+def extract_messages(sources: list[str], verbose: bool, skip_untracked: bool):
     # The creation date is fixed to prevent merge conflicts on this line as a result of i18n auto-updates
     # In the unlikely event we need to update the fixed creation date, you can change the hard-coded date below
     fixed_creation_date = datetime.fromisoformat('2024-05-01 18:58-0400')
@@ -176,25 +179,43 @@ def extract_messages(dirs: list[str], verbose: bool, skip_untracked: bool):
 
     skipped_files = set()
     if skip_untracked:
-        skipped_files = get_untracked_files(dirs, ('.py', '.html'))
+        skipped_files = get_untracked_files(sources, ('.py', '.html'))
 
-    for d in dirs:
-        extracted = extract_from_dir(
-            d, METHODS, comment_tags=COMMENT_TAGS, strip_comment_tags=True
-        )
+    for source in map(Path, sources):
+        counts: dict[Path, int] = {}
+
+        if source.is_file():
+            extracted = extract_from_file(
+                next(method for (glb, method) in METHODS if source.match(glb)),
+                source,
+                comment_tags=COMMENT_TAGS,
+                strip_comment_tags=True,
+            )
+
+            # Make it have the same shape as extract_from_dir
+            extracted = ((source, source, *x) for x in extracted)
+        else:
+            extracted = extract_from_dir(
+                source,
+                METHODS,
+                comment_tags=COMMENT_TAGS,
+                strip_comment_tags=True,
+            )
 
-        counts: dict[str, int] = {}
-        for filename, lineno, message, comments, context in extracted:
-            file_path = Path(d) / filename
+            # Make it have the same shape as extract_from_file
+            extracted = ((source / x[0], x[0], *x[1:]) for x in extracted)
+
+        for file_path, partial_path, lineno, message, comments, context in extracted:
             if file_path in skipped_files:
                 continue
-            counts[filename] = counts.get(filename, 0) + 1
-            catalog.add(message, None, [(filename, lineno)], auto_comments=comments)
+            counts[file_path] = counts.get(file_path, 0) + 1
+            catalog.add(
+                message, None, [(str(partial_path), lineno)], auto_comments=comments
+            )
 
         if verbose:
-            for filename, count in counts.items():
-                path = filename if d == filename else os.path.join(d, filename)
-                print(f"{count}\t{path}", file=sys.stderr)
+            for file_path, count in counts.items():
+                print(f"{count}\t{file_path}", file=sys.stderr)
 
     path = os.path.join(root, 'messages.pot')
     with open(path, 'wb') as f:
diff --git a/scripts/i18n-messages b/scripts/i18n-messages
index c65e3808f5c..2484988d993 100755
--- a/scripts/i18n-messages
+++ b/scripts/i18n-messages
@@ -2,6 +2,7 @@
 """Utility script to extract all translatable messages from templates and macros
 and write to openlibrary/i18n/messages.pot file.
 """
+import subprocess
 import sys
 
 import _init_path  # noqa: F401  Imported for its side effect of setting PYTHONPATH
@@ -30,9 +31,17 @@ def main(cmd, args):
         message_sources = [
             'openlibrary/templates/',
             'openlibrary/macros/',
-            # TODO: We should check all python files somehow, but too slow
-            'openlibrary/plugins/upstream',
+            *(
+                subprocess.run(
+                    ["grep", "-rlF", "--include=*.py", " _(", "openlibrary/"],
+                    capture_output=True,
+                    check=False,
+                )
+                .stdout.decode()
+                .splitlines()
+            ),
         ]
+
         i18n.extract_messages(
             message_sources,
             verbose='--verbose' in args,