Skip to content

Commit

Permalink
Merge pull request #25 from google/fine-grained-extraction
Browse files (browse the repository at this point in the history)
Implement fine-grained extraction of translatable text
  • Loading branch information
mgeisler authored May 1, 2023
2 parents a876308 + 107484c commit 44b4b46
Show file tree
Hide file tree
Showing 5 changed files with 512 additions and 341 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ anyhow = "1.0.68"
mdbook = "0.4.25"
polib = "0.2.0"
pulldown-cmark = { version = "0.9.2", default-features = false }
pulldown-cmark-to-cmark = "10.0.4"
semver = "1.0.16"
serde_json = "1.0.91"

Expand Down
111 changes: 70 additions & 41 deletions src/bin/mdbook-gettext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,40 +28,47 @@ use anyhow::{anyhow, Context};
use mdbook::book::Book;
use mdbook::preprocess::{CmdPreprocessor, PreprocessorContext};
use mdbook::BookItem;
use mdbook_i18n_helpers::extract_msgs;
use mdbook_i18n_helpers::{extract_events, group_events, reconstruct_markdown, Group};
use polib::catalog::Catalog;
use polib::po_file;
use semver::{Version, VersionReq};
use std::{io, process};

fn translate(text: &str, catalog: &Catalog) -> String {
let mut consumed = 0; // bytes of text consumed so far
let mut output = String::with_capacity(text.len());
let mut translated_events = Vec::new();
let events = extract_events(text, None);
let mut state = None;

for msg in extract_msgs(text) {
let span = msg.span();

// Copy over any bytes of text that precede this message.
if consumed < span.start {
output.push_str(&text[consumed..span.start]);
for group in group_events(&events) {
match group {
Group::Translate(events) => {
// Reconstruct the message.
let (msgid, new_state) = reconstruct_markdown(events, state.clone());
let translated = catalog
.find_message(None, &msgid, None)
.filter(|msg| !msg.flags().is_fuzzy())
.and_then(|msg| msg.msgstr().ok())
.filter(|msgstr| !msgstr.is_empty());
// Generate new events or reuse old events.
match translated {
Some(msgstr) => translated_events.extend(extract_events(msgstr, state)),
None => translated_events.extend_from_slice(events),
}
// Advance the state.
state = Some(new_state);
}
Group::Skip(events) => {
// Copy the events unchanged to the output.
translated_events.extend_from_slice(events);
// Advance the state.
let (_, new_state) = reconstruct_markdown(events, state);
state = Some(new_state);
}
}

// Insert the translated text
let msg_text = msg.text(text);
let translated = catalog
.find_message(None, msg_text, None)
.filter(|msg| !msg.flags().is_fuzzy())
.and_then(|msg| msg.msgstr().ok())
.filter(|msgstr| !msgstr.is_empty())
.unwrap_or(msg_text);
output.push_str(translated);
consumed = span.end;
}

// Handle any text left over after the last message.
let suffix = &text[consumed..];
output.push_str(suffix);
output
let (translated, _) = reconstruct_markdown(&translated_events, None);
translated
}

fn translate_book(ctx: &PreprocessorContext, mut book: Book) -> anyhow::Result<Book> {
Expand Down Expand Up @@ -160,19 +167,22 @@ mod tests {
#[test]
fn test_translate_single_paragraph() {
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
assert_eq!(translate("foo bar\n", &catalog), "FOO BAR\n");
// The output is normalized so the newline disappears.
assert_eq!(translate("foo bar\n", &catalog), "FOO BAR");
}

#[test]
fn test_translate_paragraph_with_leading_newlines() {
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
assert_eq!(translate("\n\n\nfoo bar\n", &catalog), "\n\n\nFOO BAR\n");
// The output is normalized so the newlines disappear.
assert_eq!(translate("\n\n\nfoo bar\n", &catalog), "FOO BAR");
}

#[test]
fn test_translate_paragraph_with_trailing_newlines() {
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
assert_eq!(translate("foo bar\n\n\n", &catalog), "FOO BAR\n\n\n");
// The output is normalized so the newlines disappear.
assert_eq!(translate("foo bar\n\n\n", &catalog), "FOO BAR");
}

#[test]
Expand All @@ -191,7 +201,7 @@ mod tests {
\n\
FOO BAR\n\
\n\
last paragraph\n"
last paragraph"
);
}

Expand All @@ -214,33 +224,52 @@ mod tests {
PARAGRAPH",
),
]);
// Paragraph separation is kept intact while translating.
// Paragraph separation is normalized when translating.
assert_eq!(
translate(
"\n\
first\n\
"first\n\
paragraph\n\
\n\
\n\
\n\
last\n\
paragraph\n\
\n\
\n",
paragraph\n",
&catalog
),
"\n\
FIRST\n\
"FIRST\n\
TRANSLATED\n\
PARAGRAPH\n\
\n\
\n\
\n\
LAST\n\
TRANSLATED\n\
PARAGRAPH\n\
PARAGRAPH"
);
}

#[test]
fn test_translate_code_block() {
let catalog = create_catalog(&[(
"fn foo() {\n\n let x = 10;\n\n}\n",
"fn FOO() {\n\n let X = 10;\n\n}\n",
)]);
assert_eq!(
translate(
"Text before.\n\
\n\
\n\
```rust,editable\n\
fn foo() {\n\n let x = 10;\n\n}\n\
```\n\
\n\
Text after.\n",
&catalog
),
"Text before.\n\
\n\
```rust,editable\n\
fn FOO() {\n\n let X = 10;\n\n}\n\
```\n\
\n\
\n"
Text after.",
);
}
}
10 changes: 5 additions & 5 deletions src/bin/mdbook-xgettext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
use anyhow::{anyhow, Context};
use mdbook::renderer::RenderContext;
use mdbook::BookItem;
use mdbook_i18n_helpers::extract_msgs;
use mdbook_i18n_helpers::extract_messages;
use polib::catalog::Catalog;
use polib::message::Message;
use polib::metadata::CatalogMetadata;
Expand Down Expand Up @@ -88,9 +88,9 @@ fn create_catalog(ctx: &RenderContext) -> anyhow::Result<Catalog> {
Some(path) => ctx.config.book.src.join(path),
None => continue,
};
for msg in extract_msgs(&chapter.content) {
let source = format!("{}:{}", path.display(), msg.line_number());
add_message(&mut catalog, msg.text(&chapter.content), &source);
for (lineno, msgid) in extract_messages(&chapter.content) {
let source = format!("{}:{}", path.display(), lineno);
add_message(&mut catalog, &msgid, &source);
}
}
}
Expand Down Expand Up @@ -214,7 +214,7 @@ mod tests {
.collect::<Vec<&str>>(),
&[
"The Foo Chapter",
"# How to Foo",
"How to Foo",
"The first paragraph about Foo.\n\
Still the first paragraph."
]
Expand Down
Loading

0 comments on commit 44b4b46

Please sign in to comment.