Skip to content

Commit

Permalink
Add FR translation for R-messages (#19)
Browse files Browse the repository at this point in the history
* add FR message translation

* wrap cli_abort for translation

* add test

* add NEWS

* fix typos
  • Loading branch information
cregouby authored Sep 17, 2024
1 parent 8a8d835 commit 33ec18b
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 7 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Title: Fast Text Tokenization
Version: 0.1.4.9000
Authors@R: c(
person("Daniel", "Falbel", , "daniel@posit.co", c("aut", "cre")),
person("Christophe", "Regouby", , "christophe.regouby@free.fr", c("ctb")),
person(family = "Posit", role = c("cph"))
)
Description:
Expand All @@ -14,7 +15,7 @@ License: MIT + file LICENSE
SystemRequirements: Rust tool chain w/ cargo, libclang/llvm-config
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Depends:
R (>= 4.2.0)
Imports:
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# tok (development version)

- Added French (FR) translation of R-level messages. (#19, @cregouby)

# tok 0.1.4

- Updated libR-sys to fix mac oldrel notes. (#18)
Expand Down
2 changes: 1 addition & 1 deletion R/encoding.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ encoding <- R6::R6Class(
if (inherits(encoding, "REncoding")) {
self$.encoding <- encoding
} else {
cli::cli_abort("Expected class {.cls REncoding} but got {.cls {class(encoding)}}.")
cli::cli_abort(gettext("Expected class {.cls REncoding} but got {.cls {class(encoding)}}."))
}
}
),
Expand Down
10 changes: 5 additions & 5 deletions R/tokenizer.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ tokenizer <- R6::R6Class(
#' the tokenizer.
#' @param path Path to tokenizer.json file
from_file = function(path) {
cli::cli_abort("This is a static method. Not available for tokenizers instances.")
cli::cli_abort(gettext("This is a static method. Not available for tokenizers instances."))
},

#' @description
Expand All @@ -94,7 +94,7 @@ tokenizer <- R6::R6Class(
#' @param auth_token An optional auth token used to access private repositories
#' on the Hugging Face Hub
from_pretrained = function(identifier, revision = "main", auth_token = NULL) {
cli::cli_abort("This is a static method. Not available for tokenizers instances.")
cli::cli_abort(gettext("This is a static method. Not available for tokenizers instances."))
},

#' @description
Expand All @@ -104,7 +104,7 @@ tokenizer <- R6::R6Class(
#' @param files character vector of file paths.
train = function(files, trainer) {
if (!inherits(trainer, "tok_trainer"))
cli::cli_abort("{.arg trainer} must inherit from {.cls tok_trainer}.")
cli::cli_abort(gettext("{.arg trainer} must inherit from {.cls tok_trainer}."))

self$.tokenizer$train_from_files(trainer$.trainer, normalizePath(files))
},
Expand Down Expand Up @@ -227,15 +227,15 @@ tokenizer <- R6::R6Class(
#' @field padding Gets padding configuration
padding = function(x) {
if (!missing(x)) {
cli::cli_abort("Can't be set this way, use {.fn enable_padding}.")
cli::cli_abort(gettext("Can't be set this way, use {.fn enable_padding}."))
}

self$.tokenizer$get_padding()
},
#' @field truncation Gets truncation configuration
truncation = function(x) {
if (!missing(x)) {
cli::cli_abort("Can't be set this way, use {.fn enable_truncation}.")
cli::cli_abort(gettext("Can't be set this way, use {.fn enable_truncation}."))
}

self$.tokenizer$get_truncation()
Expand Down
Binary file added inst/po/fr/LC_MESSAGES/R-tok.mo
Binary file not shown.
32 changes: 32 additions & 0 deletions po/R-fr.po
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
msgid ""
msgstr ""
"Project-Id-Version: tok 0.1.4.9000\n"
"POT-Creation-Date: 2024-09-04 19:29+0200\n"
"PO-Revision-Date: 2024-09-04 19:39+0200\n"
"Last-Translator: \n"
"Language-Team: \n"
"Language: fr\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Generator: Poedit 3.4.3\n"

#: encoding.R:29
msgid "Expected class {.cls REncoding} but got {.cls {class(encoding)}}."
msgstr "Classe attendue {.cls REncoding}. Ici la classe est {.cls {class(encoding)}}."

#: tokenizer.R:86 tokenizer.R:97
msgid "This is a static method. Not available for tokenizers instances."
msgstr "Il s'agit d'une méthode statique. Elle n'est pas disponible pour les instances de tokenizers."

#: tokenizer.R:107
msgid "{.arg trainer} must inherit from {.cls tok_trainer}."
msgstr "{.arg trainer} doit hériter de {.cls tok_trainer}."

#: tokenizer.R:230
msgid "Can't be set this way, use {.fn enable_padding}."
msgstr "Ne peut pas être défini de cette manière, vous devez utiliser {.fn enable_padding}."

#: tokenizer.R:238
msgid "Can't be set this way, use {.fn enable_truncation}."
msgstr "Ne peut pas être défini de cette manière, vous devez utiliser {.fn enable_truncation}."
31 changes: 31 additions & 0 deletions po/R-tok.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
msgid ""
msgstr ""
"Project-Id-Version: tok 0.1.4.9000\n"
"POT-Creation-Date: 2024-09-04 19:29+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"

#: encoding.R:29
msgid "Expected class {.cls REncoding} but got {.cls {class(encoding)}}."
msgstr ""

#: tokenizer.R:86 tokenizer.R:97
msgid "This is a static method. Not available for tokenizers instances."
msgstr ""

#: tokenizer.R:107
msgid "{.arg trainer} must inherit from {.cls tok_trainer}."
msgstr ""

#: tokenizer.R:230
msgid "Can't be set this way, use {.fn enable_padding}."
msgstr ""

#: tokenizer.R:238
msgid "Can't be set this way, use {.fn enable_truncation}."
msgstr ""
15 changes: 15 additions & 0 deletions tests/testthat/test-message-translations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Checks that the error raised by `tokenizer$train()` for an invalid `trainer`
# argument is emitted in French when the session language is set to "fr".
test_that("R-level cli_abort messages are correctly translated in FR", {
  # Message catalogs cannot be looked up when R was built without NLS support.
  skip_if_not(
    capabilities("NLS")[["NLS"]],
    "R was built without NLS support"
  )

  # Obtaining the pretrained tokenizer is the only step that is allowed to
  # fail for reasons unrelated to this test. Guard that step alone and turn a
  # failure into an explicit skip, instead of wrapping the whole test in
  # `try()`, which also discards any error raised by the expectation itself
  # and lets the test pass without checking anything.
  tok <- withr::with_envvar(
    c(HUGGINGFACE_HUB_CACHE = tempdir()),
    tryCatch(
      tokenizer$from_pretrained("gpt2"),
      error = function(e) NULL
    )
  )
  skip_if(is.null(tok), "Could not load the pretrained 'gpt2' tokenizer")

  # A file path is deliberately passed as `trainer`: it does not inherit from
  # `tok_trainer`, so `train()` must abort with the translated message.
  temp_json <- tempfile(fileext = ".json")
  withr::with_language(
    "fr",
    expect_error(
      tok$train(temp_json, temp_json),
      regexp = "doit hériter de",
      fixed = TRUE
    )
  )
})

0 comments on commit 33ec18b

Please sign in to comment.