Skip to content

Commit

Permalink
Merge pull request #3 from bookingexperts/21941-bug-update-i18n-tasks…
Browse files Browse the repository at this point in the history
…-interpolations-for-deepl

21941 replaces the <i18n> interpolation tag in i18n-tasks for our cus…
  • Loading branch information
barak-neeman authored Jul 31, 2024
2 parents 383db5d + b135469 commit 5c48708
Show file tree
Hide file tree
Showing 2 changed files with 177 additions and 7 deletions.
87 changes: 84 additions & 3 deletions lib/i18n/tasks/translators/deepl_translator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
require 'i18n/tasks/translators/base_translator'

module I18n::Tasks::Translators
class DeeplTranslator < BaseTranslator
class DeeplTranslator < BaseTranslator # rubocop:disable Metrics/ClassLength
# max allowed texts per request
BATCH_SIZE = 50
# those languages must be specified with their sub-kind e.g en-us
Expand Down Expand Up @@ -60,21 +60,102 @@ def options_for_plain

# @param [String] value
# @return [String] 'hello, %{name}' => 'hello, <i18n>%{name}</i18n>'
def replace_interpolations(value)
def original_replace_interpolations(value)
value.gsub(INTERPOLATION_KEY_RE, '<i18n>\0</i18n>')
end

# @param [String] untranslated
# @param [String] translated
# @return [String] 'hello, <i18n>%{name}</i18n>' => 'hello, %{name}'
def restore_interpolations(untranslated, translated)
def original_restore_interpolations(untranslated, translated)
return translated if untranslated !~ INTERPOLATION_KEY_RE

translated.gsub(%r{</?i18n>}, '')
rescue StandardError => e
raise_interpolation_error(untranslated, translated, e)
end

# deepl does a better job with interpolations when it doesn't have to deal
# with <br> tags, so we replace all of them with meaningless asterisk chains
BR_REGEXP = %r{(<br\s*/?>\s*)+}i.freeze
BR_SINGLE_MARKER = ' *** '
BR_DOUBLE_MARKER = ' ***** '

# letting deepl 'read' the interpolations gives better translations (and
# solves the problem of interpolations getting pushed all the way to the
# front of the sentence), however, deepl will also try to translate
# the interpolations and that gets messy.
# we use nonsense three-letter acronyms so deepl will 'read' them and leave
# them alone (the letter X also works very well, except in sentences with
# several consecutive interpolations because that reads X X X and deepl
# doesn't handle that well)
# deepl also needs to know if an interpolation will be a word or a number,
# for romance languages it matters. a little Spanish lesson to illustrate:
# "%{foo} betalingen" translates either to "facturas %{foo}"
# (openstaande betalingen -> facturas pendientes) or to "%{foo} facturas"
# (5 betalingen -> 5 facturas)
# for interpolation keys that are usually numeric, we pick a number
# instead of the three-letter acronym (more consistency in how we name
# interpolation keys would help)
LETTER_SUBS = %w[RYX QFN VLB XOG DWP ZMQ JZQ WVS LRX HPM].freeze
NUM_SUBS = %w[17 19 23 29 31 37 41 43 47 53].freeze

def sub_for_handle(handle, index)
case handle.gsub(/[^a-z]/, '')
when 'count', 'minutes', 'hours'
NUM_SUBS[index % NUM_SUBS.size]
else
LETTER_SUBS[index % LETTER_SUBS.size]
end
end

# BEX version of replace_interpolation
def replace_interpolations(value)
index = 0
value.gsub(INTERPOLATION_KEY_RE) do |handle|
sub = sub_for_handle(handle, index)
index += 1
"<var handle=\"#{handle}\" sub=\"#{sub}\">#{sub}</var>"
end.gsub(BR_REGEXP) do |br|
if br.downcase.count('b') == 2
# never more than two <br> in a row, it gets messy
BR_DOUBLE_MARKER
else
BR_SINGLE_MARKER
end
end
end

# reversing our substitutions should be straight-forward, but it's not
# because deepl gets creative. cases are explained inline.
def restore_interpolations(untranslated, translated)
translated.gsub(%r{(.?)<var handle="([^"]*)" sub="([^"]*)">([^<]*)</var>}) do
char = ::Regexp.last_match(1)
handle = ::Regexp.last_match(2)
sub = ::Regexp.last_match(3)
body = ::Regexp.last_match(4)
if body == sub
# deepl kept the 'sub' text inside the <var> tag and nothing else, clean.
"#{char}#{handle}"
elsif body.index(sub)
# deepl took some letters from outside the <var> tag and placed them
# inside the <var> e.g. task <var>"RYX</var>"
before, after = body.split(sub, 2)
"#{before}#{handle}#{after}"
elsif "#{char}#{body}".downcase == sub.downcase
# deepl took the first letter from inside the <var> tag and placed it
# immediately before the <var> tag e.g. R<var>yx</var>
handle
else
# instead of trying to look normal the fallback prints something
# obviously wrong hoping to get some attention and a manual fix
"!!!!!#{sub.inspect} (#{char.inspect} #{body.inspect})!!!!!"
end
end.gsub(BR_DOUBLE_MARKER, '<br /><br />').gsub(BR_SINGLE_MARKER, '<br />')
rescue StandardError => e
raise_interpolation_error(untranslated, translated, e)
end

def no_results_error_message
I18n.t('i18n_tasks.deepl_translate.errors.no_results')
end
Expand Down
97 changes: 93 additions & 4 deletions spec/deepl_translate_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

text_test = [
'key',
"Hello, %{user} O'Neill! How are you? {{ Check out this Liquid tag, it should not be translated }} \
{% That applies to this Liquid tag as well %}",
"¡Hola, %{user} O'Neill! ¿Qué tal estás? {{ Check out this Liquid tag, it should not be translated }} \
{% That applies to this Liquid tag as well %}"
"Hello, %{user} O'Neill! How are you? {{ Check out this Liquid tag, it should not be translated }} " \
'{% That applies to this Liquid tag as well %}',
"¡Hola, %{user} O'Neill! ¿Qué tal? {{ Check out this Liquid tag, it should not be translated }} " \
'{% That applies to this Liquid tag as well %}'
]

html_test_plrl = [
Expand Down Expand Up @@ -92,4 +92,93 @@
end
end
end

# Don't expect deepl's answers to be exactly the same each run
describe 'translating Dutch into other languages' do
let(:base_task) { I18n::Tasks::BaseTask.new }

before do
skip 'temporarily disabled on JRuby due to https://github.com/jruby/jruby/issues/4802' if RUBY_ENGINE == 'jruby'
skip 'DEEPL_AUTH_KEY env var not set' unless ENV['DEEPL_AUTH_KEY']
end

it 'tells time' do
german, english, spanish =
translate_dutch(hours_and_minutes: '%{hours} uur en %{minutes} minuten')
expect(german).to eq '%{hours} Stunden und %{minutes} Minuten'
expect(english).to eq '%{hours} hours and %{minutes} minutes'
expect(spanish).to eq '%{hours} horas y %{minutes} minutos'
end

it 'counts' do
german, english, spanish =
translate_dutch(other: '%{count} taken')
expect(german).to eq '%{count} Aufgaben'
expect(english).to eq '%{count} tasks'
expect(spanish).to eq '%{count} tareas'
end

it 'assigns' do
german, english, spanish =
translate_dutch(assigned: 'Taak "%{todo}" toegewezen aan %{user}')
expect(german).to eq 'To-dos "%{todo}" zugewiesen an %{user}'
expect(english).to eq 'Task "%{todo}" assigned to %{user}'
expect(spanish).to eq 'Tarea "%{todo}" asignada a %{user}'
end

it 'sings' do
german, english, spanish =
translate_dutch(verse: 'Ik zou zo graag een %{animal} kopen. Ik zag %{count} beren %{food} smeren')
expect(german).to eq 'Ich würde so gerne einen %{animal} kaufen. Ich sah %{count} Bären, die %{food} schmierten'
# greasing is a funny way to say smeren, but we let it slide
expect(english).to eq 'I would so love to buy a %{animal}. I saw %{count} bears greasing %{food}'
expect(spanish).to eq 'Me encantaría comprar un %{animal}. Vi %{count} osos engrasando %{food}'
end

it 'sends emails' do
german, english, spanish =
translate_dutch(
email_body_html: '{{ booking.greeting }},<br><br>Bijgevoegd ziet u een factuur van {{ park.name }} met ' \
'factuurnummer {{ invoice.invoice_nr }}.<br />Volgens onze administratie had het ' \
'verschuldigde bedrag van {{ locals.payment_collector_total }} op ' \
'{{ locals.payment_collector_deadline }} moeten zijn betaald. Helaas hebben we nog ' \
'geen betaling ontvangen.<br>'
)
expect(german).to eq '{{ booking.greeting }},<br /><br />Anbei finden Sie eine Rechnung von {{ park.name }} ' \
'mit der Rechnungsnummer {{ invoice.invoice_nr }}.<br />Laut unserer Verwaltung hätte der ' \
'von {{ locals.payment_collector_total }} geschuldete Betrag ' \
'am {{ locals.payment_collector_deadline }} bezahlt werden müssen. Leider haben wir die ' \
'Zahlungen noch nicht erhalten.<br />'
expect(english).to eq '{{ booking.greeting }},<br /><br />Attached please find an invoice from {{ park.name }} ' \
'with invoice number {{ invoice.invoice_nr }}.<br />According to our records, the amount ' \
'due from {{ locals.payment_collector_total }} on ' \
'{{ locals.payment_collector_deadline }} should have been paid. Unfortunately, we have ' \
'not yet received payment.<br />'
expect(spanish).to eq '{{ booking.greeting }},<br /><br />Adjuntamos una factura de {{ park.name }} con el ' \
'número de factura {{ invoice.invoice_nr }}.<br />Según nuestros registros, el importe ' \
'adeudado por {{ locals.payment_collector_total }} debería haber sido abonado en ' \
'{{ locals.payment_collector_deadline }}. Lamentablemente, aún no hemos recibido ' \
'el pago.<br />'
end

it 'asks itself why are you even translating this' do
german, english, spanish =
translate_dutch(action: '%{subject} %{verb} %{object}')
expect(german).to eq '%{subject} %{verb} %{object}'
expect(english).to eq '%{subject} %{verb} %{object}'
expect(spanish).to eq '%{subject} %{verb} %{object}'
end

def translate_dutch(dutch_pair)
key = dutch_pair.keys.first
phrase = dutch_pair[key]
locales = %w[de en-us es]
branches = locales.each_with_object({}) do |locale, hash|
hash[locale] = { 'testing' => { key => phrase } }
end
tree = build_tree(branches)
translations = base_task.translate_forest(tree, from: 'nl', backend: :deepl)
locales.map { |locale| translations[locale]['testing'][key].value.strip }
end
end
end

0 comments on commit 5c48708

Please sign in to comment.