From ffb537385355f4defbfa9d900d1a698c7ccd4a9b Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 16 Jan 2022 23:33:44 +0400 Subject: [PATCH] i18n data is kept in and generated from JSON files Introduced a new resource compiler script kiwix-compile-i18n that processes i18n string data stored in JSON files and generates sorted C++ tables of string keys and values for all languages. ---
Review notes (commentary only; not part of the commit message or of any hunk):
* scripts/kiwix-compile-i18n, C++ code templates: as shown in this copy the templates contain bare "$$" tokens, while expand_cxx_template() only rewrites "$<" and ">$" and its callers pass TABLE_NAME, TABLE_ENTRIES, LANG_STRING_LITERAL, STRING_TABLE_NAME, STRING_DATA, LANG_TABLE and LANG_COUNT. Without "$<NAME>$" markers none of these would be substituted, so presumably the "<NAME>" parts were stripped from this copy of the patch; TODO confirm against the commit. The two empty "#include" context lines of kiwix-compile-resources look like the same damage.
* Resource.__init__: open(..., 'r') is called without an encoding argument, so the JSON files are decoded with the platform default encoding; consider encoding='utf-8'.
* Resource.__init__ / __main__: every line returned by f.readlines() becomes a Resource, so a blank line in i18n_resources_list.txt reaches to_identifier('') where ident[0] raises IndexError.
* cxx_string_literal: json.dumps() is used with its default ensure_ascii=True, which writes characters outside the BMP as a pair of \uXXXX surrogate escapes; C++ rejects surrogate code points in universal-character-names, so such a translation would break the generated file.
* lang_code: prints "file -> lang" to stdout for every resource, and returns the identifier-mangled name that get_lang_table_entry() also emits as the language string (a file named "pt-br.json" would be registered as "pt_br").
* src/server/i18n.cpp: the hunk removes i18nStringTables but keeps enStrings as context, so enStrings appears to be left unused; TODO confirm.
* static/i18n/qqq.json: "to which the string is being translated to" repeats "to".
* scripts/kiwix-compile-i18n.1: the SYNOPSIS line ends with an unpaired \fR and an empty .TP precedes .SH AUTHOR.
scripts/kiwix-compile-i18n | 167 ++++++++++++++++++++++++++++++++ scripts/kiwix-compile-i18n.1 | 18 ++++ scripts/kiwix-compile-resources | 2 +- scripts/meson.build | 6 ++ src/meson.build | 1 + src/server/i18n.cpp | 17 ++-- static/i18n/en.json | 8 ++ static/i18n/qqq.json | 9 ++ static/i18n_resources_list.txt | 1 + static/meson.build | 15 +++ 10 files changed, 235 insertions(+), 9 deletions(-) create mode 100755 scripts/kiwix-compile-i18n create mode 100644 scripts/kiwix-compile-i18n.1 create mode 100644 static/i18n/en.json create mode 100644 static/i18n/qqq.json create mode 100644 static/i18n_resources_list.txt diff --git a/scripts/kiwix-compile-i18n b/scripts/kiwix-compile-i18n new file mode 100755 index 000000000..3dfb3a194 --- /dev/null +++ b/scripts/kiwix-compile-i18n @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 + +''' +Copyright 2022 Veloman Yunkan + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA.
+''' + +import argparse +import os.path +import re +import json + +def to_identifier(name): + ident = re.sub(r'[^0-9a-zA-Z]', '_', name) + if ident[0].isnumeric(): + return "_"+ident + return ident + +def lang_code(filename): + filename = os.path.basename(filename) + lang = to_identifier(os.path.splitext(filename)[0]) + print(filename, '->', lang) + return lang + +# In CXX code templates, fragments that must be replaced are marked up with +# $< and >$, while curly braces (as significant part of C++ syntax) are not +# artificially doubled. The CXX code template is converted into a form usable +# by Python's str.format() via 4 simple replace() operations +def expand_cxx_template(t, **kwargs): + fmt = t.replace('{', '{{') \ + .replace('}', '}}') \ + .replace('$<', '{') \ + .replace('>$', '}') + return fmt.format(**kwargs) + +def cxx_string_literal(s): + # Taking advantage of the fact the JSON string escape rules match + # those of C++ + return 'u8' + json.dumps(s) + +string_table_cxx_template = ''' +const I18nString $$[] = { + $$ +}; +''' + +lang_table_entry_cxx_template = ''' + { + $$, + ARRAY_ELEMENT_COUNT($$), + $$ + }''' + +cxxfile_template = '''// This file is automatically generated. Do not modify it.
+ +#include "server/i18n.h" + +namespace kiwix { +namespace i18n { + +namespace +{ + +$$ + +} // unnamed namespace + +#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0])) + +extern const I18nStringTable stringTables[] = { + $$ +}; + +extern const size_t langCount = $$; + +} // namespace i18n +} // namespace kiwix +''' + +class Resource: + def __init__(self, base_dirs, filename): + filename = filename.strip() + self.filename = filename + self.lang_code = lang_code(filename) + found = False + for base_dir in base_dirs: + try: + with open(os.path.join(base_dir, filename), 'r') as f: + self.data = f.read() + found = True + break + except FileNotFoundError: + continue + if not found: + raise Exception("Impossible to find {}".format(filename)) + + + def get_string_table_name(self): + return "string_table_for_" + self.lang_code + + def get_string_table(self): + table_entries = ",\n ".join(self.get_string_table_entries()) + return expand_cxx_template(string_table_cxx_template, + TABLE_NAME=self.get_string_table_name(), + TABLE_ENTRIES=table_entries) + + def get_string_table_entries(self): + d = json.loads(self.data) + for k in sorted(d.keys()): + if k != "@metadata": + key_string = cxx_string_literal(k) + value_string = cxx_string_literal(d[k]) + yield '{ ' + key_string + ', ' + value_string + ' }' + + def get_lang_table_entry(self): + return expand_cxx_template(lang_table_entry_cxx_template, + LANG_STRING_LITERAL=cxx_string_literal(self.lang_code), + STRING_TABLE_NAME=self.get_string_table_name()) + + + +def gen_c_file(resources): + string_data = [] + lang_table = [] + for r in resources: + string_data.append(r.get_string_table()) + lang_table.append(r.get_lang_table_entry()) + + return expand_cxx_template(cxxfile_template, + STRING_DATA="\n".join(string_data), + LANG_TABLE=",\n ".join(lang_table), + LANG_COUNT=len(resources) + ) + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--cxxfile', + required=True, + help='The Cpp
+ file name to generate') + parser.add_argument('i18n_resource_file', + help='The list of resources to compile.') + args = parser.parse_args() + + base_dir = os.path.dirname(os.path.realpath(args.i18n_resource_file)) + with open(args.i18n_resource_file, 'r') as f: + resources = [Resource([base_dir], filename) + for filename in f.readlines()] + + with open(args.cxxfile, 'w') as f: + f.write(gen_c_file(resources)) + diff --git a/scripts/kiwix-compile-i18n.1 b/scripts/kiwix-compile-i18n.1 new file mode 100644 index 000000000..383ae83b9 --- /dev/null +++ b/scripts/kiwix-compile-i18n.1 @@ -0,0 +1,18 @@ +.TH KIWIX-COMPILE-I18N "1" "January 2022" "Kiwix" "User Commands" +.SH NAME +kiwix-compile-i18n \- helper to compile Kiwix i18n (internationalization) data +.SH SYNOPSIS +\fBkiwix\-compile\-i18n\fR [\-h] \-\-cxxfile CXXFILE i18n_resource_file\fR +.SH DESCRIPTION +.TP +i18n_resource_file +The list of i18n resources to compile. +.TP +\fB\-h\fR, \fB\-\-help\fR +show a help message and exit +.TP +\fB\-\-cxxfile\fR CXXFILE +The Cpp file name to generate +.TP +.SH AUTHOR +Veloman Yunkan diff --git a/scripts/kiwix-compile-resources b/scripts/kiwix-compile-resources index 265db5ff6..7c1bf3a8a 100755 --- a/scripts/kiwix-compile-resources +++ b/scripts/kiwix-compile-resources @@ -102,7 +102,7 @@ class Resource: -master_c_template = """//This file is automaically generated. Do not modify it. +master_c_template = """//This file is automatically generated. Do not modify it.
#include #include diff --git a/scripts/meson.build b/scripts/meson.build index fb0f9cb97..c4ddec873 100644 --- a/scripts/meson.build +++ b/scripts/meson.build @@ -4,3 +4,9 @@ res_compiler = find_program('kiwix-compile-resources') install_data(res_compiler.path(), install_dir:get_option('bindir')) install_man('kiwix-compile-resources.1') + +i18n_compiler = find_program('kiwix-compile-i18n') + +install_data(i18n_compiler.path(), install_dir:get_option('bindir')) + +install_man('kiwix-compile-i18n.1') diff --git a/src/meson.build b/src/meson.build index 9a5d4ff56..c9c69445d 100644 --- a/src/meson.build +++ b/src/meson.build @@ -33,6 +33,7 @@ kiwix_sources = [ 'version.cpp' ] kiwix_sources += lib_resources +kiwix_sources += i18n_resources if host_machine.system() == 'windows' kiwix_sources += 'subprocess_windows.cpp' diff --git a/src/server/i18n.cpp b/src/server/i18n.cpp index 3953ccf3c..407f5f485 100644 --- a/src/server/i18n.cpp +++ b/src/server/i18n.cpp @@ -36,6 +36,13 @@ const char* I18nStringTable::get(const std::string& key) const return found == end ?
nullptr : found->value; } +namespace i18n +{ +// this data is generated by the i18n resource compiler +extern const I18nStringTable stringTables[]; +extern const size_t langCount; +} + namespace { @@ -44,18 +51,12 @@ const I18nString enStrings[] = { { "suggest-full-text-search", "containing '{{{SEARCH_TERMS}}}'..."} }; -#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0])) - -const I18nStringTable i18nStringTables[] = { - { "en", ARRAY_ELEMENT_COUNT(enStrings), enStrings } -}; - class I18nStringDB { public: // functions I18nStringDB() { - for ( size_t i = 0; i < ARRAY_ELEMENT_COUNT(i18nStringTables); ++i ) { - const auto& t = i18nStringTables[i]; + for ( size_t i = 0; i < kiwix::i18n::langCount; ++i ) { + const auto& t = kiwix::i18n::stringTables[i]; lang2TableMap[t.lang] = &t; } }; diff --git a/static/i18n/en.json b/static/i18n/en.json new file mode 100644 index 000000000..6a111b289 --- /dev/null +++ b/static/i18n/en.json @@ -0,0 +1,8 @@ +{ + "@metadata": { + "authors": [ + ] + }, + "name":"English", + "suggest-full-text-search": "containing '{{{SEARCH_TERMS}}}'..."
+} diff --git a/static/i18n/qqq.json b/static/i18n/qqq.json new file mode 100644 index 000000000..f0aaaa8dd --- /dev/null +++ b/static/i18n/qqq.json @@ -0,0 +1,9 @@ +{ + "@metadata": { + "authors": [ + "Veloman Yunkan" + ] + }, + "name": "Current language to which the string is being translated to.", + "suggest-full-text-search": "Text appearing in the suggestion list that, when selected, runs a full text search instead of the title search" +} diff --git a/static/i18n_resources_list.txt b/static/i18n_resources_list.txt new file mode 100644 index 000000000..eb8bddb12 --- /dev/null +++ b/static/i18n_resources_list.txt @@ -0,0 +1 @@ +i18n/en.json diff --git a/static/meson.build b/static/meson.build index 5c6d2899b..6a8ce1773 100644 --- a/static/meson.build +++ b/static/meson.build @@ -14,3 +14,18 @@ lib_resources = custom_target('resources', '@INPUT@'], depend_files: resource_files ) + +i18n_resource_files = run_command(find_program('python3'), + '-c', + 'import sys; f=open(sys.argv[1]); print(f.read())', + files('i18n_resources_list.txt') + ).stdout().strip().split('\n') + +i18n_resources = custom_target('i18n_resources', + input: 'i18n_resources_list.txt', + output: ['libkiwix-i18n-resources.cpp'], + command:[i18n_compiler, + '--cxxfile', '@OUTPUT0@', + '@INPUT@'], + depend_files: i18n_resource_files +)