Skip to content

Commit

Permalink
i18n data is kept in and generated from JSON files
Browse files Browse the repository at this point in the history
Introduced a new resource compiler script kiwix-compile-i18n that
processes i18n string data stored in JSON files and generates sorted C++
tables of string keys and values for all languages.
  • Loading branch information
veloman-yunkan committed Jan 16, 2022
1 parent b12060e commit ffb5373
Show file tree
Hide file tree
Showing 10 changed files with 235 additions and 9 deletions.
167 changes: 167 additions & 0 deletions scripts/kiwix-compile-i18n
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/env python3

'''
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''

import argparse
import os.path
import re
import json

def to_identifier(name):
    """Convert an arbitrary string into a valid C/C++ identifier.

    Every character other than an ASCII letter or digit is replaced with
    an underscore, and an underscore is prepended when the result would
    otherwise start with a digit.

    An empty ``name`` yields ``"_"`` rather than failing with an
    ``IndexError``, so the result is always a non-empty identifier.
    """
    ident = re.sub(r'[^0-9a-zA-Z]', '_', name)
    # After the substitution only ASCII letters, digits and '_' remain, so
    # the only invalid results are the empty string and a leading digit.
    if not ident or ident[0].isdigit():
        return "_" + ident
    return ident

def lang_code(filename):
    """Derive the language code from the path of an i18n resource file.

    The code is the file's base name without its extension, converted
    with to_identifier(). The "file name -> code" mapping is printed to
    stdout as a side effect.
    """
    basename = os.path.basename(filename)
    stem, _extension = os.path.splitext(basename)
    code = to_identifier(stem)
    print(basename, '->', code)
    return code

# C++ code templates mark their placeholders as $<NAME>$ so that the curly
# braces of the C++ code itself can be written naturally (not doubled).
def expand_cxx_template(t, **kwargs):
    """Expand the $<NAME>$ placeholders of a C++ code template.

    The template is first rewritten into a str.format() format string:
    literal braces are doubled, then the $< and >$ markers are turned into
    single braces. The rewrite order matters, since the markers must be
    converted only after the literal braces have been escaped.

    Each placeholder NAME is replaced by the keyword argument of the same
    name; a placeholder without a matching argument raises KeyError.
    """
    rewrites = (
        ('{', '{{'),
        ('}', '}}'),
        ('$<', '{'),
        ('>$', '}'),
    )
    format_string = t
    for marker, replacement in rewrites:
        format_string = format_string.replace(marker, replacement)
    return format_string.format(**kwargs)

def cxx_string_literal(s):
    """Return the C++ source text of a UTF-8 string literal holding ``s``.

    The result is pure ASCII: it has the form ``u8"..."`` with quotes,
    backslashes, control characters and non-ASCII characters escaped.

    JSON string escapes match those of C++ for every character of the
    Basic Multilingual Plane, so those are escaped with json.dumps().
    Characters beyond the BMP are handled separately: JSON encodes them as
    a UTF-16 surrogate pair (e.g. ``\\ud83d\\ude00``), which is not a valid
    universal-character-name in C++; they are emitted as ``\\UXXXXXXXX``
    instead.

    Raises:
        TypeError: if ``s`` is not a string (e.g. a nested JSON object),
            since no valid C++ string literal can be produced for it.
    """
    if not isinstance(s, str):
        raise TypeError(
            "cannot make a C++ string literal from {!r}".format(s))

    # Splitting on a capturing group keeps the separators: even positions
    # hold (possibly empty) BMP-only text, odd positions hold exactly one
    # character outside the BMP.
    pieces = re.split(r'([\U00010000-\U0010FFFF])', s)
    escaped = []
    for position, piece in enumerate(pieces):
        if position % 2:
            escaped.append('\\U{:08x}'.format(ord(piece)))
        else:
            # Strip the surrounding quotes added by json.dumps()
            escaped.append(json.dumps(piece)[1:-1])
    return 'u8"' + ''.join(escaped) + '"'

# C++ definition of the string table of a single language.
#   TABLE_NAME    - identifier of the generated array
#   TABLE_ENTRIES - comma-separated initializers of the I18nString elements
string_table_cxx_template = '''
const I18nString $<TABLE_NAME>$[] = {
$<TABLE_ENTRIES>$
};
'''

# One element of the table of languages.
#   LANG_STRING_LITERAL - C++ string literal with the language code
#   STRING_TABLE_NAME   - identifier of that language's string table (used
#                         both for the element count and for the pointer)
lang_table_entry_cxx_template = '''
{
$<LANG_STRING_LITERAL>$,
ARRAY_ELEMENT_COUNT($<STRING_TABLE_NAME>$),
$<STRING_TABLE_NAME>$
}'''

# Skeleton of the whole generated C++ file.
#   STRING_DATA - the per-language string tables; they are placed in an
#                 unnamed namespace
#   LANG_TABLE  - comma-separated elements of kiwix::i18n::stringTables
#   LANG_COUNT  - number of elements in kiwix::i18n::stringTables
cxxfile_template = '''// This file is automatically generated. Do not modify it.
#include "server/i18n.h"
namespace kiwix {
namespace i18n {
namespace
{
$<STRING_DATA>$
} // unnamed namespace
#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0]))
extern const I18nStringTable stringTables[] = {
$<LANG_TABLE>$
};
extern const size_t langCount = $<LANG_COUNT>$;
} // namespace i18n
} // namespace kiwix
'''

class Resource:
    """An i18n resource: a JSON file with the strings of one language.

    Attributes:
        filename: path of the resource as listed (surrounding whitespace
            stripped), relative to one of the base directories.
        lang_code: language code derived from the file name.
        data: raw text of the JSON file.
    """

    def __init__(self, base_dirs, filename):
        """Load ``filename`` from the first of ``base_dirs`` containing it.

        Raises:
            FileNotFoundError: if the file is in none of the directories.
        """
        filename = filename.strip()
        self.filename = filename
        self.lang_code = lang_code(filename)
        for base_dir in base_dirs:
            try:
                # JSON is UTF-8 by specification; do not depend on the
                # locale's preferred encoding to read translations.
                with open(os.path.join(base_dir, filename), 'r',
                          encoding='utf-8') as f:
                    self.data = f.read()
                break
            except FileNotFoundError:
                continue
        else:
            # The loop ended without a successful read
            raise FileNotFoundError(
                "Impossible to find {}".format(filename))

    def get_string_table_name(self):
        """Return the C++ identifier of this language's string table."""
        return "string_table_for_" + self.lang_code

    def get_string_table(self):
        """Return the C++ definition of this language's string table."""
        table_entries = ",\n ".join(self.get_string_table_entries())
        return expand_cxx_template(string_table_cxx_template,
                                   TABLE_NAME=self.get_string_table_name(),
                                   TABLE_ENTRIES=table_entries)

    def get_string_table_entries(self):
        """Yield a C++ ``{ key, value }`` initializer for every string.

        Entries are produced in sorted key order; the "@metadata" entry of
        the JSON file is not a translatable string and is skipped.
        """
        d = json.loads(self.data)
        for k in sorted(d):
            if k == "@metadata":
                continue
            key_string = cxx_string_literal(k)
            value_string = cxx_string_literal(d[k])
            yield '{ ' + key_string + ', ' + value_string + ' }'

    def get_lang_table_entry(self):
        """Return this language's element of the C++ table of languages."""
        return expand_cxx_template(
            lang_table_entry_cxx_template,
            LANG_STRING_LITERAL=cxx_string_literal(self.lang_code),
            STRING_TABLE_NAME=self.get_string_table_name())



def gen_c_file(resources):
    """Return the full text of the generated C++ file.

    ``resources`` is a sized collection of Resource objects. Each one
    contributes its string table and its element of the table of
    languages, in the order given.
    """
    # Render both fragments of a resource in one pass over the collection
    rendered = [(res.get_string_table(), res.get_lang_table_entry())
                for res in resources]
    all_string_tables = "\n".join(table for table, _ in rendered)
    all_lang_entries = ",\n ".join(entry for _, entry in rendered)
    return expand_cxx_template(cxxfile_template,
                               STRING_DATA=all_string_tables,
                               LANG_TABLE=all_lang_entries,
                               LANG_COUNT=len(resources))



def main():
    """Compile the listed i18n resources into a single C++ source file.

    The positional argument is a text file listing one resource path per
    line, relative to the directory containing that list file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--cxxfile',
                        required=True,
                        help='The Cpp file name to generate')
    parser.add_argument('i18n_resource_file',
                        help='The list of resources to compile.')
    args = parser.parse_args()

    base_dir = os.path.dirname(os.path.realpath(args.i18n_resource_file))
    with open(args.i18n_resource_file, 'r', encoding='utf-8') as f:
        # Blank lines (e.g. trailing newlines at the end of the list) do
        # not name a resource and must not be turned into one.
        resources = [Resource([base_dir], line)
                     for line in f
                     if line.strip()]

    with open(args.cxxfile, 'w', encoding='utf-8') as f:
        f.write(gen_c_file(resources))


if __name__ == "__main__":
    main()

18 changes: 18 additions & 0 deletions scripts/kiwix-compile-i18n.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
.TH KIWIX-COMPILE-I18N "1" "January 2022" "Kiwix" "User Commands"
.SH NAME
kiwix-compile-i18n \- helper to compile Kiwix i18n (internationalization) data
.SH SYNOPSIS
\fBkiwix\-compile\-i18n\fR [\-h] \-\-cxxfile CXXFILE i18n_resource_file\fR
.SH DESCRIPTION
.TP
i18n_resource_file
The list of i18n resources to compile.
.TP
\fB\-h\fR, \fB\-\-help\fR
show a help message and exit
.TP
\fB\-\-cxxfile\fR CXXFILE
The Cpp file name to generate
.SH AUTHOR
Veloman Yunkan <veloman.yunkan@gmail.com>
2 changes: 1 addition & 1 deletion scripts/kiwix-compile-resources
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class Resource:



master_c_template = """//This file is automaically generated. Do not modify it.
master_c_template = """//This file is automatically generated. Do not modify it.
#include <stdlib.h>
#include <fstream>
Expand Down
6 changes: 6 additions & 0 deletions scripts/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@ res_compiler = find_program('kiwix-compile-resources')
install_data(res_compiler.path(), install_dir:get_option('bindir'))

install_man('kiwix-compile-resources.1')

# Locate the i18n resource compiler script, then install the script and
# its man page.
i18n_compiler = find_program('kiwix-compile-i18n')

install_data(i18n_compiler.path(), install_dir:get_option('bindir'))

install_man('kiwix-compile-i18n.1')
1 change: 1 addition & 0 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ kiwix_sources = [
'version.cpp'
]
kiwix_sources += lib_resources
kiwix_sources += i18n_resources

if host_machine.system() == 'windows'
kiwix_sources += 'subprocess_windows.cpp'
Expand Down
17 changes: 9 additions & 8 deletions src/server/i18n.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ const char* I18nStringTable::get(const std::string& key) const
return found == end ? nullptr : found->value;
}

namespace i18n
{
// this data is generated by the i18n resource compiler
extern const I18nStringTable stringTables[];
extern const size_t langCount;
}

namespace
{

Expand All @@ -44,18 +51,12 @@ const I18nString enStrings[] = {
{ "suggest-full-text-search", "containing '{{{SEARCH_TERMS}}}'..."}
};

#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0]))

const I18nStringTable i18nStringTables[] = {
{ "en", ARRAY_ELEMENT_COUNT(enStrings), enStrings }
};

class I18nStringDB
{
public: // functions
I18nStringDB() {
for ( size_t i = 0; i < ARRAY_ELEMENT_COUNT(i18nStringTables); ++i ) {
const auto& t = i18nStringTables[i];
for ( size_t i = 0; i < kiwix::i18n::langCount; ++i ) {
const auto& t = kiwix::i18n::stringTables[i];
lang2TableMap[t.lang] = &t;
}
};
Expand Down
8 changes: 8 additions & 0 deletions static/i18n/en.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"@metadata": {
"authors": [
]
},
"name":"English",
"suggest-full-text-search": "containing '{{{SEARCH_TERMS}}}'..."
}
9 changes: 9 additions & 0 deletions static/i18n/qqq.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"@metadata": {
"authors": [
"Veloman Yunkan"
]
},
"name": "Current language to which the string is being translated.",
"suggest-full-text-search": "Text appearing in the suggestion list that, when selected, runs a full text search instead of the title search"
}
1 change: 1 addition & 0 deletions static/i18n_resources_list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
i18n/en.json
15 changes: 15 additions & 0 deletions static/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,18 @@ lib_resources = custom_target('resources',
'@INPUT@'],
depend_files: resource_files
)

# Read i18n_resources_list.txt at configure time (via a python3 one-liner
# that prints the file) and split its contents into a list with one entry
# per line.
i18n_resource_files = run_command(find_program('python3'),
'-c',
'import sys; f=open(sys.argv[1]); print(f.read())',
files('i18n_resources_list.txt')
).stdout().strip().split('\n')

# Generate libkiwix-i18n-resources.cpp by running the i18n compiler on the
# resource list. The listed resource files are declared as extra
# dependencies of the target via depend_files.
i18n_resources = custom_target('i18n_resources',
input: 'i18n_resources_list.txt',
output: ['libkiwix-i18n-resources.cpp'],
command:[i18n_compiler,
'--cxxfile', '@OUTPUT0@',
'@INPUT@'],
depend_files: i18n_resource_files
)

0 comments on commit ffb5373

Please sign in to comment.