-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathxliff.py
260 lines (218 loc) · 8.4 KB
/
xliff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
"""
https://en.wikipedia.org/wiki/XLIFF
https://www.oasis-open.org/committees/tc_home.php?wg_abbrev=xliff
https://wiki.oasis-open.org/xliff/FAQ
http://docs.oasis-open.org/xliff/xliff-core/xliff-core.html
http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html
http://docs.oasis-open.org/xliff/xliff-core/v2.1/csprd01/xliff-core-v2.1-csprd01.html
https://www.k15t.com/blog/2017/06/xliff-standard-file-format-for-translations
https://pypi.python.org/pypi/itools/0.77.8 <----- see itools.xliff
https://github.com/translate/pyliff
https://pypi.python.org/pypi/slc.xliff/1.3.3
https://pypi.python.org/pypi/translate-toolkit/1.0.1
"""
from xml.sax import saxutils
### from itools.srx.segment.py
# Constants
# Segment kinds: plain text, opening format marker, closing format marker.
TEXT, START_FORMAT, END_FORMAT = 0, 1, 2
### from itools.datatypes.primitive.py
class XMLContent(object):
    """Escape and unescape text placed inside an XML element.

    Only ``&`` and ``<`` are handled, which is the minimum needed for
    element content to stay well-formed.
    """

    @staticmethod
    def encode(value):
        """Return ``value`` with ``&`` and ``<`` replaced by XML entities."""
        # '&' must be escaped first, otherwise the '&' introduced by '&lt;'
        # would itself be escaped again.
        return value.replace('&', '&amp;').replace('<', '&lt;')

    @staticmethod
    def decode(value):
        """Return ``value`` with ``&lt;`` and ``&amp;`` entities resolved."""
        # '&amp;' is resolved last so that an escaped entity such as
        # '&amp;lt;' decodes to the text '&lt;' and not to '<'.
        return value.replace('&lt;', '<').replace('&amp;', '&')
### from itools.datatypes.primitive.py
class XMLAttribute(object):
    """Escape and unescape text placed inside a double-quoted XML attribute.

    Handles ``&``, ``<`` and ``"``.
    """

    @staticmethod
    def encode(value):
        """Return ``value`` made safe for use between double quotes."""
        # '&' must be escaped first so the entities added afterwards are not
        # escaped a second time.
        value = value.replace('&', '&amp;').replace('<', '&lt;')
        return value.replace('"', '&quot;')

    @staticmethod
    def decode(value):
        """Return ``value`` with ``&lt;``, ``&quot;`` and ``&amp;`` resolved."""
        # '&amp;' is resolved last so that '&amp;quot;' decodes to the text
        # '&quot;' rather than to a double quote.
        value = value.replace('&lt;', '<').replace('&quot;', '"')
        return value.replace('&amp;', '&')
### from itools.xliff.xliff.py
# DOCTYPE declaration written right after the XML declaration of a document.
doctype = ''.join([
    '<!DOCTYPE xliff PUBLIC "-//XLIFF//DTD XLIFF//EN"\n',
    ' "http://www.oasis-open.org/committees/xliff/documents/xliff.dtd">\n',
])
### from itools.xliff.xliff.py
# FIXME TMXNote and XLFNote are the same
class XLFNote(object):
    """A ``<note>`` element: free text plus optional attributes."""

    def __init__(self, text='', attributes=None):
        """Store the note text and its attributes.

        ``attributes`` maps attribute names to string values; a fresh dict is
        created when it is omitted so instances never share one.
        """
        if attributes is None:
            attributes = {}
        self.text = text
        self.attributes = attributes

    def to_str(self):
        """Return the note serialized as a ``<note ...>text</note>`` line.

        The ``lang`` attribute is written as ``xml:lang``.  The note text is
        inserted as-is (it is not escaped here).
        """
        rendered = []
        for attr_name, attr_value in self.attributes.items():
            qualified_name = 'xml:lang' if attr_name == 'lang' else attr_name
            # Values sit between double quotes, so they need attribute
            # escaping (which also covers '"'), not content escaping.
            rendered.append(
                ' %s="%s"' % (qualified_name, XMLAttribute.encode(attr_value)))
        return '<note%s>%s</note>\n' % (''.join(rendered), self.text)
### from itools.xliff.xliff.py
class XLFUnit(object):
    """A ``<trans-unit>`` element: one source string and its translation."""

    def __init__(self, attributes):
        """Create an empty unit carrying the given ``<trans-unit>`` attributes.

        ``source``, ``target``, ``context`` and ``line`` start as ``None`` and
        ``notes`` as an empty list; callers fill them in afterwards.
        """
        self.source = None
        self.target = None
        self.context = None
        self.line = None
        self.attributes = attributes
        self.notes = []

    def to_str(self):
        """Return the unit serialized as a ``<trans-unit>`` XML fragment.

        ``source`` and ``target`` are written as stored (no escaping is done
        here).  A ``<target>`` element is always emitted; it is empty when the
        unit has no translation yet.
        """
        s = []
        if self.attributes:
            att = ['%s="%s"' % (k, v)
                   for k, v in self.attributes.items() if k != 'space']
            s.append(' <trans-unit %s ' % ' '.join(att))
            # 'space' is stored without its namespace prefix.
            if 'space' in self.attributes:
                s.append('xml:space="%s"' % self.attributes['space'])
            s.append('>\n')
        else:
            s.append(' <trans-unit>\n')

        if self.source:
            s.append(' <source>')
            s.append(self.source)
            s.append('</source>\n')

        # The target is written unconditionally.  It may still be None for an
        # untranslated unit, which would make ''.join() raise TypeError, so
        # fall back to an empty string.
        s.append(' <target>')
        s.append(self.target or '')
        s.append('</target>\n')

        if self.line is not None or self.context is not None:
            s.append(' <context-group name="context info">\n')
            if self.line is not None:
                s.append(' <context context-type="linenumber">%d' %
                         self.line)
                s.append('</context>\n')
            if self.context is not None:
                s.append(' <context context-type="x-context">%s' %
                         self.context)
                s.append('</context>\n')
            s.append(' </context-group>\n')

        for note in self.notes:
            s.append(note.to_str())

        s.append(' </trans-unit>\n')
        return ''.join(s)
### from itools.xliff.xliff.py
class File(object):
    """A ``<file>`` element: optional header lines plus a body of units."""

    def __init__(self, original, attributes):
        """Remember the ``original`` name and the extra ``<file>`` attributes.

        ``body`` and ``header`` start as empty lists; their items only need a
        ``to_str()`` method.
        """
        self.original = original
        self.attributes = attributes
        self.body = []
        self.header = []

    def to_str(self):
        """Return the ``<file>`` element, header and body as one string."""
        attrs = self.attributes

        # Open tag: regular attributes first, then 'space' as xml:space.
        rendered = ''
        for key, value in attrs.items():
            if key == 'space':
                continue
            rendered += ' %s="%s"' % (key, XMLAttribute.encode(value))
        if 'space' in attrs:
            rendered += ' xml:space="%s"' % attrs['space']
        parts = ['<file original="%s"%s>\n' % (self.original, rendered)]

        # The header is only written when it has content.
        if self.header:
            parts.append('<header>\n')
            for line in self.header:
                parts.append(line.to_str())
            parts.append('</header>\n')

        # The body element is always written, even when empty.
        parts.append('<body>\n')
        for unit in self.body or ():
            parts.append(unit.to_str())
        parts.append('</body>\n')

        # Close tag
        parts.append('</file>\n')
        return ''.join(parts)
### from itools.xliff.xliff.py
# class XLFFile(TextFile):
class XLFFile(object):
    """An XLIFF document: a version, an optional language and its files.

    ``files`` is normally a dict mapping a file name to a ``File`` object
    (this is what ``add_unit`` maintains).  Read-only methods also accept a
    plain iterable of file objects, since ``build`` stores whatever it is
    given.
    """

    class_mimetypes = ['application/x-xliff']
    class_extension = 'xlf'

    def __init__(self, version='1.2'):
        """Create an empty document for the given XLIFF ``version``."""
        self.version = version
        self.lang = None
        self.files = {}

    #######################################################################
    # Helpers
    #######################################################################
    def _iter_files(self):
        """Return the file objects, whether ``files`` is a dict or a list."""
        files = self.files
        if isinstance(files, dict):
            return list(files.values())
        return list(files)

    @staticmethod
    def _escape(text):
        """Escape ``text`` the way ``add_unit`` stores it (falsy unchanged)."""
        return saxutils.escape(text) if text else text

    #######################################################################
    # Save
    #######################################################################
    def to_str(self, encoding='UTF-8'):
        """Serialize the whole document and return it encoded as bytes."""
        output = []
        # The XML declaration
        output.append('<?xml version="1.0" encoding="%s"?>\n' % encoding)
        # The Doctype
        output.append(doctype)
        # <xliff>: xml:lang is only written when a language is set.
        if self.lang:
            template = '<xliff version="%s" xml:lang="%s">\n'
            output.append(template % (self.version, self.lang))
        else:
            template = '<xliff version="%s">\n'
            output.append(template % self.version)
        # The files
        for xlf_file in self._iter_files():
            output.append(xlf_file.to_str())
        # </xliff>
        output.append('</xliff>\n')
        return ''.join(output).encode(encoding)

    #######################################################################
    # API
    #######################################################################
    def build(self, version, files):
        """Replace the document version and its collection of files."""
        self.version = version
        self.files = files

    def get_languages(self):
        """Return ``(file_ids, source_languages, target_languages)``.

        Each element is a list of distinct values in first-seen order.  The
        file id is the ``original`` attribute when present, otherwise the
        file object's ``original`` name.  A missing source or target language
        is reported as ``''``.
        """
        files_id, sources, targets = [], [], []
        for xlf_file in self._iter_files():
            attributes = xlf_file.attributes
            if 'original' in attributes:
                file_id = attributes['original']
            else:
                file_id = xlf_file.original
            source = attributes.get('source-language', '')
            target = attributes.get('target-language', '')
            if file_id not in files_id:
                files_id.append(file_id)
            if source not in sources:
                sources.append(source)
            if target not in targets:
                targets.append(target)
        return (files_id, sources, targets)

    def add_unit(self, filename, source, target, context, line):
        """Append a translation unit to ``filename`` and return it.

        The file entry is created on first use.  ``source``, ``target`` and
        ``context`` are stored XML-escaped; falsy values (``None``, ``''``)
        are stored unchanged.
        """
        xlf_file = self.files.get(filename)
        if xlf_file is None:
            xlf_file = self.files[filename] = File(filename, {})
        unit = XLFUnit({})
        unit.source = self._escape(source)
        unit.target = self._escape(target)
        unit.context = self._escape(context)
        unit.line = line
        xlf_file.body.append(unit)
        return unit

    def gettext(self, source, context=None):
        """Returns the translation of the given message id.

        If the context/msgid pair is not present in any file, or has no
        translation yet, the message id is returned unchanged.

        Units keep their strings XML-escaped (see ``add_unit``), so the
        lookup key is escaped before comparing and the translation found is
        unescaped before being returned.
        """
        wanted = (self._escape(context), self._escape(source))
        for xlf_file in self._iter_files():
            # The body is a list of units, so it has to be scanned.
            for unit in xlf_file.body:
                if (unit.context, unit.source) == wanted and unit.target:
                    return saxutils.unescape(unit.target)
        return source