Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/i18n/msgfmt.py
12 views
1
#! /usr/bin/env python3
2
# Written by Martin v. Löwis <[email protected]>
3
4
"""Generate binary message catalog from textual translation description.
5
6
This program converts a textual Uniforum-style message catalog (.po file) into
7
a binary GNU catalog (.mo file). This is essentially the same function as the
8
GNU msgfmt program, however, it is a simpler implementation. Currently it
9
handles both plural forms and message contexts.
10
11
Usage: msgfmt.py [OPTIONS] filename.po
12
13
Options:
14
-o file
15
--output-file=file
16
Specify the output file to write to. If omitted, output will go to a
17
file named filename.mo (based off the input file name).
18
19
-h
20
--help
21
Print this message and exit.
22
23
-V
24
--version
25
Display version information and exit.
26
"""
27
28
import os
29
import sys
30
import ast
31
import getopt
32
import struct
33
import array
34
from email.parser import HeaderParser
35
36
# Version string printed by the -V/--version option.
__version__ = "1.2"

# Catalog being built: add() stores each translation here, keyed by the
# message id (prefixed with the context and an EOT byte when there is one),
# and generate() serializes the whole mapping.
MESSAGES = {}
39
40
41
def usage(code, msg=''):
    """Print the module help text to stderr and exit with status *code*.

    When *msg* is non-empty it is printed after the help text as an extra
    diagnostic.
    """
    stream = sys.stderr
    print(__doc__, file=stream)
    if msg:
        print(msg, file=stream)
    sys.exit(code)
46
47
48
def add(ctxt, id, str, fuzzy):
    """Record a translation in MESSAGES unless it is fuzzy or empty.

    An entry without a context (ctxt is None) is keyed by its id alone;
    otherwise the key is the context and the id joined by an EOT (0x04)
    byte.
    """
    if fuzzy or not str:
        return
    key = id if ctxt is None else b"%b\x04%b" % (ctxt, id)
    MESSAGES[key] = str
56
57
58
def generate():
    """Return the binary .mo catalog for the entries in MESSAGES, as bytes.

    The output is, in order: a header of seven 32-bit integers, the key
    index, the value index, the NUL-terminated keys and the NUL-terminated
    values.  Each index entry holds the length of a string (not counting its
    terminating NUL) followed by its offset from the start of the file.  No
    hash table is written.  Integers are packed with the struct/array native
    formats, i.e. in the byte order of the machine running the script.
    """
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)
    values = [MESSAGES[key] for key in keys]

    # Each string is NUL terminated; the NUL does not count into the size
    # recorded in the index.  Join once instead of growing bytes in a loop.
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(value + b'\0' for value in values)

    # The header is 7 32-bit integers.  We don't use hash tables, so the
    # keys start right after the two index tables (8 bytes per entry each).
    keystart = 7 * 4 + 16 * len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    kpos = keystart
    vpos = valuestart
    for key, value in zip(keys, values):
        koffsets += [len(key), kpos]
        voffsets += [len(value), vpos]
        # Step over the string and its terminating NUL.
        kpos += len(key) + 1
        vpos += len(value) + 1

    header = struct.pack("Iiiiiii",
                         0x950412de,            # Magic
                         0,                     # Version
                         len(keys),             # # of entries
                         7 * 4,                 # start of key index
                         7 * 4 + len(keys) * 8, # start of value index
                         0, 0)                  # size and offset of hash table
    index = array.array("i", koffsets + voffsets).tobytes()
    return header + index + ids + strs
97
98
99
def make(filename, outfile):
    """Compile the .po catalog *filename* into a binary .mo file.

    filename -- path of the input catalog; '.po' is appended when the name
                does not already end with it.
    outfile  -- path of the output file, or None to use the input name with
                its extension replaced by '.mo'.

    Parsed entries are stored through add() and the output bytes come from
    generate().  If the input cannot be read, the catalog contains a syntax
    error, or the output cannot be written, a message is printed to stderr
    and the process exits with status 1.
    """
    ID = 1
    STR = 2
    CTXT = 3

    def syntax_error(lno, text):
        # Report a malformed line and abort.
        print('Syntax error on %s:%d' % (infile, lno),
              'before:', file=sys.stderr)
        print(text, file=sys.stderr)
        sys.exit(1)

    # Compute .mo name from .po name and arguments
    infile = filename if filename.endswith('.po') else filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except OSError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = msgctxt = None
    # Bind the per-entry state up front so that a malformed catalog (for
    # example one whose first keyword is msgstr) cannot hit an unbound local.
    msgid = msgstr = b''
    is_plural = False
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        l = raw.decode(encoding)
        is_comment = l.startswith('#')
        # If we get a comment line after a msgstr, this is a new entry
        if is_comment and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if l.startswith('#,') and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if is_comment:
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if l.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # No comment separated the two entries, so the previous
                # entry's fuzzy mark must not carry over to this one.
                fuzzy = 0
            section = CTXT
            l = l[7:]
            msgctxt = b''
        elif l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(
                        msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # The new entry directly follows the previous msgstr, with
                # neither a comment nor a msgctxt in between: it has no
                # context and no fuzzy mark of its own.
                msgctxt = None
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d'
                      % (infile, lno), file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d'
                          % (infile, lno), file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d'
                          % (infile, lno), file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # The remainder of the line is evaluated as a Python literal.
        # Anything that does not parse, or is not a string, is reported as
        # a syntax error rather than surfacing as a traceback.
        try:
            text = ast.literal_eval(l)
        except (SyntaxError, ValueError):
            syntax_error(lno, l)
        if not isinstance(text, str):
            syntax_error(lno, l)
        if section == CTXT:
            msgctxt += text.encode(encoding)
        elif section == ID:
            msgid += text.encode(encoding)
        elif section == STR:
            msgstr += text.encode(encoding)
        else:
            # A string outside of any msgctxt/msgid/msgstr section.
            syntax_error(lno, text)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    # NOTE(review): nothing in this function clears the module-level
    # MESSAGES, so entries stored by earlier make() calls in the same
    # process are written again here -- confirm whether that is intended.
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except OSError as msg:
        print(msg, file=sys.stderr)
        # A catalog that could not be written is a failure, like an input
        # that could not be read.
        sys.exit(1)
216
217
218
def main():
    """Command-line entry point: parse the options and compile each input.

    Recognized options are -h/--help, -V/--version and -o/--output-file.
    An invalid option prints the usage text and exits with status 1; a
    missing input file prints an error and also exits with status 1.
    Every positional argument is passed to make() together with the
    requested output file (None when -o was not given).
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        # This is an error: do not report success to the caller.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)
243
244
245
if __name__ == '__main__':
246
main()
247
248