Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/unicode/genmap_support.py
12 views
1
#
2
# genmap_support.py: Multibyte Codec Map Generator
3
#
4
# Original Author: Hye-Shik Chang <[email protected]>
5
# Modified Author: Dong-hee Na <[email protected]>
6
#
7
8
9
class BufferedFiller:
    """Pack string tokens into lines no longer than a fixed column width.

    Tokens are joined without any separator and are never split across
    lines: when a token would push the line under construction past
    ``column`` characters, that line is closed first.  ``len(filler)`` is the
    number of tokens written since construction; neither ``flush`` nor
    ``printout`` resets it.
    """

    def __init__(self, column=78):
        self.column = column    # maximum number of characters per line
        self.buffered = []      # finished lines waiting for printout()
        self.cline = []         # tokens of the line being assembled
        self.clen = 0           # total length of the tokens in cline
        self.count = 0          # running total of tokens written

    def write(self, *data):
        """Add each token of *data* in order, wrapping lines as needed.

        Raises ValueError when a single token is longer than ``column``;
        tokens preceding it in the same call have already been stored.
        """
        for token in data:
            width = len(token)
            if width > self.column:
                raise ValueError("token is too long")
            if self.clen + width > self.column:
                # The token does not fit: close the current line first.
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Move the line under construction, if any, to the finished lines."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.clen = 0
            self.cline.clear()

    def printout(self, fp):
        """Write every finished line to *fp*, newline-terminated, then drop them."""
        self.flush()
        for text in self.buffered:
            fp.write(text + '\n')
        self.buffered.clear()

    def __len__(self):
        """Return the number of tokens written so far."""
        return self.count
42
43
44
class DecodeMapWriter:
    """Emit a decode map as two C array definitions written to ``fp``.

    ``decode_map`` is a two-level mapping ``{c1: {c2: value}}``.
    ``update_decode_map`` appends table entries to the filler and records, in
    each processed inner dict, where its entries start and which c2 range
    they cover.  ``generate`` then writes the value table followed by a
    256-entry index table.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue table entries for every c1 in *c1range* (inclusive bounds).

        Only c2 keys inside *c2range* (inclusive) are considered.  When
        *onlymask* is non-empty, c1 values not contained in it are skipped.
        *wide* is accepted but not used here.

        Each processed inner dict gains the keys ``self.prefix`` (True),
        'min', 'max' and 'midx' (the filler length before its entries).
        """
        candidates = range(c2range[0], c2range[1] + 1)

        for first in range(c1range[0], c1range[1] + 1):
            if first not in self.decode_map:
                continue
            if onlymask and first not in onlymask:
                continue

            inner = self.decode_map[first]
            present = [second for second in candidates if second in inner]
            if not present:
                continue

            lowest, highest = present[0], present[-1]
            inner[self.prefix] = True
            inner['min'] = lowest
            inner['max'] = highest
            inner['midx'] = len(self.filler)

            # Emit the whole lowest..highest span; gaps get the 'U,' token.
            for second in range(lowest, highest + 1):
                self.filler.write(
                    ('%d,' % inner[second]) if second in inner else 'U,')

    def generate(self, wide=False):
        """Write the value table and the 256-entry index table to ``self.fp``.

        *wide* selects the ``Py_UCS4`` / ``struct widedbcs_index``
        declarations instead of ``ucs2_t`` / ``struct dbcs_index``.
        """
        if wide:
            self.fp.write(f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n")
        else:
            self.fp.write(f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n")

        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        if wide:
            self.fp.write(f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n")
        else:
            self.fp.write(f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n")

        for first in range(256):
            # Rows never tagged with this prefix get an all-zero index entry.
            if first not in self.decode_map or self.prefix not in self.decode_map[first]:
                self.filler.write("{", "0,", "0,", "0", "},")
                continue

            entry = self.decode_map[first]
            self.filler.write("{", "__%s_decmap" % self.prefix, "+", "%d" % entry['midx'],
                              ",", "%d," % entry['min'], "%d" % entry['max'], "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
101
102
103
class EncodeMapWriter:
    """Emit an encode map as two C array definitions written to ``fp``.

    ``encode_map`` is a two-level mapping ``{c1: {c2: value}}`` where each
    value is an int or a tuple.  ``generate`` builds the flat value table and
    then writes it, followed by a 256-entry index table.

    Subclasses may override ``elemtype`` / ``indextype`` (the C type names
    used in the declarations) and the ``write_*`` hooks.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the value table, then write both tables to ``self.fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue value-table tokens for every c1 in 0..255 present in the map.

        Each processed inner dict gains the keys ``self.prefix`` (True),
        'min', 'max' and 'midx' (the filler length before its entries).

        Raises ValueError if a mapped value is neither an int nor a tuple.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are table positions.  This method itself
            # stores string bookkeeping keys (prefix, 'min', 'max', 'midx')
            # in the same dict, and sorting ints together with strings
            # raises TypeError, so a map that was annotated earlier must not
            # feed those keys into the sort.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            c2map['midx'] = len(self.filler)

            # Emit the whole min..max span so the table can be indexed by
            # offset from 'min'; gaps are filled by write_nochar().
            for v in range(rc2values[0], rc2values[-1] + 1):
                if v not in c2map:
                    self.write_nochar()
                elif isinstance(c2map[v], int):
                    self.write_char(c2map[v])
                elif isinstance(c2map[v], tuple):
                    self.write_multic(c2map[v])
                else:
                    raise ValueError(
                        "unsupported encode map value %r at (%r, %r)"
                        % (c2map[v], c1, v))

    def write_nochar(self):
        """Queue the token for a position that has no mapping."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Queue the token for a tuple-valued mapping; *point* is not used here."""
        self.filler.write('M,')

    def write_char(self, point):
        """Queue the token for an int-valued mapping."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the value table and the 256-entry index table to ``self.fp``."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % self.encode_map[i]['midx'], ",",
                                  "%d," % self.encode_map[i]['min'],
                                  "%d" % self.encode_map[i]['max'], "},")
            else:
                # Rows never tagged with this prefix get an all-zero entry.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
169
170
171
def open_mapping_file(path, source):
    """Open *path* for reading and return the open file object.

    If the file cannot be opened, the process exits via SystemExit with the
    message "<source> is needed".  Closing the returned file is left to the
    caller.
    """
    try:
        return open(path)
    except IOError:
        raise SystemExit(f'{source} is needed')
177
178
179
def print_autogen(fo, source):
    """Write a one-line "auto-generated, do not edit" C++-style comment to *fo*.

    *source* is interpolated into the comment as the origin of the file.
    """
    banner = '// AUTO-GENERATED FILE FROM {}: DO NOT EDIT\n'.format(source)
    fo.write(banner)
181
182
183
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Read a whitespace-separated mapping file into a two-level dict.

    The file is rewound and read line by line.  Everything from the first
    '#' on a line is ignored.  Lines that do not have enough fields to
    supply both requested columns (and at least two fields) are skipped.

    Args:
        fo: seekable text file object to read.
        natcol: index of the field holding the native code.
        unicol: index of the field holding the mapped value.
        sbcs: when true, native codes below 0x100 are kept as well;
            otherwise only codes >= 0x100 are stored.

    Returns:
        ``{code >> 8: {code & 0xff: value}}`` for every kept row.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    # A row is usable only if it reaches both requested columns; the floor of
    # two fields keeps the historical behavior for the default columns.
    needed = max(2, natcol + 1, unicol + 1)
    decmap = {}
    for line in fo:
        fields = line.split('#', 1)[0].split()
        if len(fields) < needed:
            continue

        # NOTE(review): eval() executes whatever expression the file
        # contains.  Kept to preserve the accepted field syntax; only run
        # this on trusted mapping files.
        row = [eval(e) for e in fields]
        loc, uni = row[natcol], row[unicol]
        if loc >= 0x100 or sbcs:
            # Split the code into its high part (outer key) and low byte.
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap
199
200