Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/unicode/genwincodec.py
12 views
1
"""This script generates a Python codec module from a Windows Code Page.
2
3
It uses the function MultiByteToWideChar to generate a decoding table.
4
"""
5
6
import ctypes
7
from ctypes import wintypes
8
from gencodec import codegen
9
import unicodedata
10
11
def genwinmap(codepage):
    """Build the decoding map of a single-byte Windows code page.

    Each byte value 0-255 is decoded on its own with the Win32 function
    MultiByteToWideChar (reached through ``ctypes.windll``, so this only
    works on Windows) and recorded together with the Unicode name of the
    decoded character.

    Parameters:
        codepage: Windows code page number, passed unchanged as the
            first argument of MultiByteToWideChar.

    Returns:
        A dict mapping every byte value (0-255) to a tuple
        ``(code point, name)``.  ``name`` is the ``unicodedata`` name of
        the decoded character; when the character has no name it is
        'CONTROL CHARACTER' for bytes 0-31 and 127, and '' otherwise.

    Raises:
        ValueError: if MultiByteToWideChar does not convert a byte into
            exactly one wide character (it reports failure this way for
            an invalid code page).
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    # Bytes that get a descriptive placeholder when the decoded
    # character has no Unicode name.
    control_bytes = frozenset(range(32)) | {127}

    enc2uni = {}
    for i in range(256):
        # Fresh zero-filled buffer: room for one character plus a spare
        # slot that must stay untouched.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes([i]), 1,
            buf, 2)
        # Raise explicitly instead of asserting: the code page comes from
        # the command line and asserts are stripped under ``python -O``.
        if ret != 1:
            raise ValueError("invalid code page: %r" % (codepage,))
        # Internal sanity check: exactly one character was written.
        assert buf[1] == '\x00'

        fallback = 'CONTROL CHARACTER' if i in control_bytes else ''
        name = unicodedata.name(buf[0], fallback)
        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
42
43
def genwincodec(codepage):
    """Print the source of a codec module for a Windows code page.

    The decoding map is obtained from genwinmap() and turned into module
    source by gencodec.codegen().  The leading part of that source is
    then swapped for a docstring that names the encoding, the Windows
    version reported by platform.win32_ver() and the command used.

    Parameters:
        codepage: Windows code page number (an int; it is formatted
            with ``%d``).

    The result is written to standard output; nothing is returned.
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Replace first lines with our own docstring: everything up to and
    # including the first '"""#"' marker in the generated text is dropped.
    header_template = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
python Tools/unicode/genwincodec.py %s
"""#"
'''
    windows_version = ' '.join(platform.win32_ver())
    header = header_template % (encodingname, windows_version, codepage)
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
58
59
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the Windows
    # code page number to generate a codec for.
    import sys

    requested_codepage = int(sys.argv[1])
    genwincodec(requested_codepage)
62
63