CoCalc -- encodings.py

GitHub Repository: sagemathinc/python-wasm
Path: blob/main/python/pylang/src/lib/encodings.py
2121 views
1
# vim:fileencoding=utf-8
2
# License: BSD Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
3

4
def base64encode(bytes, altchars, pad_char):
    # Encode an array of byte values as a base-64 string.
    #
    # altchars: optional two-character string providing the alphabet entries
    #           for the values 62 and 63; '+/' is used when it is falsy.
    # pad_char: string appended to fill up a short final group; '=' is used
    #           when it is undefined.
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789' + (altchars or '+/')
    if pad_char is undefined:
        pad_char = '='
    total = bytes.length
    tail = total % 3
    full = total - tail
    out = v'[]'
    # Every complete group of three bytes becomes four 6-bit symbols.
    for v'var pos = 0; pos < full; pos += 3':
        triple = (bytes[pos] << 16) | (bytes[pos + 1] << 8) | bytes[pos + 2]
        out.push(alphabet[(triple & 0xFC0000) >> 18], alphabet[(triple & 0x3F000) >> 12], alphabet[(triple & 0xFC0) >> 6], alphabet[triple & 0x3F])
    if tail is 1:
        # One byte left over: two symbols followed by two pad characters.
        single = bytes[full]
        out.push(alphabet[(single & 0xFC) >> 2], alphabet[(single & 0x3) << 4], pad_char, pad_char)
    elif tail is 2:
        # Two bytes left over: three symbols followed by one pad character.
        pair = (bytes[full] << 8) | bytes[full + 1]
        out.push(alphabet[(pair & 0xFC00) >> 10], alphabet[(pair & 0x3F0) >> 4], alphabet[(pair & 0xF) << 2], pad_char)
    return out.join('')
22

23
def base64decode(string):
    # Convert the output of base64encode back into an array of bytes
    # (Uint8Array). Only works with the standard altchars and pad_char.
    #
    # When a global `window` exists its atob() is used; otherwise the Buffer
    # global is used to decode the text.
    if jstype(window) is not 'undefined':
        chars = window.atob(string)
    else:
        # Buffer.from() replaces the deprecated `new Buffer(string, encoding)`
        # constructor. It is written as verbatim JavaScript because `from` is
        # a keyword in this language.
        chars = v"Buffer.from(string, 'base64').toString('binary')"  # noqa: undef
    # Both branches produce a string with one character per decoded byte.
    ans = Uint8Array(chars.length)
    for i in range(ans.length):
        ans[i] = chars.charCodeAt(i)
    return ans
34

35
def urlsafe_b64encode(bytes, pad_char):
    # Base-64 encode bytes with '-' and '_' as the last two alphabet
    # characters instead of '+' and '/'. pad_char is passed through unchanged.
    url_safe_tail = '-_'
    return base64encode(bytes, url_safe_tail, pad_char)
37

38
def urlsafe_b64decode(string):
    # Decode text produced with the '-_' alphabet: map '-' back to '+' and
    # '_' back to '/', then hand the result to base64decode.
    def to_standard(ch):
        if ch is '-':
            return '+'
        return '/'

    standard = String.prototype.replace.call(string, /[_-]/g, to_standard)
    return base64decode(standard)
41

42
def hexlify(bytes):
    # Return the base-16 text for an array of bytes. Each element is
    # formatted with toString(16) and single-digit results get a leading '0'.
    pieces = v'[]'
    for v'var idx = 0; idx < bytes.length; idx++':
        digits = bytes[idx].toString(16)
        pieces.push('0' + digits if digits.length is 1 else digits)
    return pieces.join('')
50

51
def unhexlify(string):
    # Convert a hex-encoded string (two hex digits per byte) into a
    # Uint8Array.
    #
    # Raises ValueError when the length is odd or when the string contains
    # anything other than the characters 0-9, a-f and A-F.
    num = string.length // 2
    if num * 2 is not string.length:
        raise ValueError('string length is not a multiple of two')
    # parseInt() alone is too forgiving: it stops at the first invalid
    # character and accepts signs and leading whitespace, so inputs such as
    # '1g', ' 1' or '-1' would be decoded instead of rejected. Validate the
    # whole string up front.
    hex_only = /^[0-9a-fA-F]*$/
    if not hex_only.test(string):
        raise ValueError('string is not hex-encoded')
    ans = Uint8Array(num)
    for v'var i = 0; i < num; i++':
        ans[i] = parseInt(string[i*2:i*2+2], 16)
    return ans
62

63
# Lookup table driving the table-based UTF-8 decoder in utf8_decode().
# Entries 0..255 map a byte value to its character class. The entries from
# index 256 onwards hold the state transitions and are addressed as
# 256 + state*16 + class. utf8_decode() treats state 0 as "code point
# complete" and state 1 as "invalid input".
utf8_decoder_table = v'''[
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
]'''
79

80
def _from_code_point(x):
    # Build the JavaScript string for code point x. Values up to 0xFFFF fit
    # in a single UTF-16 unit; larger values are split into a surrogate pair.
    if x <= 0xFFFF:
        return String.fromCharCode(x)
    offset = x - 0x10000
    lead = (offset >> 10) + 0xD800
    trail = (offset % 0x400) + 0xDC00
    return String.fromCharCode(lead, trail)
85

86
def utf8_decode(bytes, errors, replacement):
    # Convert an array of UTF-8 encoded bytes into a string.
    #
    # errors selects what happens on invalid input:
    #   falsy or 'strict' -> raise UnicodeDecodeError
    #   'replace'         -> emit `replacement` (default '?') for each
    #                        invalid sequence
    #   anything else     -> drop the invalid sequence silently
    state = 0
    ans = v'[]'
    strict = not errors or errors is 'strict'
    substitute = replacement or '?'

    for v'var i = 0, l = bytes.length; i < l; i++':  # noqa
        byte = bytes[i]
        typ = utf8_decoder_table[byte]
        prev_state = state
        codep = (byte & 0x3f) | (codep << 6) if state is not 0 else (0xff >> typ) & (byte)
        state = utf8_decoder_table[256 + state*16 + typ]
        if state is 0:
            ans.push(_from_code_point(codep))
        elif state is 1:
            if strict:
                raise UnicodeDecodeError(str.format('The byte 0x{:02x} at position {} is not valid UTF-8', byte, i))
            if errors is 'replace':
                ans.push(substitute)
            # Every transition out of state 1 leads back to state 1, so the
            # decoder must be restarted explicitly or all remaining input
            # would be treated as invalid.
            state = 0
            if prev_state is not 0:
                # This byte cut a multi-byte sequence short. It may itself be
                # the start of a valid character, so decode it again from
                # the initial state.
                i -= 1
    if state is not 0:
        # The input ended in the middle of a multi-byte sequence.
        if strict:
            raise UnicodeDecodeError('The data ends with an incomplete UTF-8 sequence')
        if errors is 'replace':
            ans.push(substitute)
    return ans.join('')
104

105
def utf8_encode_js(string):
    # Encode a string as UTF-8 bytes (Uint8Array) without TextEncoder.
    # encodeURIComponent() yields a mix of literal characters and %XX
    # escapes; each escape is read back as one byte value and each literal
    # character contributes its character code.
    escaped = encodeURIComponent(string)
    out = v'[]'
    pos = 0
    while pos < escaped.length:
        if escaped[pos] is '%':
            out.push(parseInt(escaped[pos+1:pos+3], 16))
            pos += 3
        else:
            out.push(escaped.charCodeAt(pos))
            pos += 1
    return Uint8Array(out)
117

118
# Choose the implementation behind utf8_encode once, at import time: fall
# back to the pure-JavaScript encoder when no TextEncoder function exists,
# otherwise bind the encode method of a TextEncoder instance.
if jstype(TextEncoder) is not 'function':
    utf8_encode = utf8_encode_js
else:
    _u8enc = TextEncoder('utf-8')
    utf8_encode = _u8enc.encode.bind(_u8enc)
    # The bound method keeps the encoder alive; clear the module-level name.
    _u8enc = undefined
124

125
def utf8_encode_native(string):
    # Encode a string as UTF-8 bytes using the platform TextEncoder.
    #
    # The encoder is created here rather than read from the module-level
    # _u8enc name: that name is reset to undefined during module
    # initialisation, so using it at call time always failed.
    return TextEncoder('utf-8').encode(string)
127

128
Product

Resources

Company