# Source: GitHub repository sqlmapproject/sqlmap
# Path: blob/master/thirdparty/chardet/mbcssm.py
1
######################## BEGIN LICENSE BLOCK ########################
2
# The Original Code is mozilla.org code.
3
#
4
# The Initial Developer of the Original Code is
5
# Netscape Communications Corporation.
6
# Portions created by the Initial Developer are Copyright (C) 1998
7
# the Initial Developer. All Rights Reserved.
8
#
9
# Contributor(s):
10
# Mark Pilgrim - port to Python
11
#
12
# This library is free software; you can redistribute it and/or
13
# modify it under the terms of the GNU Lesser General Public
14
# License as published by the Free Software Foundation; either
15
# version 2.1 of the License, or (at your option) any later version.
16
#
17
# This library is distributed in the hope that it will be useful,
18
# but WITHOUT ANY WARRANTY; without even the implied warranty of
19
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
# Lesser General Public License for more details.
21
#
22
# You should have received a copy of the GNU Lesser General Public
23
# License along with this library; if not, write to the Free Software
24
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25
# 02110-1301 USA
26
######################### END LICENSE BLOCK #########################
27
28
from .enums import MachineState
29
30
# BIG5
31
32
# Byte-class table for Big5: entry i is the class assigned to byte value i
# (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
BIG5_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07  (0x00 is allowed as a legal value)
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    1,1,1,1,1,1,1,1,  # 30 - 37
    1,1,1,1,1,1,1,1,  # 38 - 3f
    2,2,2,2,2,2,2,2,  # 40 - 47
    2,2,2,2,2,2,2,2,  # 48 - 4f
    2,2,2,2,2,2,2,2,  # 50 - 57
    2,2,2,2,2,2,2,2,  # 58 - 5f
    2,2,2,2,2,2,2,2,  # 60 - 67
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,1,  # 78 - 7f
    4,4,4,4,4,4,4,4,  # 80 - 87
    4,4,4,4,4,4,4,4,  # 88 - 8f
    4,4,4,4,4,4,4,4,  # 90 - 97
    4,4,4,4,4,4,4,4,  # 98 - 9f
    4,3,3,3,3,3,3,3,  # a0 - a7
    3,3,3,3,3,3,3,3,  # a8 - af
    3,3,3,3,3,3,3,3,  # b0 - b7
    3,3,3,3,3,3,3,3,  # b8 - bf
    3,3,3,3,3,3,3,3,  # c0 - c7
    3,3,3,3,3,3,3,3,  # c8 - cf
    3,3,3,3,3,3,3,3,  # d0 - d7
    3,3,3,3,3,3,3,3,  # d8 - df
    3,3,3,3,3,3,3,3,  # e0 - e7
    3,3,3,3,3,3,3,3,  # e8 - ef
    3,3,3,3,3,3,3,3,  # f0 - f7
    3,3,3,3,3,3,3,0   # f8 - ff
)
66
67
# Flattened state-transition table for Big5.  The trailing comment on each
# row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class, as
# the CP949 table's row/column annotations suggest -- confirm against the
# state-machine driver.
BIG5_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,  # 08-0f
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START  # 10-17
)
72
73
# Character length for each Big5 byte class, indexed by class number.
BIG5_CHAR_LEN_TABLE = (
    0,  # class 0
    1,  # class 1
    1,  # class 2
    2,  # class 3
    0,  # class 4
)
74
75
# Coding state-machine model for Big5: groups the byte-class table, the class
# factor, the state table, the character-length table and the encoding name.
BIG5_SM_MODEL = {
    'class_table': BIG5_CLS,
    'class_factor': 5,
    'state_table': BIG5_ST,
    'char_len_table': BIG5_CHAR_LEN_TABLE,
    'name': 'Big5',
}
80
81
# CP949
82
83
# Byte-class table for CP949: entry i is the class assigned to byte value i
# (256 entries, one row per 16 byte values; the trailing comment on each row
# gives the hex range it covers).
CP949_CLS = (
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0,  # 00 - 0f
    1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1,  # 10 - 1f
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 20 - 2f
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 30 - 3f
    1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,  # 40 - 4f
    4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 50 - 5f
    1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,  # 60 - 6f
    5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 70 - 7f
    0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 80 - 8f
    6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 90 - 9f
    6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8,  # a0 - af
    7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,  # b0 - bf
    7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2,  # c0 - cf
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # d0 - df
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # e0 - ef
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0,  # f0 - ff
)
101
102
# State-transition table for CP949, flattened row by row.  Each row holds the
# next state for one previous state (named in the row's trailing comment) and
# each column corresponds to one byte class (0-9).
CP949_ST = (
    # cls = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9                                  # previous state =
    MachineState.ERROR,MachineState.START,3,MachineState.ERROR,MachineState.START,MachineState.START,4,5,MachineState.ERROR,6,  # MachineState.START
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # MachineState.ERROR
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,  # MachineState.ITS_ME
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,  # 3
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,  # 4
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,  # 5
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,  # 6
)
112
113
# Character length for each CP949 byte class, indexed by class number.
CP949_CHAR_LEN_TABLE = (
    0,  # class 0
    1,  # class 1
    2,  # class 2
    0,  # class 3
    1,  # class 4
    1,  # class 5
    2,  # class 6
    2,  # class 7
    0,  # class 8
    2,  # class 9
)
114
115
# Coding state-machine model for CP949: groups the byte-class table, the class
# factor, the state table, the character-length table and the encoding name.
CP949_SM_MODEL = {
    'class_table': CP949_CLS,
    'class_factor': 10,
    'state_table': CP949_ST,
    'char_len_table': CP949_CHAR_LEN_TABLE,
    'name': 'CP949',
}
120
121
# EUC-JP
122
123
# Byte-class table for EUC-JP: entry i is the class assigned to byte value i
# (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
EUCJP_CLS = (
    4,4,4,4,4,4,4,4,  # 00 - 07
    4,4,4,4,4,4,5,5,  # 08 - 0f
    4,4,4,4,4,4,4,4,  # 10 - 17
    4,4,4,5,4,4,4,4,  # 18 - 1f
    4,4,4,4,4,4,4,4,  # 20 - 27
    4,4,4,4,4,4,4,4,  # 28 - 2f
    4,4,4,4,4,4,4,4,  # 30 - 37
    4,4,4,4,4,4,4,4,  # 38 - 3f
    4,4,4,4,4,4,4,4,  # 40 - 47
    4,4,4,4,4,4,4,4,  # 48 - 4f
    4,4,4,4,4,4,4,4,  # 50 - 57
    4,4,4,4,4,4,4,4,  # 58 - 5f
    4,4,4,4,4,4,4,4,  # 60 - 67
    4,4,4,4,4,4,4,4,  # 68 - 6f
    4,4,4,4,4,4,4,4,  # 70 - 77
    4,4,4,4,4,4,4,4,  # 78 - 7f
    5,5,5,5,5,5,5,5,  # 80 - 87
    5,5,5,5,5,5,1,3,  # 88 - 8f
    5,5,5,5,5,5,5,5,  # 90 - 97
    5,5,5,5,5,5,5,5,  # 98 - 9f
    5,2,2,2,2,2,2,2,  # a0 - a7
    2,2,2,2,2,2,2,2,  # a8 - af
    2,2,2,2,2,2,2,2,  # b0 - b7
    2,2,2,2,2,2,2,2,  # b8 - bf
    2,2,2,2,2,2,2,2,  # c0 - c7
    2,2,2,2,2,2,2,2,  # c8 - cf
    2,2,2,2,2,2,2,2,  # d0 - d7
    2,2,2,2,2,2,2,2,  # d8 - df
    0,0,0,0,0,0,0,0,  # e0 - e7
    0,0,0,0,0,0,0,0,  # e8 - ef
    0,0,0,0,0,0,0,0,  # f0 - f7
    0,0,0,0,0,0,0,5   # f8 - ff
)
157
158
# Flattened state-transition table for EUC-JP.  The trailing comment on each
# row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class --
# confirm against the state-machine driver.
EUCJP_ST = (
    3,4,3,5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,3,MachineState.ERROR,  # 18-1f
    3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START  # 20-27
)
165
166
# Character length for each EUC-JP byte class, indexed by class number.
EUCJP_CHAR_LEN_TABLE = (
    2,  # class 0
    2,  # class 1
    2,  # class 2
    3,  # class 3
    1,  # class 4
    0,  # class 5
)
167
168
# Coding state-machine model for EUC-JP: groups the byte-class table, the class
# factor, the state table, the character-length table and the encoding name.
EUCJP_SM_MODEL = {
    'class_table': EUCJP_CLS,
    'class_factor': 6,
    'state_table': EUCJP_ST,
    'char_len_table': EUCJP_CHAR_LEN_TABLE,
    'name': 'EUC-JP',
}
173
174
# EUC-KR
175
176
# Byte-class table for EUC-KR: entry i is the class assigned to byte value i
# (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
EUCKR_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    1,1,1,1,1,1,1,1,  # 30 - 37
    1,1,1,1,1,1,1,1,  # 38 - 3f
    1,1,1,1,1,1,1,1,  # 40 - 47
    1,1,1,1,1,1,1,1,  # 48 - 4f
    1,1,1,1,1,1,1,1,  # 50 - 57
    1,1,1,1,1,1,1,1,  # 58 - 5f
    1,1,1,1,1,1,1,1,  # 60 - 67
    1,1,1,1,1,1,1,1,  # 68 - 6f
    1,1,1,1,1,1,1,1,  # 70 - 77
    1,1,1,1,1,1,1,1,  # 78 - 7f
    0,0,0,0,0,0,0,0,  # 80 - 87
    0,0,0,0,0,0,0,0,  # 88 - 8f
    0,0,0,0,0,0,0,0,  # 90 - 97
    0,0,0,0,0,0,0,0,  # 98 - 9f
    0,2,2,2,2,2,2,2,  # a0 - a7
    2,2,2,2,2,3,3,3,  # a8 - af
    2,2,2,2,2,2,2,2,  # b0 - b7
    2,2,2,2,2,2,2,2,  # b8 - bf
    2,2,2,2,2,2,2,2,  # c0 - c7
    2,3,2,2,2,2,2,2,  # c8 - cf
    2,2,2,2,2,2,2,2,  # d0 - d7
    2,2,2,2,2,2,2,2,  # d8 - df
    2,2,2,2,2,2,2,2,  # e0 - e7
    2,2,2,2,2,2,2,2,  # e8 - ef
    2,2,2,2,2,2,2,2,  # f0 - f7
    2,2,2,2,2,2,2,0   # f8 - ff
)
210
211
# Flattened state-transition table for EUC-KR.  The trailing comment on each
# row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class --
# confirm against the state-machine driver.
EUCKR_ST = (
    MachineState.ERROR,MachineState.START,3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 00-07
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START  # 08-0f
)
215
216
# Character length for each EUC-KR byte class, indexed by class number.
EUCKR_CHAR_LEN_TABLE = (
    0,  # class 0
    1,  # class 1
    2,  # class 2
    0,  # class 3
)
217
218
# Coding state-machine model for EUC-KR: groups the byte-class table, the class
# factor, the state table, the character-length table and the encoding name.
EUCKR_SM_MODEL = {
    'class_table': EUCKR_CLS,
    'class_factor': 4,
    'state_table': EUCKR_ST,
    'char_len_table': EUCKR_CHAR_LEN_TABLE,
    'name': 'EUC-KR',
}
223
224
# EUC-TW
225
226
# Byte-class table for EUC-TW: entry i is the class assigned to byte value i
# (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
EUCTW_CLS = (
    2,2,2,2,2,2,2,2,  # 00 - 07
    2,2,2,2,2,2,0,0,  # 08 - 0f
    2,2,2,2,2,2,2,2,  # 10 - 17
    2,2,2,0,2,2,2,2,  # 18 - 1f
    2,2,2,2,2,2,2,2,  # 20 - 27
    2,2,2,2,2,2,2,2,  # 28 - 2f
    2,2,2,2,2,2,2,2,  # 30 - 37
    2,2,2,2,2,2,2,2,  # 38 - 3f
    2,2,2,2,2,2,2,2,  # 40 - 47
    2,2,2,2,2,2,2,2,  # 48 - 4f
    2,2,2,2,2,2,2,2,  # 50 - 57
    2,2,2,2,2,2,2,2,  # 58 - 5f
    2,2,2,2,2,2,2,2,  # 60 - 67
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,2,  # 78 - 7f
    0,0,0,0,0,0,0,0,  # 80 - 87
    0,0,0,0,0,0,6,0,  # 88 - 8f
    0,0,0,0,0,0,0,0,  # 90 - 97
    0,0,0,0,0,0,0,0,  # 98 - 9f
    0,3,4,4,4,4,4,4,  # a0 - a7
    5,5,1,1,1,1,1,1,  # a8 - af
    1,1,1,1,1,1,1,1,  # b0 - b7
    1,1,1,1,1,1,1,1,  # b8 - bf
    1,1,3,1,3,3,3,3,  # c0 - c7
    3,3,3,3,3,3,3,3,  # c8 - cf
    3,3,3,3,3,3,3,3,  # d0 - d7
    3,3,3,3,3,3,3,3,  # d8 - df
    3,3,3,3,3,3,3,3,  # e0 - e7
    3,3,3,3,3,3,3,3,  # e8 - ef
    3,3,3,3,3,3,3,3,  # f0 - f7
    3,3,3,3,3,3,3,0   # f8 - ff
)
260
261
# Flattened state-transition table for EUC-TW.  The trailing comment on each
# row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class --
# confirm against the state-machine driver.
EUCTW_ST = (
    MachineState.ERROR,MachineState.ERROR,MachineState.START,3,3,3,4,MachineState.ERROR,  # 00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,  # 10-17
    MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 18-1f
    5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,  # 20-27
    MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START  # 28-2f
)
269
270
# Character length for each EUC-TW byte class, indexed by class number.
EUCTW_CHAR_LEN_TABLE = (
    0,  # class 0
    0,  # class 1
    1,  # class 2
    2,  # class 3
    2,  # class 4
    2,  # class 5
    3,  # class 6
)
271
272
# Coding state-machine model for EUC-TW: groups the byte-class table, the class
# factor, the state table, the character-length table and the encoding name.
EUCTW_SM_MODEL = {
    'class_table': EUCTW_CLS,
    'class_factor': 7,
    'state_table': EUCTW_ST,
    'char_len_table': EUCTW_CHAR_LEN_TABLE,
    'name': 'x-euc-tw',
}
277
278
# GB2312
279
280
# Byte-class table for GB2312: entry i is the class assigned to byte value i
# (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
GB2312_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    3,3,3,3,3,3,3,3,  # 30 - 37
    3,3,1,1,1,1,1,1,  # 38 - 3f
    2,2,2,2,2,2,2,2,  # 40 - 47
    2,2,2,2,2,2,2,2,  # 48 - 4f
    2,2,2,2,2,2,2,2,  # 50 - 57
    2,2,2,2,2,2,2,2,  # 58 - 5f
    2,2,2,2,2,2,2,2,  # 60 - 67
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,4,  # 78 - 7f
    5,6,6,6,6,6,6,6,  # 80 - 87
    6,6,6,6,6,6,6,6,  # 88 - 8f
    6,6,6,6,6,6,6,6,  # 90 - 97
    6,6,6,6,6,6,6,6,  # 98 - 9f
    6,6,6,6,6,6,6,6,  # a0 - a7
    6,6,6,6,6,6,6,6,  # a8 - af
    6,6,6,6,6,6,6,6,  # b0 - b7
    6,6,6,6,6,6,6,6,  # b8 - bf
    6,6,6,6,6,6,6,6,  # c0 - c7
    6,6,6,6,6,6,6,6,  # c8 - cf
    6,6,6,6,6,6,6,6,  # d0 - d7
    6,6,6,6,6,6,6,6,  # d8 - df
    6,6,6,6,6,6,6,6,  # e0 - e7
    6,6,6,6,6,6,6,6,  # e8 - ef
    6,6,6,6,6,6,6,6,  # f0 - f7
    6,6,6,6,6,6,6,0   # f8 - ff
)
314
315
# Flattened state-transition table for GB2312.  The trailing comment on each
# row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class --
# confirm against the state-machine driver.
GB2312_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,3,MachineState.ERROR,  # 00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,  # 10-17
    4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 18-1f
    MachineState.ERROR,MachineState.ERROR,5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,  # 20-27
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START  # 28-2f
)
323
324
# To be accurate, the length of class 6 can be either 2 or 4.
325
# But it is not necessary to discriminate between the two since
326
# it is used for frequency analysis only, and we are validating
327
# each code range there as well. So it is safe to set it to be
328
# 2 here.
329
# Character length for each GB2312 byte class, indexed by class number.
GB2312_CHAR_LEN_TABLE = (
    0,  # class 0
    1,  # class 1
    1,  # class 2
    1,  # class 3
    1,  # class 4
    1,  # class 5
    2,  # class 6
)
330
331
# Coding state-machine model for GB2312: groups the byte-class table, the class
# factor, the state table, the character-length table and the encoding name.
GB2312_SM_MODEL = {
    'class_table': GB2312_CLS,
    'class_factor': 7,
    'state_table': GB2312_ST,
    'char_len_table': GB2312_CHAR_LEN_TABLE,
    'name': 'GB2312',
}
336
337
# Shift_JIS
338
339
# Byte-class table for Shift_JIS: entry i is the class assigned to byte value
# i (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
SJIS_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    1,1,1,1,1,1,1,1,  # 30 - 37
    1,1,1,1,1,1,1,1,  # 38 - 3f
    2,2,2,2,2,2,2,2,  # 40 - 47
    2,2,2,2,2,2,2,2,  # 48 - 4f
    2,2,2,2,2,2,2,2,  # 50 - 57
    2,2,2,2,2,2,2,2,  # 58 - 5f
    2,2,2,2,2,2,2,2,  # 60 - 67
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,1,  # 78 - 7f
    3,3,3,3,3,2,2,3,  # 80 - 87
    3,3,3,3,3,3,3,3,  # 88 - 8f
    3,3,3,3,3,3,3,3,  # 90 - 97
    3,3,3,3,3,3,3,3,  # 98 - 9f
    # 0xa0 is illegal in the Shift_JIS encoding, but some pages do contain
    # this byte, so we need to be more forgiving of errors here.
    2,2,2,2,2,2,2,2,  # a0 - a7
    2,2,2,2,2,2,2,2,  # a8 - af
    2,2,2,2,2,2,2,2,  # b0 - b7
    2,2,2,2,2,2,2,2,  # b8 - bf
    2,2,2,2,2,2,2,2,  # c0 - c7
    2,2,2,2,2,2,2,2,  # c8 - cf
    2,2,2,2,2,2,2,2,  # d0 - d7
    2,2,2,2,2,2,2,2,  # d8 - df
    3,3,3,3,3,3,3,3,  # e0 - e7
    3,3,3,3,3,4,4,4,  # e8 - ef
    3,3,3,3,3,3,3,3,  # f0 - f7
    3,3,3,3,3,0,0,0   # f8 - ff
)
374
375
376
# Flattened state-transition table for Shift_JIS.  The trailing comment on
# each row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class --
# confirm against the state-machine driver.
SJIS_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START  # 10-17
)
381
382
# Character length for each Shift_JIS byte class, indexed by class number.
SJIS_CHAR_LEN_TABLE = (
    0,  # class 0
    1,  # class 1
    1,  # class 2
    2,  # class 3
    0,  # class 4
    0,  # class 5
)
383
384
# Coding state-machine model for Shift_JIS: groups the byte-class table, the
# class factor, the state table, the character-length table and the encoding
# name.
SJIS_SM_MODEL = {
    'class_table': SJIS_CLS,
    'class_factor': 6,
    'state_table': SJIS_ST,
    'char_len_table': SJIS_CHAR_LEN_TABLE,
    'name': 'Shift_JIS',
}
389
390
# UCS2-BE
391
392
# Byte-class table for UCS2-BE: entry i is the class assigned to byte value i
# (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
UCS2BE_CLS = (
    0,0,0,0,0,0,0,0,  # 00 - 07
    0,0,1,0,0,2,0,0,  # 08 - 0f
    0,0,0,0,0,0,0,0,  # 10 - 17
    0,0,0,3,0,0,0,0,  # 18 - 1f
    0,0,0,0,0,0,0,0,  # 20 - 27
    0,3,3,3,3,3,0,0,  # 28 - 2f
    0,0,0,0,0,0,0,0,  # 30 - 37
    0,0,0,0,0,0,0,0,  # 38 - 3f
    0,0,0,0,0,0,0,0,  # 40 - 47
    0,0,0,0,0,0,0,0,  # 48 - 4f
    0,0,0,0,0,0,0,0,  # 50 - 57
    0,0,0,0,0,0,0,0,  # 58 - 5f
    0,0,0,0,0,0,0,0,  # 60 - 67
    0,0,0,0,0,0,0,0,  # 68 - 6f
    0,0,0,0,0,0,0,0,  # 70 - 77
    0,0,0,0,0,0,0,0,  # 78 - 7f
    0,0,0,0,0,0,0,0,  # 80 - 87
    0,0,0,0,0,0,0,0,  # 88 - 8f
    0,0,0,0,0,0,0,0,  # 90 - 97
    0,0,0,0,0,0,0,0,  # 98 - 9f
    0,0,0,0,0,0,0,0,  # a0 - a7
    0,0,0,0,0,0,0,0,  # a8 - af
    0,0,0,0,0,0,0,0,  # b0 - b7
    0,0,0,0,0,0,0,0,  # b8 - bf
    0,0,0,0,0,0,0,0,  # c0 - c7
    0,0,0,0,0,0,0,0,  # c8 - cf
    0,0,0,0,0,0,0,0,  # d0 - d7
    0,0,0,0,0,0,0,0,  # d8 - df
    0,0,0,0,0,0,0,0,  # e0 - e7
    0,0,0,0,0,0,0,0,  # e8 - ef
    0,0,0,0,0,0,0,0,  # f0 - f7
    0,0,0,0,0,0,4,5   # f8 - ff
)
426
427
# Flattened state-transition table for UCS2-BE.  The trailing comment on each
# row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class --
# confirm against the state-machine driver.
UCS2BE_ST = (
    5,7,7,MachineState.ERROR,4,3,MachineState.ERROR,MachineState.ERROR,  # 00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,6,6,6,6,MachineState.ERROR,MachineState.ERROR,  # 10-17
    6,6,6,6,6,MachineState.ITS_ME,6,6,  # 18-1f
    6,6,6,6,5,7,7,MachineState.ERROR,  # 20-27
    5,8,6,6,MachineState.ERROR,6,6,6,  # 28-2f
    6,6,6,6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START  # 30-37
)
436
437
# Character length for each UCS2-BE byte class, indexed by class number.
UCS2BE_CHAR_LEN_TABLE = (
    2,  # class 0
    2,  # class 1
    2,  # class 2
    0,  # class 3
    2,  # class 4
    2,  # class 5
)
438
439
# Coding state-machine model for UCS2-BE (reported under the name UTF-16BE):
# groups the byte-class table, the class factor, the state table, the
# character-length table and the encoding name.
UCS2BE_SM_MODEL = {
    'class_table': UCS2BE_CLS,
    'class_factor': 6,
    'state_table': UCS2BE_ST,
    'char_len_table': UCS2BE_CHAR_LEN_TABLE,
    'name': 'UTF-16BE',
}
444
445
# UCS2-LE
446
447
# Byte-class table for UCS2-LE: entry i is the class assigned to byte value i
# (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
UCS2LE_CLS = (
    0,0,0,0,0,0,0,0,  # 00 - 07
    0,0,1,0,0,2,0,0,  # 08 - 0f
    0,0,0,0,0,0,0,0,  # 10 - 17
    0,0,0,3,0,0,0,0,  # 18 - 1f
    0,0,0,0,0,0,0,0,  # 20 - 27
    0,3,3,3,3,3,0,0,  # 28 - 2f
    0,0,0,0,0,0,0,0,  # 30 - 37
    0,0,0,0,0,0,0,0,  # 38 - 3f
    0,0,0,0,0,0,0,0,  # 40 - 47
    0,0,0,0,0,0,0,0,  # 48 - 4f
    0,0,0,0,0,0,0,0,  # 50 - 57
    0,0,0,0,0,0,0,0,  # 58 - 5f
    0,0,0,0,0,0,0,0,  # 60 - 67
    0,0,0,0,0,0,0,0,  # 68 - 6f
    0,0,0,0,0,0,0,0,  # 70 - 77
    0,0,0,0,0,0,0,0,  # 78 - 7f
    0,0,0,0,0,0,0,0,  # 80 - 87
    0,0,0,0,0,0,0,0,  # 88 - 8f
    0,0,0,0,0,0,0,0,  # 90 - 97
    0,0,0,0,0,0,0,0,  # 98 - 9f
    0,0,0,0,0,0,0,0,  # a0 - a7
    0,0,0,0,0,0,0,0,  # a8 - af
    0,0,0,0,0,0,0,0,  # b0 - b7
    0,0,0,0,0,0,0,0,  # b8 - bf
    0,0,0,0,0,0,0,0,  # c0 - c7
    0,0,0,0,0,0,0,0,  # c8 - cf
    0,0,0,0,0,0,0,0,  # d0 - d7
    0,0,0,0,0,0,0,0,  # d8 - df
    0,0,0,0,0,0,0,0,  # e0 - e7
    0,0,0,0,0,0,0,0,  # e8 - ef
    0,0,0,0,0,0,0,0,  # f0 - f7
    0,0,0,0,0,0,4,5   # f8 - ff
)
481
482
# Flattened state-transition table for UCS2-LE.  The trailing comment on each
# row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class --
# confirm against the state-machine driver.
UCS2LE_ST = (
    6,6,7,6,4,3,MachineState.ERROR,MachineState.ERROR,  # 00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,5,5,5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,  # 10-17
    5,5,5,MachineState.ERROR,5,MachineState.ERROR,6,6,  # 18-1f
    7,6,8,8,5,5,5,MachineState.ERROR,  # 20-27
    5,5,5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,5,5,  # 28-2f
    5,5,5,MachineState.ERROR,5,MachineState.ERROR,MachineState.START,MachineState.START  # 30-37
)
491
492
# Character length for each UCS2-LE byte class, indexed by class number.
UCS2LE_CHAR_LEN_TABLE = (
    2,  # class 0
    2,  # class 1
    2,  # class 2
    2,  # class 3
    2,  # class 4
    2,  # class 5
)
493
494
# Coding state-machine model for UCS2-LE (reported under the name UTF-16LE):
# groups the byte-class table, the class factor, the state table, the
# character-length table and the encoding name.
UCS2LE_SM_MODEL = {
    'class_table': UCS2LE_CLS,
    'class_factor': 6,
    'state_table': UCS2LE_ST,
    'char_len_table': UCS2LE_CHAR_LEN_TABLE,
    'name': 'UTF-16LE',
}
499
500
# UTF-8
501
502
# Byte-class table for UTF-8: entry i is the class assigned to byte value i
# (256 entries, one row per 8 byte values; the trailing comment on each row
# gives the hex range it covers).
UTF8_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07  (0x00 is allowed as a legal value)
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    1,1,1,1,1,1,1,1,  # 30 - 37
    1,1,1,1,1,1,1,1,  # 38 - 3f
    1,1,1,1,1,1,1,1,  # 40 - 47
    1,1,1,1,1,1,1,1,  # 48 - 4f
    1,1,1,1,1,1,1,1,  # 50 - 57
    1,1,1,1,1,1,1,1,  # 58 - 5f
    1,1,1,1,1,1,1,1,  # 60 - 67
    1,1,1,1,1,1,1,1,  # 68 - 6f
    1,1,1,1,1,1,1,1,  # 70 - 77
    1,1,1,1,1,1,1,1,  # 78 - 7f
    2,2,2,2,3,3,3,3,  # 80 - 87
    4,4,4,4,4,4,4,4,  # 88 - 8f
    4,4,4,4,4,4,4,4,  # 90 - 97
    4,4,4,4,4,4,4,4,  # 98 - 9f
    5,5,5,5,5,5,5,5,  # a0 - a7
    5,5,5,5,5,5,5,5,  # a8 - af
    5,5,5,5,5,5,5,5,  # b0 - b7
    5,5,5,5,5,5,5,5,  # b8 - bf
    0,0,6,6,6,6,6,6,  # c0 - c7
    6,6,6,6,6,6,6,6,  # c8 - cf
    6,6,6,6,6,6,6,6,  # d0 - d7
    6,6,6,6,6,6,6,6,  # d8 - df
    7,8,8,8,8,8,8,8,  # e0 - e7
    8,8,8,8,8,9,8,8,  # e8 - ef
    10,11,11,11,11,11,11,11,  # f0 - f7
    12,13,13,13,14,15,0,0   # f8 - ff
)
536
537
# Flattened state-transition table for UTF-8.  The trailing comment on each
# row is the flat index range (hex) of its entries.
# NOTE(review): presumably indexed as state * class_factor + byte_class --
# confirm against the state-machine driver.
UTF8_ST = (
    MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,12,10,  # 00-07
    9,11,8,7,6,5,4,3,  # 08-0f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 18-1f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,  # 20-27
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,  # 28-2f
    MachineState.ERROR,MachineState.ERROR,5,5,5,5,MachineState.ERROR,MachineState.ERROR,  # 30-37
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 38-3f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,5,5,5,MachineState.ERROR,MachineState.ERROR,  # 40-47
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 48-4f
    MachineState.ERROR,MachineState.ERROR,7,7,7,7,MachineState.ERROR,MachineState.ERROR,  # 50-57
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 58-5f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,7,7,MachineState.ERROR,MachineState.ERROR,  # 60-67
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 68-6f
    MachineState.ERROR,MachineState.ERROR,9,9,9,9,MachineState.ERROR,MachineState.ERROR,  # 70-77
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 78-7f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,9,MachineState.ERROR,MachineState.ERROR,  # 80-87
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 88-8f
    MachineState.ERROR,MachineState.ERROR,12,12,12,12,MachineState.ERROR,MachineState.ERROR,  # 90-97
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # 98-9f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,12,MachineState.ERROR,MachineState.ERROR,  # a0-a7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # a8-af
    MachineState.ERROR,MachineState.ERROR,12,12,12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # b0-b7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,  # b8-bf
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,  # c0-c7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR  # c8-cf
)
565
566
# Character length for each UTF-8 byte class, indexed by class number.
UTF8_CHAR_LEN_TABLE = (
    0,  # class 0
    1,  # class 1
    0,  # class 2
    0,  # class 3
    0,  # class 4
    0,  # class 5
    2,  # class 6
    3,  # class 7
    3,  # class 8
    3,  # class 9
    4,  # class 10
    4,  # class 11
    5,  # class 12
    5,  # class 13
    6,  # class 14
    6,  # class 15
)
567
568
# Coding state-machine model for UTF-8: groups the byte-class table, the class
# factor, the state table, the character-length table and the encoding name.
UTF8_SM_MODEL = {
    'class_table': UTF8_CLS,
    'class_factor': 16,
    'state_table': UTF8_ST,
    'char_len_table': UTF8_CHAR_LEN_TABLE,
    'name': 'UTF-8',
}
573
574