# Path: blob/master/venv/Lib/site-packages/chardet/mbcssm.py
# 811 views
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .enums import MachineState

# Multi-byte coding state-machine models.
#
# Every encoding below is described by four pieces bundled into a
# ``*_SM_MODEL`` dict:
#
#   class_table    256 entries, one per byte value 0x00-0xff, giving the
#                  byte's character class (rows are annotated with the byte
#                  range they cover).
#   class_factor   the number of character classes, i.e. the number of
#                  columns in one logical row of the state table.
#   state_table    flat transition table.  As annotated on CP949_ST, one
#                  logical row per previous state and one column per byte
#                  class; entries are MachineState members or plain integer
#                  state numbers.
#                  NOTE(review): the consumer presumably indexes this as
#                  ``state * class_factor + byte_class`` -- confirm against
#                  the code that reads these models.
#   char_len_table one entry per character class (length == class_factor).
#   name           encoding name string.

# BIG5

BIG5_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07    #allow 0x00 as legal value
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 1,  # 78 - 7f
    4, 4, 4, 4, 4, 4, 4, 4,  # 80 - 87
    4, 4, 4, 4, 4, 4, 4, 4,  # 88 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 97
    4, 4, 4, 4, 4, 4, 4, 4,  # 98 - 9f
    4, 3, 3, 3, 3, 3, 3, 3,  # a0 - a7
    3, 3, 3, 3, 3, 3, 3, 3,  # a8 - af
    3, 3, 3, 3, 3, 3, 3, 3,  # b0 - b7
    3, 3, 3, 3, 3, 3, 3, 3,  # b8 - bf
    3, 3, 3, 3, 3, 3, 3, 3,  # c0 - c7
    3, 3, 3, 3, 3, 3, 3, 3,  # c8 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  # d0 - d7
    3, 3, 3, 3, 3, 3, 3, 3,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 3, 3, 3,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 3, 3, 0,  # f8 - ff
)

BIG5_ST = (
    MachineState.ERROR, MachineState.START, MachineState.START, 3, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 00-07
    MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR,  # 08-0f
    MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START,  # 10-17
)

BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)

BIG5_SM_MODEL = {
    'class_table': BIG5_CLS,
    'class_factor': 5,
    'state_table': BIG5_ST,
    'char_len_table': BIG5_CHAR_LEN_TABLE,
    'name': 'Big5',
}

# CP949

CP949_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,  # 00 - 0f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,  # 10 - 1f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 2f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 3f
    1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  # 40 - 4f
    4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1,  # 50 - 5f
    1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,  # 60 - 6f
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1,  # 70 - 7f
    0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  # 80 - 8f
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 9f
    6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8,  # a0 - af
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  # b0 - bf
    7, 7, 7, 7, 7, 7, 9, 2, 2, 3, 2, 2, 2, 2, 2, 2,  # c0 - cf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  # d0 - df
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  # e0 - ef
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0,  # f0 - ff
)

# Columns are byte classes 0-9 in order; the trailing comment on each row
# names the previous state that the row belongs to.
CP949_ST = (
    MachineState.ERROR, MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, 4, 5, MachineState.ERROR, 6,  # MachineState.START
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # MachineState.ERROR
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,  # MachineState.ITS_ME
    MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START,  # 3
    MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START,  # 4
    MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START,  # 5
    MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START,  # 6
)

CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)

CP949_SM_MODEL = {
    'class_table': CP949_CLS,
    'class_factor': 10,
    'state_table': CP949_ST,
    'char_len_table': CP949_CHAR_LEN_TABLE,
    'name': 'CP949',
}

# EUC-JP

EUCJP_CLS = (
    4, 4, 4, 4, 4, 4, 4, 4,  # 00 - 07
    4, 4, 4, 4, 4, 4, 5, 5,  # 08 - 0f
    4, 4, 4, 4, 4, 4, 4, 4,  # 10 - 17
    4, 4, 4, 5, 4, 4, 4, 4,  # 18 - 1f
    4, 4, 4, 4, 4, 4, 4, 4,  # 20 - 27
    4, 4, 4, 4, 4, 4, 4, 4,  # 28 - 2f
    4, 4, 4, 4, 4, 4, 4, 4,  # 30 - 37
    4, 4, 4, 4, 4, 4, 4, 4,  # 38 - 3f
    4, 4, 4, 4, 4, 4, 4, 4,  # 40 - 47
    4, 4, 4, 4, 4, 4, 4, 4,  # 48 - 4f
    4, 4, 4, 4, 4, 4, 4, 4,  # 50 - 57
    4, 4, 4, 4, 4, 4, 4, 4,  # 58 - 5f
    4, 4, 4, 4, 4, 4, 4, 4,  # 60 - 67
    4, 4, 4, 4, 4, 4, 4, 4,  # 68 - 6f
    4, 4, 4, 4, 4, 4, 4, 4,  # 70 - 77
    4, 4, 4, 4, 4, 4, 4, 4,  # 78 - 7f
    5, 5, 5, 5, 5, 5, 5, 5,  # 80 - 87
    5, 5, 5, 5, 5, 5, 1, 3,  # 88 - 8f
    5, 5, 5, 5, 5, 5, 5, 5,  # 90 - 97
    5, 5, 5, 5, 5, 5, 5, 5,  # 98 - 9f
    5, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 0, 5,  # f8 - ff
)

EUCJP_ST = (
    3, 4, 3, 5, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 00-07
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.START, MachineState.ERROR, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 10-17
    MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 3, MachineState.ERROR,  # 18-1f
    3, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START,  # 20-27
)

EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)

EUCJP_SM_MODEL = {
    'class_table': EUCJP_CLS,
    'class_factor': 6,
    'state_table': EUCJP_ST,
    'char_len_table': EUCJP_CHAR_LEN_TABLE,
    'name': 'EUC-JP',
}

# EUC-KR

EUCKR_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 1,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 3, 3, 3,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 3, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    2, 2, 2, 2, 2, 2, 2, 2,  # e0 - e7
    2, 2, 2, 2, 2, 2, 2, 2,  # e8 - ef
    2, 2, 2, 2, 2, 2, 2, 2,  # f0 - f7
    2, 2, 2, 2, 2, 2, 2, 0,  # f8 - ff
)

EUCKR_ST = (
    MachineState.ERROR, MachineState.START, 3, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 00-07
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START,  # 08-0f
)

EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)

EUCKR_SM_MODEL = {
    'class_table': EUCKR_CLS,
    'class_factor': 4,
    'state_table': EUCKR_ST,
    'char_len_table': EUCKR_CHAR_LEN_TABLE,
    'name': 'EUC-KR',
}

# EUC-TW

EUCTW_CLS = (
    2, 2, 2, 2, 2, 2, 2, 2,  # 00 - 07
    2, 2, 2, 2, 2, 2, 0, 0,  # 08 - 0f
    2, 2, 2, 2, 2, 2, 2, 2,  # 10 - 17
    2, 2, 2, 0, 2, 2, 2, 2,  # 18 - 1f
    2, 2, 2, 2, 2, 2, 2, 2,  # 20 - 27
    2, 2, 2, 2, 2, 2, 2, 2,  # 28 - 2f
    2, 2, 2, 2, 2, 2, 2, 2,  # 30 - 37
    2, 2, 2, 2, 2, 2, 2, 2,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 2,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 6, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 3, 4, 4, 4, 4, 4, 4,  # a0 - a7
    5, 5, 1, 1, 1, 1, 1, 1,  # a8 - af
    1, 1, 1, 1, 1, 1, 1, 1,  # b0 - b7
    1, 1, 1, 1, 1, 1, 1, 1,  # b8 - bf
    1, 1, 3, 1, 3, 3, 3, 3,  # c0 - c7
    3, 3, 3, 3, 3, 3, 3, 3,  # c8 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  # d0 - d7
    3, 3, 3, 3, 3, 3, 3, 3,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 3, 3, 3,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 3, 3, 0,  # f8 - ff
)

EUCTW_ST = (
    MachineState.ERROR, MachineState.ERROR, MachineState.START, 3, 3, 3, 4, MachineState.ERROR,  # 00-07
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, MachineState.ERROR,  # 10-17
    MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 18-1f
    5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.ERROR, MachineState.START, MachineState.START,  # 20-27
    MachineState.START, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START,  # 28-2f
)

EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)

EUCTW_SM_MODEL = {
    'class_table': EUCTW_CLS,
    'class_factor': 7,
    'state_table': EUCTW_ST,
    'char_len_table': EUCTW_CHAR_LEN_TABLE,
    'name': 'x-euc-tw',
}

# GB2312

GB2312_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    3, 3, 3, 3, 3, 3, 3, 3,  # 30 - 37
    3, 3, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 4,  # 78 - 7f
    5, 6, 6, 6, 6, 6, 6, 6,  # 80 - 87
    6, 6, 6, 6, 6, 6, 6, 6,  # 88 - 8f
    6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 97
    6, 6, 6, 6, 6, 6, 6, 6,  # 98 - 9f
    6, 6, 6, 6, 6, 6, 6, 6,  # a0 - a7
    6, 6, 6, 6, 6, 6, 6, 6,  # a8 - af
    6, 6, 6, 6, 6, 6, 6, 6,  # b0 - b7
    6, 6, 6, 6, 6, 6, 6, 6,  # b8 - bf
    6, 6, 6, 6, 6, 6, 6, 6,  # c0 - c7
    6, 6, 6, 6, 6, 6, 6, 6,  # c8 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  # d0 - d7
    6, 6, 6, 6, 6, 6, 6, 6,  # d8 - df
    6, 6, 6, 6, 6, 6, 6, 6,  # e0 - e7
    6, 6, 6, 6, 6, 6, 6, 6,  # e8 - ef
    6, 6, 6, 6, 6, 6, 6, 6,  # f0 - f7
    6, 6, 6, 6, 6, 6, 6, 0,  # f8 - ff
)

GB2312_ST = (
    MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, 3, MachineState.ERROR,  # 00-07
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.START,  # 10-17
    4, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 18-1f
    MachineState.ERROR, MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR,  # 20-27
    MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START,  # 28-2f
)

# To be accurate, the length of class 6 can be either 2 or 4.
# But it is not necessary to discriminate between the two since
# it is used for frequency analysis only, and we are validating
# each code range there as well. So it is safe to set it to be
# 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)

GB2312_SM_MODEL = {
    'class_table': GB2312_CLS,
    'class_factor': 7,
    'state_table': GB2312_ST,
    'char_len_table': GB2312_CHAR_LEN_TABLE,
    'name': 'GB2312',
}

# Shift_JIS

SJIS_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 1,  # 78 - 7f
    3, 3, 3, 3, 3, 2, 2, 3,  # 80 - 87
    3, 3, 3, 3, 3, 3, 3, 3,  # 88 - 8f
    3, 3, 3, 3, 3, 3, 3, 3,  # 90 - 97
    3, 3, 3, 3, 3, 3, 3, 3,  # 98 - 9f
    # 0xa0 is illegal in the sjis encoding, but some pages do contain
    # such bytes. We need to be more forgiving of errors here.
    2, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 4, 4, 4,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 0, 0, 0,  # f8 - ff
)

SJIS_ST = (
    MachineState.ERROR, MachineState.START, MachineState.START, 3, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 00-07
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START,  # 10-17
)

SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)

SJIS_SM_MODEL = {
    'class_table': SJIS_CLS,
    'class_factor': 6,
    'state_table': SJIS_ST,
    'char_len_table': SJIS_CHAR_LEN_TABLE,
    'name': 'Shift_JIS',
}

# UCS2-BE

UCS2BE_CLS = (
    0, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07
    0, 0, 1, 0, 0, 2, 0, 0,  # 08 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
    0, 0, 0, 3, 0, 0, 0, 0,  # 18 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
    0, 3, 3, 3, 3, 3, 0, 0,  # 28 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
    0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
    0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
    0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
    0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
    0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  # a0 - a7
    0, 0, 0, 0, 0, 0, 0, 0,  # a8 - af
    0, 0, 0, 0, 0, 0, 0, 0,  # b0 - b7
    0, 0, 0, 0, 0, 0, 0, 0,  # b8 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  # c0 - c7
    0, 0, 0, 0, 0, 0, 0, 0,  # c8 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  # d0 - d7
    0, 0, 0, 0, 0, 0, 0, 0,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 4, 5,  # f8 - ff
)

UCS2BE_ST = (
    5, 7, 7, MachineState.ERROR, 4, 3, MachineState.ERROR, MachineState.ERROR,  # 00-07
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME, MachineState.ITS_ME, 6, 6, 6, 6, MachineState.ERROR, MachineState.ERROR,  # 10-17
    6, 6, 6, 6, 6, MachineState.ITS_ME, 6, 6,  # 18-1f
    6, 6, 6, 6, 5, 7, 7, MachineState.ERROR,  # 20-27
    5, 8, 6, 6, MachineState.ERROR, 6, 6, 6,  # 28-2f
    6, 6, 6, 6, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START,  # 30-37
)

UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)

UCS2BE_SM_MODEL = {
    'class_table': UCS2BE_CLS,
    'class_factor': 6,
    'state_table': UCS2BE_ST,
    'char_len_table': UCS2BE_CHAR_LEN_TABLE,
    'name': 'UTF-16BE',
}

# UCS2-LE

UCS2LE_CLS = (
    0, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07
    0, 0, 1, 0, 0, 2, 0, 0,  # 08 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
    0, 0, 0, 3, 0, 0, 0, 0,  # 18 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
    0, 3, 3, 3, 3, 3, 0, 0,  # 28 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
    0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
    0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
    0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
    0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
    0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  # a0 - a7
    0, 0, 0, 0, 0, 0, 0, 0,  # a8 - af
    0, 0, 0, 0, 0, 0, 0, 0,  # b0 - b7
    0, 0, 0, 0, 0, 0, 0, 0,  # b8 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  # c0 - c7
    0, 0, 0, 0, 0, 0, 0, 0,  # c8 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  # d0 - d7
    0, 0, 0, 0, 0, 0, 0, 0,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 4, 5,  # f8 - ff
)

UCS2LE_ST = (
    6, 6, 7, 6, 4, 3, MachineState.ERROR, MachineState.ERROR,  # 00-07
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,  # 08-0f
    MachineState.ITS_ME, MachineState.ITS_ME, 5, 5, 5, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR,  # 10-17
    5, 5, 5, MachineState.ERROR, 5, MachineState.ERROR, 6, 6,  # 18-1f
    7, 6, 8, 8, 5, 5, 5, MachineState.ERROR,  # 20-27
    5, 5, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 5, 5,  # 28-2f
    5, 5, 5, MachineState.ERROR, 5, MachineState.ERROR, MachineState.START, MachineState.START,  # 30-37
)

UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)

UCS2LE_SM_MODEL = {
    'class_table': UCS2LE_CLS,
    'class_factor': 6,
    'state_table': UCS2LE_ST,
    'char_len_table': UCS2LE_CHAR_LEN_TABLE,
    'name': 'UTF-16LE',
}

# UTF-8

UTF8_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07  #allow 0x00 as a legal value
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 1,  # 78 - 7f
    2, 2, 2, 2, 3, 3, 3, 3,  # 80 - 87
    4, 4, 4, 4, 4, 4, 4, 4,  # 88 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 97
    4, 4, 4, 4, 4, 4, 4, 4,  # 98 - 9f
    5, 5, 5, 5, 5, 5, 5, 5,  # a0 - a7
    5, 5, 5, 5, 5, 5, 5, 5,  # a8 - af
    5, 5, 5, 5, 5, 5, 5, 5,  # b0 - b7
    5, 5, 5, 5, 5, 5, 5, 5,  # b8 - bf
    0, 0, 6, 6, 6, 6, 6, 6,  # c0 - c7
    6, 6, 6, 6, 6, 6, 6, 6,  # c8 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  # d0 - d7
    6, 6, 6, 6, 6, 6, 6, 6,  # d8 - df
    7, 8, 8, 8, 8, 8, 8, 8,  # e0 - e7
    8, 8, 8, 8, 8, 9, 8, 8,  # e8 - ef
    10, 11, 11, 11, 11, 11, 11, 11,  # f0 - f7
    12, 13, 13, 13, 14, 15, 0, 0,  # f8 - ff
)

UTF8_ST = (
    MachineState.ERROR, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 12, 10,  # 00-07
    9, 11, 8, 7, 6, 5, 4, 3,  # 08-0f
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 10-17
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 18-1f
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,  # 20-27
    MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME,  # 28-2f
    MachineState.ERROR, MachineState.ERROR, 5, 5, 5, 5, MachineState.ERROR, MachineState.ERROR,  # 30-37
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 38-3f
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 5, 5, 5, MachineState.ERROR, MachineState.ERROR,  # 40-47
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 48-4f
    MachineState.ERROR, MachineState.ERROR, 7, 7, 7, 7, MachineState.ERROR, MachineState.ERROR,  # 50-57
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 58-5f
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 7, 7, MachineState.ERROR, MachineState.ERROR,  # 60-67
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 68-6f
    MachineState.ERROR, MachineState.ERROR, 9, 9, 9, 9, MachineState.ERROR, MachineState.ERROR,  # 70-77
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 78-7f
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 9, MachineState.ERROR, MachineState.ERROR,  # 80-87
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 88-8f
    MachineState.ERROR, MachineState.ERROR, 12, 12, 12, 12, MachineState.ERROR, MachineState.ERROR,  # 90-97
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # 98-9f
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 12, MachineState.ERROR, MachineState.ERROR,  # a0-a7
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # a8-af
    MachineState.ERROR, MachineState.ERROR, 12, 12, 12, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # b0-b7
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # b8-bf
    MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR,  # c0-c7
    MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR,  # c8-cf
)

UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)

UTF8_SM_MODEL = {
    'class_table': UTF8_CLS,
    'class_factor': 16,
    'state_table': UTF8_ST,
    'char_len_table': UTF8_CHAR_LEN_TABLE,
    'name': 'UTF-8',
}