Path: blob/main/python/pylang/src/unicode_aliases.py
1396 views
# vim:fileencoding=utf-81# License: BSD2# Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>34# Alias DB from http://www.unicode.org/Public/8.0.0/ucd/NameAliases.txt {{{5DB = '''6# NameAliases-8.0.0.txt7# Date: 2014-11-19, 01:30:00 GMT [KW, LI]8#9# This file is a normative contributory data file in the10# Unicode Character Database.11#12# Copyright (c) 2005-2014 Unicode, Inc.13# For terms of use, see http://www.unicode.org/terms_of_use.html14#15# This file defines the formal name aliases for Unicode characters.16#17# For informative aliases, see NamesList.txt18#19# The formal name aliases are divided into five types, each with a distinct label.20#21# Type Labels:22#23# 1. correction24# Corrections for serious problems in the character names25# 2. control26# ISO 6429 names for C0 and C1 control functions, and other27# commonly occurring names for control codes28# 3. alternate29# A few widely used alternate names for format characters30# 4. figment31# Several documented labels for C1 control code points which32# were never actually approved in any standard33# 5. abbreviation34# Commonly occurring abbreviations (or acronyms) for control codes,35# format characters, spaces, and variation selectors36#37# The formal name aliases are part of the Unicode character namespace, which38# includes the character names and the names of named character sequences.39# The inclusion of ISO 6429 names and other commonly occurring names and40# abbreviations for control codes and format characters as formal name aliases41# is to help avoid name collisions between Unicode character names and the42# labels which commonly appear in text and/or in implementations such as regex, for43# control codes (which for historical reasons have no Unicode character name)44# or for format characters.45#46# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/47#48# FORMAT49#50# Each line has three fields, as described here:51#52# First field: Code point53# Second field: Alias54# Third field: Type55#56# The type labels used are defined above. As for property values, comparisons57# of type labels should ignore case.58#59# The type labels can be mapped to other strings for display, if desired.60#61# In case multiple aliases are assigned, additional aliases62# are provided on separate lines. Parsers of this data file should63# take note that the same code point can (and does) occur more than once.64#65# Note that currently the only instances of multiple aliases of the same66# type for a single code point are either of type "control" or "abbreviation".67# An alias of type "abbreviation" can, in principle, be added for any code68# point, although currently aliases of type "correction" do not have69# any additional aliases of type "abbreviation". Such relationships70# are not enforced by stability policies.71#72#-----------------------------------------------------------------73740000;NULL;control750000;NUL;abbreviation760001;START OF HEADING;control770001;SOH;abbreviation780002;START OF TEXT;control790002;STX;abbreviation800003;END OF TEXT;control810003;ETX;abbreviation820004;END OF TRANSMISSION;control830004;EOT;abbreviation840005;ENQUIRY;control850005;ENQ;abbreviation860006;ACKNOWLEDGE;control870006;ACK;abbreviation8889# Note that no formal name alias for the ISO 6429 "BELL" is90# provided for U+0007, because of the existing name collision91# with U+1F514 BELL.92930007;ALERT;control940007;BEL;abbreviation95960008;BACKSPACE;control970008;BS;abbreviation980009;CHARACTER TABULATION;control990009;HORIZONTAL TABULATION;control1000009;HT;abbreviation1010009;TAB;abbreviation102000A;LINE FEED;control103000A;NEW LINE;control104000A;END OF LINE;control105000A;LF;abbreviation106000A;NL;abbreviation107000A;EOL;abbreviation108000B;LINE TABULATION;control109000B;VERTICAL TABULATION;control110000B;VT;abbreviation111000C;FORM FEED;control112000C;FF;abbreviation113000D;CARRIAGE RETURN;control114000D;CR;abbreviation115000E;SHIFT OUT;control116000E;LOCKING-SHIFT ONE;control117000E;SO;abbreviation118000F;SHIFT IN;control119000F;LOCKING-SHIFT ZERO;control120000F;SI;abbreviation1210010;DATA LINK ESCAPE;control1220010;DLE;abbreviation1230011;DEVICE CONTROL ONE;control1240011;DC1;abbreviation1250012;DEVICE CONTROL TWO;control1260012;DC2;abbreviation1270013;DEVICE CONTROL THREE;control1280013;DC3;abbreviation1290014;DEVICE CONTROL FOUR;control1300014;DC4;abbreviation1310015;NEGATIVE ACKNOWLEDGE;control1320015;NAK;abbreviation1330016;SYNCHRONOUS IDLE;control1340016;SYN;abbreviation1350017;END OF TRANSMISSION BLOCK;control1360017;ETB;abbreviation1370018;CANCEL;control1380018;CAN;abbreviation1390019;END OF MEDIUM;control1400019;EOM;abbreviation141001A;SUBSTITUTE;control142001A;SUB;abbreviation143001B;ESCAPE;control144001B;ESC;abbreviation145001C;INFORMATION SEPARATOR FOUR;control146001C;FILE SEPARATOR;control147001C;FS;abbreviation148001D;INFORMATION SEPARATOR THREE;control149001D;GROUP SEPARATOR;control150001D;GS;abbreviation151001E;INFORMATION SEPARATOR TWO;control152001E;RECORD SEPARATOR;control153001E;RS;abbreviation154001F;INFORMATION SEPARATOR ONE;control155001F;UNIT SEPARATOR;control156001F;US;abbreviation1570020;SP;abbreviation158007F;DELETE;control159007F;DEL;abbreviation160161# PADDING CHARACTER and HIGH OCTET PRESET represent162# architectural concepts initially proposed for early163# drafts of ISO/IEC 10646-1. They were never actually164# approved or standardized: hence their designation165# here as the "figment" type. Formal name aliases166# (and corresponding abbreviations) for these code167# points are included here because these names leaked168# out from the draft documents and were published in169# at least one RFC whose names for code points was170# implemented in Perl regex expressions.1711720080;PADDING CHARACTER;figment1730080;PAD;abbreviation1740081;HIGH OCTET PRESET;figment1750081;HOP;abbreviation1761770082;BREAK PERMITTED HERE;control1780082;BPH;abbreviation1790083;NO BREAK HERE;control1800083;NBH;abbreviation1810084;INDEX;control1820084;IND;abbreviation1830085;NEXT LINE;control1840085;NEL;abbreviation1850086;START OF SELECTED AREA;control1860086;SSA;abbreviation1870087;END OF SELECTED AREA;control1880087;ESA;abbreviation1890088;CHARACTER TABULATION SET;control1900088;HORIZONTAL TABULATION SET;control1910088;HTS;abbreviation1920089;CHARACTER TABULATION WITH JUSTIFICATION;control1930089;HORIZONTAL TABULATION WITH JUSTIFICATION;control1940089;HTJ;abbreviation195008A;LINE TABULATION SET;control196008A;VERTICAL TABULATION SET;control197008A;VTS;abbreviation198008B;PARTIAL LINE FORWARD;control199008B;PARTIAL LINE DOWN;control200008B;PLD;abbreviation201008C;PARTIAL LINE BACKWARD;control202008C;PARTIAL LINE UP;control203008C;PLU;abbreviation204008D;REVERSE LINE FEED;control205008D;REVERSE INDEX;control206008D;RI;abbreviation207008E;SINGLE SHIFT TWO;control208008E;SINGLE-SHIFT-2;control209008E;SS2;abbreviation210008F;SINGLE SHIFT THREE;control211008F;SINGLE-SHIFT-3;control212008F;SS3;abbreviation2130090;DEVICE CONTROL STRING;control2140090;DCS;abbreviation2150091;PRIVATE USE ONE;control2160091;PRIVATE USE-1;control2170091;PU1;abbreviation2180092;PRIVATE USE TWO;control2190092;PRIVATE USE-2;control2200092;PU2;abbreviation2210093;SET TRANSMIT STATE;control2220093;STS;abbreviation2230094;CANCEL CHARACTER;control2240094;CCH;abbreviation2250095;MESSAGE WAITING;control2260095;MW;abbreviation2270096;START OF GUARDED AREA;control2280096;START OF PROTECTED AREA;control2290096;SPA;abbreviation2300097;END OF GUARDED AREA;control2310097;END OF PROTECTED AREA;control2320097;EPA;abbreviation2330098;START OF STRING;control2340098;SOS;abbreviation235236# SINGLE GRAPHIC CHARACTER INTRODUCER is another237# architectural concept from early drafts of ISO/IEC 10646-1238# which was never approved and standardized.2392400099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment2410099;SGC;abbreviation242243009A;SINGLE CHARACTER INTRODUCER;control244009A;SCI;abbreviation245009B;CONTROL SEQUENCE INTRODUCER;control246009B;CSI;abbreviation247009C;STRING TERMINATOR;control248009C;ST;abbreviation249009D;OPERATING SYSTEM COMMAND;control250009D;OSC;abbreviation251009E;PRIVACY MESSAGE;control252009E;PM;abbreviation253009F;APPLICATION PROGRAM COMMAND;control254009F;APC;abbreviation25500A0;NBSP;abbreviation25600AD;SHY;abbreviation25701A2;LATIN CAPITAL LETTER GHA;correction25801A3;LATIN SMALL LETTER GHA;correction259034F;CGJ;abbreviation260061C;ALM;abbreviation2610709;SYRIAC SUBLINEAR COLON SKEWED LEFT;correction2620CDE;KANNADA LETTER LLLA;correction2630E9D;LAO LETTER FO FON;correction2640E9F;LAO LETTER FO FAY;correction2650EA3;LAO LETTER RO;correction2660EA5;LAO LETTER LO;correction2670FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN;correction268180B;FVS1;abbreviation269180C;FVS2;abbreviation270180D;FVS3;abbreviation271180E;MVS;abbreviation272200B;ZWSP;abbreviation273200C;ZWNJ;abbreviation274200D;ZWJ;abbreviation275200E;LRM;abbreviation276200F;RLM;abbreviation277202A;LRE;abbreviation278202B;RLE;abbreviation279202C;PDF;abbreviation280202D;LRO;abbreviation281202E;RLO;abbreviation282202F;NNBSP;abbreviation283205F;MMSP;abbreviation2842060;WJ;abbreviation2852066;LRI;abbreviation2862067;RLI;abbreviation2872068;FSI;abbreviation2882069;PDI;abbreviation2892118;WEIERSTRASS ELLIPTIC FUNCTION;correction2902448;MICR ON US SYMBOL;correction2912449;MICR DASH SYMBOL;correction2922B7A;LEFTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE;correction2932B7C;RIGHTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE;correction294A015;YI SYLLABLE ITERATION MARK;correction295FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET;correction296FE00;VS1;abbreviation297FE01;VS2;abbreviation298FE02;VS3;abbreviation299FE03;VS4;abbreviation300FE04;VS5;abbreviation301FE05;VS6;abbreviation302FE06;VS7;abbreviation303FE07;VS8;abbreviation304FE08;VS9;abbreviation305FE09;VS10;abbreviation306FE0A;VS11;abbreviation307FE0B;VS12;abbreviation308FE0C;VS13;abbreviation309FE0D;VS14;abbreviation310FE0E;VS15;abbreviation311FE0F;VS16;abbreviation312FEFF;BYTE ORDER MARK;alternate313FEFF;BOM;abbreviation314FEFF;ZWNBSP;abbreviation315122D4;CUNEIFORM SIGN NU11 TENU;correction316122D5;CUNEIFORM SIGN NU11 OVER NU11 BUR OVER BUR;correction3171D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS;correction318E0100;VS17;abbreviation319E0101;VS18;abbreviation320E0102;VS19;abbreviation321E0103;VS20;abbreviation322E0104;VS21;abbreviation323E0105;VS22;abbreviation324E0106;VS23;abbreviation325E0107;VS24;abbreviation326E0108;VS25;abbreviation327E0109;VS26;abbreviation328E010A;VS27;abbreviation329E010B;VS28;abbreviation330E010C;VS29;abbreviation331E010D;VS30;abbreviation332E010E;VS31;abbreviation333E010F;VS32;abbreviation334E0110;VS33;abbreviation335E0111;VS34;abbreviation336E0112;VS35;abbreviation337E0113;VS36;abbreviation338E0114;VS37;abbreviation339E0115;VS38;abbreviation340E0116;VS39;abbreviation341E0117;VS40;abbreviation342E0118;VS41;abbreviation343E0119;VS42;abbreviation344E011A;VS43;abbreviation345E011B;VS44;abbreviation346E011C;VS45;abbreviation347E011D;VS46;abbreviation348E011E;VS47;abbreviation349E011F;VS48;abbreviation350E0120;VS49;abbreviation351E0121;VS50;abbreviation352E0122;VS51;abbreviation353E0123;VS52;abbreviation354E0124;VS53;abbreviation355E0125;VS54;abbreviation356E0126;VS55;abbreviation357E0127;VS56;abbreviation358E0128;VS57;abbreviation359E0129;VS58;abbreviation360E012A;VS59;abbreviation361E012B;VS60;abbreviation362E012C;VS61;abbreviation363E012D;VS62;abbreviation364E012E;VS63;abbreviation365E012F;VS64;abbreviation366E0130;VS65;abbreviation367E0131;VS66;abbreviation368E0132;VS67;abbreviation369E0133;VS68;abbreviation370E0134;VS69;abbreviation371E0135;VS70;abbreviation372E0136;VS71;abbreviation373E0137;VS72;abbreviation374E0138;VS73;abbreviation375E0139;VS74;abbreviation376E013A;VS75;abbreviation377E013B;VS76;abbreviation378E013C;VS77;abbreviation379E013D;VS78;abbreviation380E013E;VS79;abbreviation381E013F;VS80;abbreviation382E0140;VS81;abbreviation383E0141;VS82;abbreviation384E0142;VS83;abbreviation385E0143;VS84;abbreviation386E0144;VS85;abbreviation387E0145;VS86;abbreviation388E0146;VS87;abbreviation389E0147;VS88;abbreviation390E0148;VS89;abbreviation391E0149;VS90;abbreviation392E014A;VS91;abbreviation393E014B;VS92;abbreviation394E014C;VS93;abbreviation395E014D;VS94;abbreviation396E014E;VS95;abbreviation397E014F;VS96;abbreviation398E0150;VS97;abbreviation399E0151;VS98;abbreviation400E0152;VS99;abbreviation401E0153;VS100;abbreviation402E0154;VS101;abbreviation403E0155;VS102;abbreviation404E0156;VS103;abbreviation405E0157;VS104;abbreviation406E0158;VS105;abbreviation407E0159;VS106;abbreviation408E015A;VS107;abbreviation409E015B;VS108;abbreviation410E015C;VS109;abbreviation411E015D;VS110;abbreviation412E015E;VS111;abbreviation413E015F;VS112;abbreviation414E0160;VS113;abbreviation415E0161;VS114;abbreviation416E0162;VS115;abbreviation417E0163;VS116;abbreviation418E0164;VS117;abbreviation419E0165;VS118;abbreviation420E0166;VS119;abbreviation421E0167;VS120;abbreviation422E0168;VS121;abbreviation423E0169;VS122;abbreviation424E016A;VS123;abbreviation425E016B;VS124;abbreviation426E016C;VS125;abbreviation427E016D;VS126;abbreviation428E016E;VS127;abbreviation429E016F;VS128;abbreviation430E0170;VS129;abbreviation431E0171;VS130;abbreviation432E0172;VS131;abbreviation433E0173;VS132;abbreviation434E0174;VS133;abbreviation435E0175;VS134;abbreviation436E0176;VS135;abbreviation437E0177;VS136;abbreviation438E0178;VS137;abbreviation439E0179;VS138;abbreviation440E017A;VS139;abbreviation441E017B;VS140;abbreviation442E017C;VS141;abbreviation443E017D;VS142;abbreviation444E017E;VS143;abbreviation445E017F;VS144;abbreviation446E0180;VS145;abbreviation447E0181;VS146;abbreviation448E0182;VS147;abbreviation449E0183;VS148;abbreviation450E0184;VS149;abbreviation451E0185;VS150;abbreviation452E0186;VS151;abbreviation453E0187;VS152;abbreviation454E0188;VS153;abbreviation455E0189;VS154;abbreviation456E018A;VS155;abbreviation457E018B;VS156;abbreviation458E018C;VS157;abbreviation459E018D;VS158;abbreviation460E018E;VS159;abbreviation461E018F;VS160;abbreviation462E0190;VS161;abbreviation463E0191;VS162;abbreviation464E0192;VS163;abbreviation465E0193;VS164;abbreviation466E0194;VS165;abbreviation467E0195;VS166;abbreviation468E0196;VS167;abbreviation469E0197;VS168;abbreviation470E0198;VS169;abbreviation471E0199;VS170;abbreviation472E019A;VS171;abbreviation473E019B;VS172;abbreviation474E019C;VS173;abbreviation475E019D;VS174;abbreviation476E019E;VS175;abbreviation477E019F;VS176;abbreviation478E01A0;VS177;abbreviation479E01A1;VS178;abbreviation480E01A2;VS179;abbreviation481E01A3;VS180;abbreviation482E01A4;VS181;abbreviation483E01A5;VS182;abbreviation484E01A6;VS183;abbreviation485E01A7;VS184;abbreviation486E01A8;VS185;abbreviation487E01A9;VS186;abbreviation488E01AA;VS187;abbreviation489E01AB;VS188;abbreviation490E01AC;VS189;abbreviation491E01AD;VS190;abbreviation492E01AE;VS191;abbreviation493E01AF;VS192;abbreviation494E01B0;VS193;abbreviation495E01B1;VS194;abbreviation496E01B2;VS195;abbreviation497E01B3;VS196;abbreviation498E01B4;VS197;abbreviation499E01B5;VS198;abbreviation500E01B6;VS199;abbreviation501E01B7;VS200;abbreviation502E01B8;VS201;abbreviation503E01B9;VS202;abbreviation504E01BA;VS203;abbreviation505E01BB;VS204;abbreviation506E01BC;VS205;abbreviation507E01BD;VS206;abbreviation508E01BE;VS207;abbreviation509E01BF;VS208;abbreviation510E01C0;VS209;abbreviation511E01C1;VS210;abbreviation512E01C2;VS211;abbreviation513E01C3;VS212;abbreviation514E01C4;VS213;abbreviation515E01C5;VS214;abbreviation516E01C6;VS215;abbreviation517E01C7;VS216;abbreviation518E01C8;VS217;abbreviation519E01C9;VS218;abbreviation520E01CA;VS219;abbreviation521E01CB;VS220;abbreviation522E01CC;VS221;abbreviation523E01CD;VS222;abbreviation524E01CE;VS223;abbreviation525E01CF;VS224;abbreviation526E01D0;VS225;abbreviation527E01D1;VS226;abbreviation528E01D2;VS227;abbreviation529E01D3;VS228;abbreviation530E01D4;VS229;abbreviation531E01D5;VS230;abbreviation532E01D6;VS231;abbreviation533E01D7;VS232;abbreviation534E01D8;VS233;abbreviation535E01D9;VS234;abbreviation536E01DA;VS235;abbreviation537E01DB;VS236;abbreviation538E01DC;VS237;abbreviation539E01DD;VS238;abbreviation540E01DE;VS239;abbreviation541E01DF;VS240;abbreviation542E01E0;VS241;abbreviation543E01E1;VS242;abbreviation544E01E2;VS243;abbreviation545E01E3;VS244;abbreviation546E01E4;VS245;abbreviation547E01E5;VS246;abbreviation548E01E6;VS247;abbreviation549E01E7;VS248;abbreviation550E01E8;VS249;abbreviation551E01E9;VS250;abbreviation552E01EA;VS251;abbreviation553E01EB;VS252;abbreviation554E01EC;VS253;abbreviation555E01ED;VS254;abbreviation556E01EE;VS255;abbreviation557E01EF;VS256;abbreviation558559# EOF560'''561# }}}562563564def make_alias_map():565ans = {}566for line in DB.split('\n'):567line = line.trim()568if not line or line[0] is '#':569continue570parts = line.split(';')571if parts.length >= 2:572code_point = parseInt(parts[0], 16)573if code_point is not undefined and parts[1]:574ans[parts[1].toLowerCase()] = code_point575return ans576577578ALIAS_MAP = make_alias_map()579580581