Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/python-wasm
Path: blob/main/python/pylang/src/unicode_aliases.py
1396 views
1
# vim:fileencoding=utf-8
2
# License: BSD
3
# Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
4
5
# Alias DB from http://www.unicode.org/Public/8.0.0/ucd/NameAliases.txt {{{
6
DB = '''
7
# NameAliases-8.0.0.txt
8
# Date: 2014-11-19, 01:30:00 GMT [KW, LI]
9
#
10
# This file is a normative contributory data file in the
11
# Unicode Character Database.
12
#
13
# Copyright (c) 2005-2014 Unicode, Inc.
14
# For terms of use, see http://www.unicode.org/terms_of_use.html
15
#
16
# This file defines the formal name aliases for Unicode characters.
17
#
18
# For informative aliases, see NamesList.txt
19
#
20
# The formal name aliases are divided into five types, each with a distinct label.
21
#
22
# Type Labels:
23
#
24
# 1. correction
25
# Corrections for serious problems in the character names
26
# 2. control
27
# ISO 6429 names for C0 and C1 control functions, and other
28
# commonly occurring names for control codes
29
# 3. alternate
30
# A few widely used alternate names for format characters
31
# 4. figment
32
# Several documented labels for C1 control code points which
33
# were never actually approved in any standard
34
# 5. abbreviation
35
# Commonly occurring abbreviations (or acronyms) for control codes,
36
# format characters, spaces, and variation selectors
37
#
38
# The formal name aliases are part of the Unicode character namespace, which
39
# includes the character names and the names of named character sequences.
40
# The inclusion of ISO 6429 names and other commonly occurring names and
41
# abbreviations for control codes and format characters as formal name aliases
42
# is to help avoid name collisions between Unicode character names and the
43
# labels which commonly appear in text and/or in implementations such as regex, for
44
# control codes (which for historical reasons have no Unicode character name)
45
# or for format characters.
46
#
47
# For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/
48
#
49
# FORMAT
50
#
51
# Each line has three fields, as described here:
52
#
53
# First field: Code point
54
# Second field: Alias
55
# Third field: Type
56
#
57
# The type labels used are defined above. As for property values, comparisons
58
# of type labels should ignore case.
59
#
60
# The type labels can be mapped to other strings for display, if desired.
61
#
62
# In case multiple aliases are assigned, additional aliases
63
# are provided on separate lines. Parsers of this data file should
64
# take note that the same code point can (and does) occur more than once.
65
#
66
# Note that currently the only instances of multiple aliases of the same
67
# type for a single code point are either of type "control" or "abbreviation".
68
# An alias of type "abbreviation" can, in principle, be added for any code
69
# point, although currently aliases of type "correction" do not have
70
# any additional aliases of type "abbreviation". Such relationships
71
# are not enforced by stability policies.
72
#
73
#-----------------------------------------------------------------
74
75
0000;NULL;control
76
0000;NUL;abbreviation
77
0001;START OF HEADING;control
78
0001;SOH;abbreviation
79
0002;START OF TEXT;control
80
0002;STX;abbreviation
81
0003;END OF TEXT;control
82
0003;ETX;abbreviation
83
0004;END OF TRANSMISSION;control
84
0004;EOT;abbreviation
85
0005;ENQUIRY;control
86
0005;ENQ;abbreviation
87
0006;ACKNOWLEDGE;control
88
0006;ACK;abbreviation
89
90
# Note that no formal name alias for the ISO 6429 "BELL" is
91
# provided for U+0007, because of the existing name collision
92
# with U+1F514 BELL.
93
94
0007;ALERT;control
95
0007;BEL;abbreviation
96
97
0008;BACKSPACE;control
98
0008;BS;abbreviation
99
0009;CHARACTER TABULATION;control
100
0009;HORIZONTAL TABULATION;control
101
0009;HT;abbreviation
102
0009;TAB;abbreviation
103
000A;LINE FEED;control
104
000A;NEW LINE;control
105
000A;END OF LINE;control
106
000A;LF;abbreviation
107
000A;NL;abbreviation
108
000A;EOL;abbreviation
109
000B;LINE TABULATION;control
110
000B;VERTICAL TABULATION;control
111
000B;VT;abbreviation
112
000C;FORM FEED;control
113
000C;FF;abbreviation
114
000D;CARRIAGE RETURN;control
115
000D;CR;abbreviation
116
000E;SHIFT OUT;control
117
000E;LOCKING-SHIFT ONE;control
118
000E;SO;abbreviation
119
000F;SHIFT IN;control
120
000F;LOCKING-SHIFT ZERO;control
121
000F;SI;abbreviation
122
0010;DATA LINK ESCAPE;control
123
0010;DLE;abbreviation
124
0011;DEVICE CONTROL ONE;control
125
0011;DC1;abbreviation
126
0012;DEVICE CONTROL TWO;control
127
0012;DC2;abbreviation
128
0013;DEVICE CONTROL THREE;control
129
0013;DC3;abbreviation
130
0014;DEVICE CONTROL FOUR;control
131
0014;DC4;abbreviation
132
0015;NEGATIVE ACKNOWLEDGE;control
133
0015;NAK;abbreviation
134
0016;SYNCHRONOUS IDLE;control
135
0016;SYN;abbreviation
136
0017;END OF TRANSMISSION BLOCK;control
137
0017;ETB;abbreviation
138
0018;CANCEL;control
139
0018;CAN;abbreviation
140
0019;END OF MEDIUM;control
141
0019;EOM;abbreviation
142
001A;SUBSTITUTE;control
143
001A;SUB;abbreviation
144
001B;ESCAPE;control
145
001B;ESC;abbreviation
146
001C;INFORMATION SEPARATOR FOUR;control
147
001C;FILE SEPARATOR;control
148
001C;FS;abbreviation
149
001D;INFORMATION SEPARATOR THREE;control
150
001D;GROUP SEPARATOR;control
151
001D;GS;abbreviation
152
001E;INFORMATION SEPARATOR TWO;control
153
001E;RECORD SEPARATOR;control
154
001E;RS;abbreviation
155
001F;INFORMATION SEPARATOR ONE;control
156
001F;UNIT SEPARATOR;control
157
001F;US;abbreviation
158
0020;SP;abbreviation
159
007F;DELETE;control
160
007F;DEL;abbreviation
161
162
# PADDING CHARACTER and HIGH OCTET PRESET represent
163
# architectural concepts initially proposed for early
164
# drafts of ISO/IEC 10646-1. They were never actually
165
# approved or standardized: hence their designation
166
# here as the "figment" type. Formal name aliases
167
# (and corresponding abbreviations) for these code
168
# points are included here because these names leaked
169
# out from the draft documents and were published in
170
# at least one RFC whose names for code points was
171
# implemented in Perl regex expressions.
172
173
0080;PADDING CHARACTER;figment
174
0080;PAD;abbreviation
175
0081;HIGH OCTET PRESET;figment
176
0081;HOP;abbreviation
177
178
0082;BREAK PERMITTED HERE;control
179
0082;BPH;abbreviation
180
0083;NO BREAK HERE;control
181
0083;NBH;abbreviation
182
0084;INDEX;control
183
0084;IND;abbreviation
184
0085;NEXT LINE;control
185
0085;NEL;abbreviation
186
0086;START OF SELECTED AREA;control
187
0086;SSA;abbreviation
188
0087;END OF SELECTED AREA;control
189
0087;ESA;abbreviation
190
0088;CHARACTER TABULATION SET;control
191
0088;HORIZONTAL TABULATION SET;control
192
0088;HTS;abbreviation
193
0089;CHARACTER TABULATION WITH JUSTIFICATION;control
194
0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control
195
0089;HTJ;abbreviation
196
008A;LINE TABULATION SET;control
197
008A;VERTICAL TABULATION SET;control
198
008A;VTS;abbreviation
199
008B;PARTIAL LINE FORWARD;control
200
008B;PARTIAL LINE DOWN;control
201
008B;PLD;abbreviation
202
008C;PARTIAL LINE BACKWARD;control
203
008C;PARTIAL LINE UP;control
204
008C;PLU;abbreviation
205
008D;REVERSE LINE FEED;control
206
008D;REVERSE INDEX;control
207
008D;RI;abbreviation
208
008E;SINGLE SHIFT TWO;control
209
008E;SINGLE-SHIFT-2;control
210
008E;SS2;abbreviation
211
008F;SINGLE SHIFT THREE;control
212
008F;SINGLE-SHIFT-3;control
213
008F;SS3;abbreviation
214
0090;DEVICE CONTROL STRING;control
215
0090;DCS;abbreviation
216
0091;PRIVATE USE ONE;control
217
0091;PRIVATE USE-1;control
218
0091;PU1;abbreviation
219
0092;PRIVATE USE TWO;control
220
0092;PRIVATE USE-2;control
221
0092;PU2;abbreviation
222
0093;SET TRANSMIT STATE;control
223
0093;STS;abbreviation
224
0094;CANCEL CHARACTER;control
225
0094;CCH;abbreviation
226
0095;MESSAGE WAITING;control
227
0095;MW;abbreviation
228
0096;START OF GUARDED AREA;control
229
0096;START OF PROTECTED AREA;control
230
0096;SPA;abbreviation
231
0097;END OF GUARDED AREA;control
232
0097;END OF PROTECTED AREA;control
233
0097;EPA;abbreviation
234
0098;START OF STRING;control
235
0098;SOS;abbreviation
236
237
# SINGLE GRAPHIC CHARACTER INTRODUCER is another
238
# architectural concept from early drafts of ISO/IEC 10646-1
239
# which was never approved and standardized.
240
241
0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
242
0099;SGC;abbreviation
243
244
009A;SINGLE CHARACTER INTRODUCER;control
245
009A;SCI;abbreviation
246
009B;CONTROL SEQUENCE INTRODUCER;control
247
009B;CSI;abbreviation
248
009C;STRING TERMINATOR;control
249
009C;ST;abbreviation
250
009D;OPERATING SYSTEM COMMAND;control
251
009D;OSC;abbreviation
252
009E;PRIVACY MESSAGE;control
253
009E;PM;abbreviation
254
009F;APPLICATION PROGRAM COMMAND;control
255
009F;APC;abbreviation
256
00A0;NBSP;abbreviation
257
00AD;SHY;abbreviation
258
01A2;LATIN CAPITAL LETTER GHA;correction
259
01A3;LATIN SMALL LETTER GHA;correction
260
034F;CGJ;abbreviation
261
061C;ALM;abbreviation
262
0709;SYRIAC SUBLINEAR COLON SKEWED LEFT;correction
263
0CDE;KANNADA LETTER LLLA;correction
264
0E9D;LAO LETTER FO FON;correction
265
0E9F;LAO LETTER FO FAY;correction
266
0EA3;LAO LETTER RO;correction
267
0EA5;LAO LETTER LO;correction
268
0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN;correction
269
180B;FVS1;abbreviation
270
180C;FVS2;abbreviation
271
180D;FVS3;abbreviation
272
180E;MVS;abbreviation
273
200B;ZWSP;abbreviation
274
200C;ZWNJ;abbreviation
275
200D;ZWJ;abbreviation
276
200E;LRM;abbreviation
277
200F;RLM;abbreviation
278
202A;LRE;abbreviation
279
202B;RLE;abbreviation
280
202C;PDF;abbreviation
281
202D;LRO;abbreviation
282
202E;RLO;abbreviation
283
202F;NNBSP;abbreviation
284
205F;MMSP;abbreviation
285
2060;WJ;abbreviation
286
2066;LRI;abbreviation
287
2067;RLI;abbreviation
288
2068;FSI;abbreviation
289
2069;PDI;abbreviation
290
2118;WEIERSTRASS ELLIPTIC FUNCTION;correction
291
2448;MICR ON US SYMBOL;correction
292
2449;MICR DASH SYMBOL;correction
293
2B7A;LEFTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE;correction
294
2B7C;RIGHTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE;correction
295
A015;YI SYLLABLE ITERATION MARK;correction
296
FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET;correction
297
FE00;VS1;abbreviation
298
FE01;VS2;abbreviation
299
FE02;VS3;abbreviation
300
FE03;VS4;abbreviation
301
FE04;VS5;abbreviation
302
FE05;VS6;abbreviation
303
FE06;VS7;abbreviation
304
FE07;VS8;abbreviation
305
FE08;VS9;abbreviation
306
FE09;VS10;abbreviation
307
FE0A;VS11;abbreviation
308
FE0B;VS12;abbreviation
309
FE0C;VS13;abbreviation
310
FE0D;VS14;abbreviation
311
FE0E;VS15;abbreviation
312
FE0F;VS16;abbreviation
313
FEFF;BYTE ORDER MARK;alternate
314
FEFF;BOM;abbreviation
315
FEFF;ZWNBSP;abbreviation
316
122D4;CUNEIFORM SIGN NU11 TENU;correction
317
122D5;CUNEIFORM SIGN NU11 OVER NU11 BUR OVER BUR;correction
318
1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS;correction
319
E0100;VS17;abbreviation
320
E0101;VS18;abbreviation
321
E0102;VS19;abbreviation
322
E0103;VS20;abbreviation
323
E0104;VS21;abbreviation
324
E0105;VS22;abbreviation
325
E0106;VS23;abbreviation
326
E0107;VS24;abbreviation
327
E0108;VS25;abbreviation
328
E0109;VS26;abbreviation
329
E010A;VS27;abbreviation
330
E010B;VS28;abbreviation
331
E010C;VS29;abbreviation
332
E010D;VS30;abbreviation
333
E010E;VS31;abbreviation
334
E010F;VS32;abbreviation
335
E0110;VS33;abbreviation
336
E0111;VS34;abbreviation
337
E0112;VS35;abbreviation
338
E0113;VS36;abbreviation
339
E0114;VS37;abbreviation
340
E0115;VS38;abbreviation
341
E0116;VS39;abbreviation
342
E0117;VS40;abbreviation
343
E0118;VS41;abbreviation
344
E0119;VS42;abbreviation
345
E011A;VS43;abbreviation
346
E011B;VS44;abbreviation
347
E011C;VS45;abbreviation
348
E011D;VS46;abbreviation
349
E011E;VS47;abbreviation
350
E011F;VS48;abbreviation
351
E0120;VS49;abbreviation
352
E0121;VS50;abbreviation
353
E0122;VS51;abbreviation
354
E0123;VS52;abbreviation
355
E0124;VS53;abbreviation
356
E0125;VS54;abbreviation
357
E0126;VS55;abbreviation
358
E0127;VS56;abbreviation
359
E0128;VS57;abbreviation
360
E0129;VS58;abbreviation
361
E012A;VS59;abbreviation
362
E012B;VS60;abbreviation
363
E012C;VS61;abbreviation
364
E012D;VS62;abbreviation
365
E012E;VS63;abbreviation
366
E012F;VS64;abbreviation
367
E0130;VS65;abbreviation
368
E0131;VS66;abbreviation
369
E0132;VS67;abbreviation
370
E0133;VS68;abbreviation
371
E0134;VS69;abbreviation
372
E0135;VS70;abbreviation
373
E0136;VS71;abbreviation
374
E0137;VS72;abbreviation
375
E0138;VS73;abbreviation
376
E0139;VS74;abbreviation
377
E013A;VS75;abbreviation
378
E013B;VS76;abbreviation
379
E013C;VS77;abbreviation
380
E013D;VS78;abbreviation
381
E013E;VS79;abbreviation
382
E013F;VS80;abbreviation
383
E0140;VS81;abbreviation
384
E0141;VS82;abbreviation
385
E0142;VS83;abbreviation
386
E0143;VS84;abbreviation
387
E0144;VS85;abbreviation
388
E0145;VS86;abbreviation
389
E0146;VS87;abbreviation
390
E0147;VS88;abbreviation
391
E0148;VS89;abbreviation
392
E0149;VS90;abbreviation
393
E014A;VS91;abbreviation
394
E014B;VS92;abbreviation
395
E014C;VS93;abbreviation
396
E014D;VS94;abbreviation
397
E014E;VS95;abbreviation
398
E014F;VS96;abbreviation
399
E0150;VS97;abbreviation
400
E0151;VS98;abbreviation
401
E0152;VS99;abbreviation
402
E0153;VS100;abbreviation
403
E0154;VS101;abbreviation
404
E0155;VS102;abbreviation
405
E0156;VS103;abbreviation
406
E0157;VS104;abbreviation
407
E0158;VS105;abbreviation
408
E0159;VS106;abbreviation
409
E015A;VS107;abbreviation
410
E015B;VS108;abbreviation
411
E015C;VS109;abbreviation
412
E015D;VS110;abbreviation
413
E015E;VS111;abbreviation
414
E015F;VS112;abbreviation
415
E0160;VS113;abbreviation
416
E0161;VS114;abbreviation
417
E0162;VS115;abbreviation
418
E0163;VS116;abbreviation
419
E0164;VS117;abbreviation
420
E0165;VS118;abbreviation
421
E0166;VS119;abbreviation
422
E0167;VS120;abbreviation
423
E0168;VS121;abbreviation
424
E0169;VS122;abbreviation
425
E016A;VS123;abbreviation
426
E016B;VS124;abbreviation
427
E016C;VS125;abbreviation
428
E016D;VS126;abbreviation
429
E016E;VS127;abbreviation
430
E016F;VS128;abbreviation
431
E0170;VS129;abbreviation
432
E0171;VS130;abbreviation
433
E0172;VS131;abbreviation
434
E0173;VS132;abbreviation
435
E0174;VS133;abbreviation
436
E0175;VS134;abbreviation
437
E0176;VS135;abbreviation
438
E0177;VS136;abbreviation
439
E0178;VS137;abbreviation
440
E0179;VS138;abbreviation
441
E017A;VS139;abbreviation
442
E017B;VS140;abbreviation
443
E017C;VS141;abbreviation
444
E017D;VS142;abbreviation
445
E017E;VS143;abbreviation
446
E017F;VS144;abbreviation
447
E0180;VS145;abbreviation
448
E0181;VS146;abbreviation
449
E0182;VS147;abbreviation
450
E0183;VS148;abbreviation
451
E0184;VS149;abbreviation
452
E0185;VS150;abbreviation
453
E0186;VS151;abbreviation
454
E0187;VS152;abbreviation
455
E0188;VS153;abbreviation
456
E0189;VS154;abbreviation
457
E018A;VS155;abbreviation
458
E018B;VS156;abbreviation
459
E018C;VS157;abbreviation
460
E018D;VS158;abbreviation
461
E018E;VS159;abbreviation
462
E018F;VS160;abbreviation
463
E0190;VS161;abbreviation
464
E0191;VS162;abbreviation
465
E0192;VS163;abbreviation
466
E0193;VS164;abbreviation
467
E0194;VS165;abbreviation
468
E0195;VS166;abbreviation
469
E0196;VS167;abbreviation
470
E0197;VS168;abbreviation
471
E0198;VS169;abbreviation
472
E0199;VS170;abbreviation
473
E019A;VS171;abbreviation
474
E019B;VS172;abbreviation
475
E019C;VS173;abbreviation
476
E019D;VS174;abbreviation
477
E019E;VS175;abbreviation
478
E019F;VS176;abbreviation
479
E01A0;VS177;abbreviation
480
E01A1;VS178;abbreviation
481
E01A2;VS179;abbreviation
482
E01A3;VS180;abbreviation
483
E01A4;VS181;abbreviation
484
E01A5;VS182;abbreviation
485
E01A6;VS183;abbreviation
486
E01A7;VS184;abbreviation
487
E01A8;VS185;abbreviation
488
E01A9;VS186;abbreviation
489
E01AA;VS187;abbreviation
490
E01AB;VS188;abbreviation
491
E01AC;VS189;abbreviation
492
E01AD;VS190;abbreviation
493
E01AE;VS191;abbreviation
494
E01AF;VS192;abbreviation
495
E01B0;VS193;abbreviation
496
E01B1;VS194;abbreviation
497
E01B2;VS195;abbreviation
498
E01B3;VS196;abbreviation
499
E01B4;VS197;abbreviation
500
E01B5;VS198;abbreviation
501
E01B6;VS199;abbreviation
502
E01B7;VS200;abbreviation
503
E01B8;VS201;abbreviation
504
E01B9;VS202;abbreviation
505
E01BA;VS203;abbreviation
506
E01BB;VS204;abbreviation
507
E01BC;VS205;abbreviation
508
E01BD;VS206;abbreviation
509
E01BE;VS207;abbreviation
510
E01BF;VS208;abbreviation
511
E01C0;VS209;abbreviation
512
E01C1;VS210;abbreviation
513
E01C2;VS211;abbreviation
514
E01C3;VS212;abbreviation
515
E01C4;VS213;abbreviation
516
E01C5;VS214;abbreviation
517
E01C6;VS215;abbreviation
518
E01C7;VS216;abbreviation
519
E01C8;VS217;abbreviation
520
E01C9;VS218;abbreviation
521
E01CA;VS219;abbreviation
522
E01CB;VS220;abbreviation
523
E01CC;VS221;abbreviation
524
E01CD;VS222;abbreviation
525
E01CE;VS223;abbreviation
526
E01CF;VS224;abbreviation
527
E01D0;VS225;abbreviation
528
E01D1;VS226;abbreviation
529
E01D2;VS227;abbreviation
530
E01D3;VS228;abbreviation
531
E01D4;VS229;abbreviation
532
E01D5;VS230;abbreviation
533
E01D6;VS231;abbreviation
534
E01D7;VS232;abbreviation
535
E01D8;VS233;abbreviation
536
E01D9;VS234;abbreviation
537
E01DA;VS235;abbreviation
538
E01DB;VS236;abbreviation
539
E01DC;VS237;abbreviation
540
E01DD;VS238;abbreviation
541
E01DE;VS239;abbreviation
542
E01DF;VS240;abbreviation
543
E01E0;VS241;abbreviation
544
E01E1;VS242;abbreviation
545
E01E2;VS243;abbreviation
546
E01E3;VS244;abbreviation
547
E01E4;VS245;abbreviation
548
E01E5;VS246;abbreviation
549
E01E6;VS247;abbreviation
550
E01E7;VS248;abbreviation
551
E01E8;VS249;abbreviation
552
E01E9;VS250;abbreviation
553
E01EA;VS251;abbreviation
554
E01EB;VS252;abbreviation
555
E01EC;VS253;abbreviation
556
E01ED;VS254;abbreviation
557
E01EE;VS255;abbreviation
558
E01EF;VS256;abbreviation
559
560
# EOF
561
'''
562
# }}}
563
564
565
def make_alias_map():
    # Build the alias lookup table from the NameAliases data held in DB.
    #
    # Every data line of DB has the form CODEPOINT;ALIAS;TYPE, where CODEPOINT
    # is written in hexadecimal. Blank lines and lines whose first character
    # is '#' are skipped, and the TYPE field is not used.
    #
    # Returns a mapping from the lower-cased alias to its numeric code point.
    # If the same alias appears on more than one line, the last line wins.
    #
    # NOTE(review): this module uses JavaScript string methods (trim, length,
    # toLowerCase) and parseInt, so it is presumably meant to be compiled by
    # the pylang/RapydScript toolchain rather than run by CPython - confirm.
    ans = {}
    for line in DB.split('\n'):
        line = line.trim()
        if not line or line[0] is '#':
            continue
        parts = line.split(';')
        if parts.length >= 2:
            code_point = parseInt(parts[0], 16)
            # parseInt() reports an unparseable field as NaN, never as
            # undefined, so NaN is the value that has to be rejected here.
            if not isNaN(code_point) and parts[1]:
                ans[parts[1].toLowerCase()] = code_point
    return ans
577
578
579
# Lookup table from lower-cased alias name to code point, built once by
# make_alias_map() when this module is loaded.
ALIAS_MAP = make_alias_map()
580
581