/* Source: GitHub repository godotengine/godot,
   path: blob/master/thirdparty/pcre2/src/pcre2_ucp.h */
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
Original API code Copyright (c) 1997-2012 University of Cambridge
10
New API code Copyright (c) 2016-2022 University of Cambridge
11
12
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
13
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
14
a new version of this code.
15
16
-----------------------------------------------------------------------------
17
Redistribution and use in source and binary forms, with or without
18
modification, are permitted provided that the following conditions are met:
19
20
* Redistributions of source code must retain the above copyright notice,
21
this list of conditions and the following disclaimer.
22
23
* Redistributions in binary form must reproduce the above copyright
24
notice, this list of conditions and the following disclaimer in the
25
documentation and/or other materials provided with the distribution.
26
27
* Neither the name of the University of Cambridge nor the names of its
28
contributors may be used to endorse or promote products derived from
29
this software without specific prior written permission.
30
31
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41
POSSIBILITY OF SUCH DAMAGE.
42
-----------------------------------------------------------------------------
43
*/
44
45
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
46
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
47
48
/* This file contains definitions of the Unicode property values that are
returned by the UCD access macros and used throughout PCRE2.

IMPORTANT: The specific values of the first two enums (general and particular
character categories) are assumed by the table called catposstab in the file
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
an update. */
55
56
/* These are the general character categories. The enumerators take the
implicit values 0..6 in the order listed; those specific values are assumed by
the catposstab table in pcre2_auto_possess.c, so the order must not change. */

enum {
  ucp_C,     /* 0 Other */
  ucp_L,     /* 1 Letter */
  ucp_M,     /* 2 Mark */
  ucp_N,     /* 3 Number */
  ucp_P,     /* 4 Punctuation */
  ucp_S,     /* 5 Symbol */
  ucp_Z,     /* 6 Separator */
};
67
68
/* These are the particular character categories. The enumerators take the
implicit values 0..29 in the order listed; those specific values are assumed
by the catposstab table in pcre2_auto_possess.c, so the order must not
change. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs,    /* Space separator */
};
102
103
/* These are Boolean properties. Values are implicit, starting at 0 in the
order listed. ucp_Bprop_Count is a sentinel, not a property: because it is the
final enumerator its value equals the number of properties above it, so any
new property must be inserted before it. */

enum {
  ucp_ASCII,
  ucp_ASCII_Hex_Digit,
  ucp_Alphabetic,
  ucp_Bidi_Control,
  ucp_Bidi_Mirrored,
  ucp_Case_Ignorable,
  ucp_Cased,
  ucp_Changes_When_Casefolded,
  ucp_Changes_When_Casemapped,
  ucp_Changes_When_Lowercased,
  ucp_Changes_When_Titlecased,
  ucp_Changes_When_Uppercased,
  ucp_Dash,
  ucp_Default_Ignorable_Code_Point,
  ucp_Deprecated,
  ucp_Diacritic,
  ucp_Emoji,
  ucp_Emoji_Component,
  ucp_Emoji_Modifier,
  ucp_Emoji_Modifier_Base,
  ucp_Emoji_Presentation,
  ucp_Extended_Pictographic,
  ucp_Extender,
  ucp_Grapheme_Base,
  ucp_Grapheme_Extend,
  ucp_Grapheme_Link,
  ucp_Hex_Digit,
  ucp_IDS_Binary_Operator,
  ucp_IDS_Trinary_Operator,
  ucp_IDS_Unary_Operator,
  ucp_ID_Compat_Math_Continue,
  ucp_ID_Compat_Math_Start,
  ucp_ID_Continue,
  ucp_ID_Start,
  ucp_Ideographic,
  ucp_InCB,
  ucp_Join_Control,
  ucp_Logical_Order_Exception,
  ucp_Lowercase,
  ucp_Math,
  ucp_Modifier_Combining_Mark,
  ucp_Noncharacter_Code_Point,
  ucp_Pattern_Syntax,
  ucp_Pattern_White_Space,
  ucp_Prepended_Concatenation_Mark,
  ucp_Quotation_Mark,
  ucp_Radical,
  ucp_Regional_Indicator,
  ucp_Sentence_Terminal,
  ucp_Soft_Dotted,
  ucp_Terminal_Punctuation,
  ucp_Unified_Ideograph,
  ucp_Uppercase,
  ucp_Variation_Selector,
  ucp_White_Space,
  ucp_XID_Continue,
  ucp_XID_Start,
  /* This must be last */
  ucp_Bprop_Count
};
166
167
/* Size of entries in ucd_boolprop_sets[]. NOTE(review): the unit is not
stated in this header; presumably it is the number of array elements that make
up one set of Boolean properties - confirm against the table definition. */

#define ucd_boolprop_sets_item_size 2
170
171
/* These are the bidi class values. Values are implicit, starting at 0 in the
order listed (ucp_bidiAL == 0 ... ucp_bidiWS == 22). */

enum {
  ucp_bidiAL,   /* Arabic_Letter */
  ucp_bidiAN,   /* Arabic_Number */
  ucp_bidiB,    /* Paragraph_Separator */
  ucp_bidiBN,   /* Boundary_Neutral */
  ucp_bidiCS,   /* Common_Separator */
  ucp_bidiEN,   /* European_Number */
  ucp_bidiES,   /* European_Separator */
  ucp_bidiET,   /* European_Terminator */
  ucp_bidiFSI,  /* First_Strong_Isolate */
  ucp_bidiL,    /* Left_To_Right */
  ucp_bidiLRE,  /* Left_To_Right_Embedding */
  ucp_bidiLRI,  /* Left_To_Right_Isolate */
  ucp_bidiLRO,  /* Left_To_Right_Override */
  ucp_bidiNSM,  /* Nonspacing_Mark */
  ucp_bidiON,   /* Other_Neutral */
  ucp_bidiPDF,  /* Pop_Directional_Format */
  ucp_bidiPDI,  /* Pop_Directional_Isolate */
  ucp_bidiR,    /* Right_To_Left */
  ucp_bidiRLE,  /* Right_To_Left_Embedding */
  ucp_bidiRLI,  /* Right_To_Left_Isolate */
  ucp_bidiRLO,  /* Right_To_Left_Override */
  ucp_bidiS,    /* Segment_Separator */
  ucp_bidiWS,   /* White_Space */
};
198
199
/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. Values are implicit; the number in each
comment is the resulting enumerator value. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegional_Indicator,    /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic, /* 14 */
};
219
220
/* These are the script identifications. Values are implicit, starting at 0
in the order listed. The list is split into two groups, and the first group
occupies the low values (ucp_Latin == 0 ... ucp_Tulu_Tigalari == 97), so the
grouping must be preserved when scripts are added. ucp_Script_Count is a
sentinel, not a script: because it is the final enumerator its value equals
the number of scripts above it. */

enum {
  /* Scripts that have characters in other scripts. */
  ucp_Latin,
  ucp_Greek,
  ucp_Cyrillic,
  ucp_Armenian,
  ucp_Hebrew,
  ucp_Arabic,
  ucp_Syriac,
  ucp_Thaana,
  ucp_Devanagari,
  ucp_Bengali,
  ucp_Gurmukhi,
  ucp_Gujarati,
  ucp_Oriya,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Kannada,
  ucp_Malayalam,
  ucp_Sinhala,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Myanmar,
  ucp_Georgian,
  ucp_Hangul,
  ucp_Ethiopic,
  ucp_Cherokee,
  ucp_Runic,
  ucp_Mongolian,
  ucp_Hiragana,
  ucp_Katakana,
  ucp_Bopomofo,
  ucp_Han,
  ucp_Yi,
  ucp_Gothic,
  ucp_Tagalog,
  ucp_Hanunoo,
  ucp_Buhid,
  ucp_Tagbanwa,
  ucp_Limbu,
  ucp_Tai_Le,
  ucp_Linear_B,
  ucp_Shavian,
  ucp_Cypriot,
  ucp_Buginese,
  ucp_Coptic,
  ucp_Glagolitic,
  ucp_Tifinagh,
  ucp_Syloti_Nagri,
  ucp_Phags_Pa,
  ucp_Nko,
  ucp_Kayah_Li,
  ucp_Lycian,
  ucp_Carian,
  ucp_Lydian,
  ucp_Avestan,
  ucp_Samaritan,
  ucp_Lisu,
  ucp_Javanese,
  ucp_Old_Turkic,
  ucp_Kaithi,
  ucp_Mandaic,
  ucp_Chakma,
  ucp_Meroitic_Hieroglyphs,
  ucp_Sharada,
  ucp_Takri,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Modi,
  ucp_Old_Permic,
  ucp_Psalter_Pahlavi,
  ucp_Khudawadi,
  ucp_Tirhuta,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_Adlam,
  ucp_Osage,
  ucp_Tangut,
  ucp_Masaram_Gondi,
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Sogdian,
  ucp_Nandinagari,
  ucp_Yezidi,
  ucp_Cypro_Minoan,
  ucp_Old_Uyghur,
  ucp_Toto,
  ucp_Garay,
  ucp_Gurung_Khema,
  ucp_Ol_Onal,
  ucp_Sunuwar,
  ucp_Todhri,
  ucp_Tulu_Tigalari,

  /* Scripts that have no characters in other scripts. */
  ucp_Unknown,
  ucp_Common,
  ucp_Lao,
  ucp_Canadian_Aboriginal,
  ucp_Ogham,
  ucp_Khmer,
  ucp_Old_Italic,
  ucp_Deseret,
  ucp_Inherited,
  ucp_Ugaritic,
  ucp_Osmanya,
  ucp_Braille,
  ucp_New_Tai_Lue,
  ucp_Old_Persian,
  ucp_Kharoshthi,
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Phoenician,
  ucp_Sundanese,
  ucp_Lepcha,
  ucp_Ol_Chiki,
  ucp_Vai,
  ucp_Saurashtra,
  ucp_Rejang,
  ucp_Cham,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  ucp_Egyptian_Hieroglyphs,
  ucp_Bamum,
  ucp_Meetei_Mayek,
  ucp_Imperial_Aramaic,
  ucp_Old_South_Arabian,
  ucp_Inscriptional_Parthian,
  ucp_Inscriptional_Pahlavi,
  ucp_Batak,
  ucp_Brahmi,
  ucp_Meroitic_Cursive,
  ucp_Miao,
  ucp_Sora_Sompeng,
  ucp_Bassa_Vah,
  ucp_Pahawh_Hmong,
  ucp_Mende_Kikakui,
  ucp_Mro,
  ucp_Old_North_Arabian,
  ucp_Nabataean,
  ucp_Palmyrene,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_SignWriting,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Elymaic,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
  ucp_Tangsa,
  ucp_Vithkuqi,
  ucp_Kawi,
  ucp_Nag_Mundari,
  ucp_Kirat_Rai,

  /* This must be last */
  ucp_Script_Count
};
401
402
/* Size of entries in ucd_script_sets[]. NOTE(review): the unit is not stated
in this header; presumably it is the number of array elements that make up one
script set - confirm against the table definition. */

#define ucd_script_sets_item_size 4
405
406
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
407
408
/* End of pcre2_ucp.h */
409
410