CoCalc -- pcre2

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_ucp.h
⁹⁹⁰³ views
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4

5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7

8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2016-2022 University of Cambridge
11

12
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
13
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
14
a new version of this code.
15

16
-----------------------------------------------------------------------------
17
Redistribution and use in source and binary forms, with or without
18
modification, are permitted provided that the following conditions are met:
19

20
    * Redistributions of source code must retain the above copyright notice,
21
      this list of conditions and the following disclaimer.
22

23
    * Redistributions in binary form must reproduce the above copyright
24
      notice, this list of conditions and the following disclaimer in the
25
      documentation and/or other materials provided with the distribution.
26

27
    * Neither the name of the University of Cambridge nor the names of its
28
      contributors may be used to endorse or promote products derived from
29
      this software without specific prior written permission.
30

31
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41
POSSIBILITY OF SUCH DAMAGE.
42
-----------------------------------------------------------------------------
43
*/
44

45
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
46
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
47

48
/* This file contains definitions of the Unicode property values that are
returned by the UCD access macros and used throughout PCRE2.

IMPORTANT: The specific values of the first two enums (general and particular
character categories) are assumed by the table called catposstab in the file
pcre2_auto_possess.c. They are unlikely to change, but they should be checked
after every update. */
55

56
/* These are the general character categories. The enumerators rely on the
implicit numbering 0..6 in declaration order. The catposstab table in
pcre2_auto_possess.c assumes these specific values, so entries must not be
reordered or given explicit values without checking that table. */

enum {
  ucp_C,     /* 0: Other */
  ucp_L,     /* 1: Letter */
  ucp_M,     /* 2: Mark */
  ucp_N,     /* 3: Number */
  ucp_P,     /* 4: Punctuation */
  ucp_S,     /* 5: Symbol */
  ucp_Z,     /* 6: Separator */
};
67

68
/* These are the particular character categories. The enumerators rely on the
implicit numbering 0..29 in declaration order, grouped by the first letter of
the general category (C, L, M, N, P, S, Z). The catposstab table in
pcre2_auto_possess.c assumes these specific values, so entries must not be
reordered without checking that table. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs,    /* Space separator */
};
102

103
/* These are Boolean properties. The enumerators are listed in alphabetical
order and take implicit consecutive values starting at 0. The final entry,
ucp_Bprop_Count, is not a property: it is a sentinel whose value equals the
number of properties declared before it, so it must stay last. */

enum {
  ucp_ASCII,
  ucp_ASCII_Hex_Digit,
  ucp_Alphabetic,
  ucp_Bidi_Control,
  ucp_Bidi_Mirrored,
  ucp_Case_Ignorable,
  ucp_Cased,
  ucp_Changes_When_Casefolded,
  ucp_Changes_When_Casemapped,
  ucp_Changes_When_Lowercased,
  ucp_Changes_When_Titlecased,
  ucp_Changes_When_Uppercased,
  ucp_Dash,
  ucp_Default_Ignorable_Code_Point,
  ucp_Deprecated,
  ucp_Diacritic,
  ucp_Emoji,
  ucp_Emoji_Component,
  ucp_Emoji_Modifier,
  ucp_Emoji_Modifier_Base,
  ucp_Emoji_Presentation,
  ucp_Extended_Pictographic,
  ucp_Extender,
  ucp_Grapheme_Base,
  ucp_Grapheme_Extend,
  ucp_Grapheme_Link,
  ucp_Hex_Digit,
  ucp_IDS_Binary_Operator,
  ucp_IDS_Trinary_Operator,
  ucp_IDS_Unary_Operator,
  ucp_ID_Compat_Math_Continue,
  ucp_ID_Compat_Math_Start,
  ucp_ID_Continue,
  ucp_ID_Start,
  ucp_Ideographic,
  ucp_InCB,
  ucp_Join_Control,
  ucp_Logical_Order_Exception,
  ucp_Lowercase,
  ucp_Math,
  ucp_Modifier_Combining_Mark,
  ucp_Noncharacter_Code_Point,
  ucp_Pattern_Syntax,
  ucp_Pattern_White_Space,
  ucp_Prepended_Concatenation_Mark,
  ucp_Quotation_Mark,
  ucp_Radical,
  ucp_Regional_Indicator,
  ucp_Sentence_Terminal,
  ucp_Soft_Dotted,
  ucp_Terminal_Punctuation,
  ucp_Unified_Ideograph,
  ucp_Uppercase,
  ucp_Variation_Selector,
  ucp_White_Space,
  ucp_XID_Continue,
  ucp_XID_Start,
  /* This must be last: it counts the Boolean properties above. */
  ucp_Bprop_Count
};
166

167
/* Size of entries in ucd_boolprop_sets[].
NOTE(review): the unit is not stated in this header; presumably each entry is
this many 32-bit words (2 x 32 bits is enough for ucp_Bprop_Count flags) -
confirm against the table definition. */

#define ucd_boolprop_sets_item_size 2
170

171
/* These are the bidi class values. The enumerators are listed alphabetically
by their short class name and take implicit consecutive values starting at 0;
the comment on each line gives the long name of the class. */

enum {
  ucp_bidiAL,   /* Arabic_Letter */
  ucp_bidiAN,   /* Arabic_Number */
  ucp_bidiB,    /* Paragraph_Separator */
  ucp_bidiBN,   /* Boundary_Neutral */
  ucp_bidiCS,   /* Common_Separator */
  ucp_bidiEN,   /* European_Number */
  ucp_bidiES,   /* European_Separator */
  ucp_bidiET,   /* European_Terminator */
  ucp_bidiFSI,  /* First_Strong_Isolate */
  ucp_bidiL,    /* Left_To_Right */
  ucp_bidiLRE,  /* Left_To_Right_Embedding */
  ucp_bidiLRI,  /* Left_To_Right_Isolate */
  ucp_bidiLRO,  /* Left_To_Right_Override */
  ucp_bidiNSM,  /* Nonspacing_Mark */
  ucp_bidiON,   /* Other_Neutral */
  ucp_bidiPDF,  /* Pop_Directional_Format */
  ucp_bidiPDI,  /* Pop_Directional_Isolate */
  ucp_bidiR,    /* Right_To_Left */
  ucp_bidiRLE,  /* Right_To_Left_Embedding */
  ucp_bidiRLI,  /* Right_To_Left_Isolate */
  ucp_bidiRLO,  /* Right_To_Left_Override */
  ucp_bidiS,    /* Segment_Separator */
  ucp_bidiWS,   /* White_Space */
};
198

199
/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. The number in each trailing comment is the
enumerator's implicit value; the enumerators are not given explicit values, so
the declaration order is what keeps those numbers accurate. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegional_Indicator,    /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic, /* 14 */
};
219

220
/* These are the script identifications. The enumerators take implicit
consecutive values starting at 0 and are split into two groups: first the
scripts that have characters in other scripts, then those that have none.
The final entry, ucp_Script_Count, is a sentinel whose value equals the number
of scripts declared before it, so it must stay last. */

enum {
  /* Scripts which have characters in other scripts. */
  ucp_Latin,
  ucp_Greek,
  ucp_Cyrillic,
  ucp_Armenian,
  ucp_Hebrew,
  ucp_Arabic,
  ucp_Syriac,
  ucp_Thaana,
  ucp_Devanagari,
  ucp_Bengali,
  ucp_Gurmukhi,
  ucp_Gujarati,
  ucp_Oriya,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Kannada,
  ucp_Malayalam,
  ucp_Sinhala,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Myanmar,
  ucp_Georgian,
  ucp_Hangul,
  ucp_Ethiopic,
  ucp_Cherokee,
  ucp_Runic,
  ucp_Mongolian,
  ucp_Hiragana,
  ucp_Katakana,
  ucp_Bopomofo,
  ucp_Han,
  ucp_Yi,
  ucp_Gothic,
  ucp_Tagalog,
  ucp_Hanunoo,
  ucp_Buhid,
  ucp_Tagbanwa,
  ucp_Limbu,
  ucp_Tai_Le,
  ucp_Linear_B,
  ucp_Shavian,
  ucp_Cypriot,
  ucp_Buginese,
  ucp_Coptic,
  ucp_Glagolitic,
  ucp_Tifinagh,
  ucp_Syloti_Nagri,
  ucp_Phags_Pa,
  ucp_Nko,
  ucp_Kayah_Li,
  ucp_Lycian,
  ucp_Carian,
  ucp_Lydian,
  ucp_Avestan,
  ucp_Samaritan,
  ucp_Lisu,
  ucp_Javanese,
  ucp_Old_Turkic,
  ucp_Kaithi,
  ucp_Mandaic,
  ucp_Chakma,
  ucp_Meroitic_Hieroglyphs,
  ucp_Sharada,
  ucp_Takri,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Modi,
  ucp_Old_Permic,
  ucp_Psalter_Pahlavi,
  ucp_Khudawadi,
  ucp_Tirhuta,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_Adlam,
  ucp_Osage,
  ucp_Tangut,
  ucp_Masaram_Gondi,
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Sogdian,
  ucp_Nandinagari,
  ucp_Yezidi,
  ucp_Cypro_Minoan,
  ucp_Old_Uyghur,
  ucp_Toto,
  ucp_Garay,
  ucp_Gurung_Khema,
  ucp_Ol_Onal,
  ucp_Sunuwar,
  ucp_Todhri,
  ucp_Tulu_Tigalari,

  /* Scripts which have no characters in other scripts. */
  ucp_Unknown,
  ucp_Common,
  ucp_Lao,
  ucp_Canadian_Aboriginal,
  ucp_Ogham,
  ucp_Khmer,
  ucp_Old_Italic,
  ucp_Deseret,
  ucp_Inherited,
  ucp_Ugaritic,
  ucp_Osmanya,
  ucp_Braille,
  ucp_New_Tai_Lue,
  ucp_Old_Persian,
  ucp_Kharoshthi,
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Phoenician,
  ucp_Sundanese,
  ucp_Lepcha,
  ucp_Ol_Chiki,
  ucp_Vai,
  ucp_Saurashtra,
  ucp_Rejang,
  ucp_Cham,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  ucp_Egyptian_Hieroglyphs,
  ucp_Bamum,
  ucp_Meetei_Mayek,
  ucp_Imperial_Aramaic,
  ucp_Old_South_Arabian,
  ucp_Inscriptional_Parthian,
  ucp_Inscriptional_Pahlavi,
  ucp_Batak,
  ucp_Brahmi,
  ucp_Meroitic_Cursive,
  ucp_Miao,
  ucp_Sora_Sompeng,
  ucp_Bassa_Vah,
  ucp_Pahawh_Hmong,
  ucp_Mende_Kikakui,
  ucp_Mro,
  ucp_Old_North_Arabian,
  ucp_Nabataean,
  ucp_Palmyrene,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_SignWriting,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Elymaic,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
  ucp_Tangsa,
  ucp_Vithkuqi,
  ucp_Kawi,
  ucp_Nag_Mundari,
  ucp_Kirat_Rai,

  /* This must be last: it counts the scripts above. */
  ucp_Script_Count
};
401

402
/* Size of entries in ucd_script_sets[].
NOTE(review): the unit is not stated in this header; presumably each entry is
this many 32-bit words - confirm against the table definition. */

#define ucd_script_sets_item_size 4
405

406
#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
407

408
/* End of pcre2_ucp.h */
409

410
Product

Resources

Company