Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_compile.h
9903 views
1
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
40
41
#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD
42
#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD
43
44
#include "pcre2_internal.h"
45
46
/* Compile time error code numbers. They are given names so that they can more
47
easily be tracked. When a new number is added, the tables called eint1 and
48
eint2 in pcre2posix.c may need to be updated, and a new error text must be
49
added to compile_error_texts in pcre2_error.c. Also, the error codes in
50
pcre2.h.in must be updated - their values are exactly 100 greater than these
51
values. */
52
53
enum { ERR0 = COMPILE_ERROR_BASE,
54
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
55
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
56
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
57
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
58
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
59
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
60
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
61
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
62
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
63
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
64
ERR101,ERR102,ERR103,ERR104,ERR105,ERR106,ERR107,ERR108,ERR109,ERR110,
65
ERR111,ERR112,ERR113,ERR114,ERR115,ERR116 };
66
67
/* Code values for parsed patterns, which are stored in a vector of 32-bit
unsigned ints. Values less than META_END are literal data values. The coding
for identifying the item is in the top 16 bits, leaving the low 16 bits for the
additional data that some of them need. Consecutive codes in this table differ
by 0x00010000u. The META_CODE, META_DATA, and META_DIFF macros are used to
manipulate parsed pattern elements.

NOTE: When these definitions are changed, the table of extra lengths for each
code (meta_extra_lengths) must be updated to remain in step. */

#define META_END              0x80000000u  /* End of pattern */

#define META_ALT              0x80010000u  /* alternation */
#define META_ATOMIC           0x80020000u  /* atomic group */
#define META_BACKREF          0x80030000u  /* Back ref */
#define META_BACKREF_BYNAME   0x80040000u  /* \k'name' */
#define META_BIGVALUE         0x80050000u  /* Next is a literal > META_END */
#define META_CALLOUT_NUMBER   0x80060000u  /* (?C with numerical argument */
#define META_CALLOUT_STRING   0x80070000u  /* (?C with string argument */
#define META_CAPTURE          0x80080000u  /* Capturing parenthesis */
#define META_CIRCUMFLEX       0x80090000u  /* ^ metacharacter */
#define META_CLASS            0x800a0000u  /* start non-empty class */
#define META_CLASS_EMPTY      0x800b0000u  /* empty class */
#define META_CLASS_EMPTY_NOT  0x800c0000u  /* negative empty class */
#define META_CLASS_END        0x800d0000u  /* end of non-empty class */
#define META_CLASS_NOT        0x800e0000u  /* start non-empty negative class */
#define META_COND_ASSERT      0x800f0000u  /* (?(?assertion)... */
#define META_COND_DEFINE      0x80100000u  /* (?(DEFINE)... */
#define META_COND_NAME        0x80110000u  /* (?(<name>)... */
#define META_COND_NUMBER      0x80120000u  /* (?(digits)... */
#define META_COND_RNAME       0x80130000u  /* (?(R&name)... */
#define META_COND_RNUMBER     0x80140000u  /* (?(Rdigits)... */
#define META_COND_VERSION     0x80150000u  /* (?(VERSION<op>x.y)... */
#define META_OFFSET           0x80160000u  /* Setting offset for various
                                              META codes (e.g. META_SCS_NAME) */
#define META_SCS              0x80170000u  /* (*scan_substring:... */
#define META_SCS_NAME         0x80180000u  /* Next <name> of scan_substring */
#define META_SCS_NUMBER       0x80190000u  /* Next digits of scan_substring */
#define META_DOLLAR           0x801a0000u  /* $ metacharacter */
#define META_DOT              0x801b0000u  /* . metacharacter */
#define META_ESCAPE           0x801c0000u  /* \d and friends */
#define META_KET              0x801d0000u  /* closing parenthesis */
#define META_NOCAPTURE        0x801e0000u  /* no capture parens */
#define META_OPTIONS          0x801f0000u  /* (?i) and friends */
#define META_POSIX            0x80200000u  /* POSIX class item */
#define META_POSIX_NEG        0x80210000u  /* negative POSIX class item */
#define META_RANGE_ESCAPED    0x80220000u  /* range with at least one escape */
#define META_RANGE_LITERAL    0x80230000u  /* range defined literally */
#define META_RECURSE          0x80240000u  /* Recursion */
#define META_RECURSE_BYNAME   0x80250000u  /* (?&name) */
#define META_SCRIPT_RUN       0x80260000u  /* (*script_run:...) */
117
118
/* These must be kept together to make it easy to check that an assertion
is present where expected in a conditional group. The four values are
consecutive codes. */

#define META_LOOKAHEAD        0x80270000u  /* (?= */
#define META_LOOKAHEADNOT     0x80280000u  /* (?! */
#define META_LOOKBEHIND       0x80290000u  /* (?<= */
#define META_LOOKBEHINDNOT    0x802a0000u  /* (?<! */

/* These cannot be conditions */

#define META_LOOKAHEAD_NA     0x802b0000u  /* (*napla: */
#define META_LOOKBEHIND_NA    0x802c0000u  /* (*naplb: */
130
131
/* These must be kept in this order, with consecutive values, and the _ARG
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
versions. (The per-line comments from META_COMMIT downwards read as one
sentence, top to bottom.) */

#define META_MARK             0x802d0000u  /* (*MARK) */
#define META_ACCEPT           0x802e0000u  /* (*ACCEPT) */
#define META_FAIL             0x802f0000u  /* (*FAIL) */
#define META_COMMIT           0x80300000u  /* These */
#define META_COMMIT_ARG       0x80310000u  /* pairs */
#define META_PRUNE            0x80320000u  /* must */
#define META_PRUNE_ARG        0x80330000u  /* be */
#define META_SKIP             0x80340000u  /* kept */
#define META_SKIP_ARG         0x80350000u  /* in */
#define META_THEN             0x80360000u  /* this */
#define META_THEN_ARG         0x80370000u  /* order */
146
147
/* These must be kept in groups of adjacent 3 values, and all together. Within
each group the plain form comes first, then the form followed by '+', then the
form followed by '?'. */

#define META_ASTERISK         0x80380000u  /* *  */
#define META_ASTERISK_PLUS    0x80390000u  /* *+ */
#define META_ASTERISK_QUERY   0x803a0000u  /* *? */
#define META_PLUS             0x803b0000u  /* +  */
#define META_PLUS_PLUS        0x803c0000u  /* ++ */
#define META_PLUS_QUERY       0x803d0000u  /* +? */
#define META_QUERY            0x803e0000u  /* ?  */
#define META_QUERY_PLUS       0x803f0000u  /* ?+ */
#define META_QUERY_QUERY      0x80400000u  /* ?? */
#define META_MINMAX           0x80410000u  /* {n,m}  repeat */
#define META_MINMAX_PLUS      0x80420000u  /* {n,m}+ repeat */
#define META_MINMAX_QUERY     0x80430000u  /* {n,m}? repeat */
161
162
/* These meta codes must be kept in a group, with the OR/SUB/XOR in
this order, and AND/NOT at the start/end. */

#define META_ECLASS_AND       0x80440000u  /* && (or &) in a class */
#define META_ECLASS_OR        0x80450000u  /* || (or |, +) in a class */
#define META_ECLASS_SUB       0x80460000u  /* -- (or -) in a class */
#define META_ECLASS_XOR       0x80470000u  /* ~~ (or ^) in a class */
#define META_ECLASS_NOT       0x80480000u  /* ! in a class */
170
171
/* Convenience aliases naming the first and last codes of the quantifier
group (META_ASTERISK through META_MINMAX_QUERY). */

#define META_FIRST_QUANTIFIER    META_ASTERISK
#define META_LAST_QUANTIFIER     META_MINMAX_QUERY
175
176
/* This is a special "meta code" that is used only to distinguish (*asr: from
(*sr: in the table of alphabetic assertions. It is never stored in the parsed
pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
therefore no need for it to have a length entry, so use a high value. */

#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
182
183
/* Macros for manipulating elements of the parsed pattern vector.

  META_CODE(x)    the item code: x with its low 16 data bits cleared
  META_DATA(x)    the low 16 data bits of x
  META_DIFF(x,y)  the difference x - y shifted down by 16 bits, i.e. the
                  number of code steps between two META values

Every argument is parenthesized in the expansion so that an expression
argument such as (a | b) or (y + z) is evaluated as a unit before the mask,
subtraction or shift is applied. */

#define META_CODE(x)   ((x) & 0xffff0000u)
#define META_DATA(x)   ((x) & 0x0000ffffu)
#define META_DIFF(x,y) (((x)-(y))>>16)
188
189
/* Extended class management flags. */

#define CLASS_IS_ECLASS 0x1
192
193
/* Macro for the highest character value. MAX_UCHAR_VALUE is the largest value
that fits in one code unit of the configured PCRE2_CODE_UNIT_WIDTH; any width
other than 8 or 16 selects the 32-bit limit. */

#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_UCHAR_VALUE 0xffu
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MAX_UCHAR_VALUE 0xffffu
#else
#define MAX_UCHAR_VALUE 0xffffffffu
#endif

/* Yields MAX_UTF_CODE_POINT when utf is non-zero, and MAX_UCHAR_VALUE
otherwise. The replacement text must directly follow the continuation line. */

#define GET_MAX_CHAR_VALUE(utf) \
  ((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE)
205
206
/* Macro for setting individual bits in class bitmaps. Sets bit number b in the
byte array a: byte (b >> 3), bit (b & 7) within that byte.

Both arguments are parenthesized, so a may be a pointer expression such as
(p + 1), and the whole expansion is a single parenthesized expression. Note
that b appears twice in the expansion, so it must not have side effects. */

#define SETBIT(a,b) ((a)[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7)))
209
210
/* Macro for 8 bit specific checks. Expands to its first argument when
PCRE2_CODE_UNIT_WIDTH is 8 and to its second argument otherwise. The argument
that is not selected does not appear in the expansion at all. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define SELECT_VALUE8(value8, value) (value8)
#else
#define SELECT_VALUE8(value8, value) (value)
#endif
216
217
/* Macro for aligning data. Rounds base up to the next multiple of align. The
mask arithmetic gives a correct result only when align is a power of two.
base is parenthesized so that an expression argument (for example a shift or
a conditional) is evaluated as a unit before the rounding is applied. */
#define CLIST_ALIGN_TO(base, align) \
  (((base) + ((size_t)(align) - 1)) & ~((size_t)(align) - 1))
220
221
/* Structure for holding information about an OP_ECLASS internal operand.
222
An "operand" here could be just a single OP_[X]CLASS, or it could be some
223
complex expression; but it's some sequence of ECL_* codes which pushes one
224
value to the stack. */
225
typedef struct {
226
/* The position of the operand - or NULL if (lengthptr != NULL). */
227
PCRE2_UCHAR *code_start;
228
PCRE2_SIZE length;
229
/* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE);
230
otherwise zero if the operand is not atomic. */
231
uint8_t op_single_type;
232
/* Regardless of whether it's a single code or not, we fully constant-fold
233
the bitmap for code points < 256. */
234
class_bits_storage bits;
235
} eclass_op_info;
236
237
/* Macros for the definitions below, to prevent name collisions. Each private
name is passed through PCRE2_SUFFIX before it is used in a declaration.

NOTE(review): the posix_class_maps entry has no trailing underscore before the
suffix, unlike the three function names - confirm this is intentional before
changing it, since it determines the emitted symbol name. */

#define _pcre2_posix_class_maps          PCRE2_SUFFIX(_pcre2_posix_class_maps)
#define _pcre2_update_classbits          PCRE2_SUFFIX(_pcre2_update_classbits_)
#define _pcre2_compile_class_nested      PCRE2_SUFFIX(_pcre2_compile_class_nested_)
#define _pcre2_compile_class_not_nested  PCRE2_SUFFIX(_pcre2_compile_class_not_nested_)
243
244
245
/* Indices of the POSIX classes in posix_names, posix_name_lengths,
posix_class_maps, and posix_substitutes. They must be kept in sync. Only some
indices are named here; the values are not contiguous (11 and 12 have no
name in this list). */

#define PC_DIGIT   7
#define PC_GRAPH   8
#define PC_PRINT   9
#define PC_PUNCT  10
#define PC_XDIGIT 13
253
254
/* Table of POSIX class maps, declared here with unspecified length; the
array itself is defined elsewhere. The PC_* constants are indices that apply
to this table. */
extern const int PRIV(posix_class_maps)[];
255
256
257
/* Set bits in classbits according to the property type */
258
259
void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
260
uint8_t *classbits);
261
262
/* Compile the META codes from start_ptr...end_ptr, writing a single OP_CLASS
263
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
264
265
uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
266
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
267
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
268
269
/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
270
start at a META_CLASS or META_CLASS_NOT.
271
272
The pptr will be left pointing at the matching META_CLASS_END. */
273
274
BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
275
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
276
compile_block *cb, PCRE2_SIZE *lengthptr);
277
278
#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
279
280
/* End of pcre2_compile.h */
281
282