Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_find_bracket.c
9898 views
1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
Original API code Copyright (c) 1997-2012 University of Cambridge
10
New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
* Redistributions of source code must retain the above copyright notice,
17
this list of conditions and the following disclaimer.
18
19
* Redistributions in binary form must reproduce the above copyright
20
notice, this list of conditions and the following disclaimer in the
21
documentation and/or other materials provided with the distribution.
22
23
* Neither the name of the University of Cambridge nor the names of its
24
contributors may be used to endorse or promote products derived from
25
this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
/* This module contains a single function that scans through a compiled pattern
until it finds a capturing bracket with the given number, or, if the number is
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
function is called from pcre2_compile.c and also from pcre2_study.c when
finding the minimum matching length. */
47
48
49
#ifdef HAVE_CONFIG_H
50
#include "config.h"
51
#endif
52
53
#include "pcre2_internal.h"
54
55
56
/*************************************************
57
* Scan compiled regex for specific bracket *
58
*************************************************/
59
60
/*
61
Arguments:
62
code points to start of expression
63
utf TRUE in UTF mode
64
number the required bracket number or negative to find a lookbehind
65
66
Returns: pointer to the opcode for the bracket, or NULL if not found
67
*/
68
69
PCRE2_SPTR
70
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
71
{
72
for (;;)
73
{
74
PCRE2_UCHAR c = *code;
75
76
if (c == OP_END) return NULL;
77
78
/* XCLASS is used for classes that cannot be represented just by a bit map.
79
This includes negated single high-valued characters. ECLASS is used for
80
classes that use set operations internally. CALLOUT_STR is used for
81
callouts with string arguments. In each case the length in the table is
82
zero; the actual length is stored in the compiled code. */
83
84
if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
85
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
86
87
/* Handle lookbehind */
88
89
else if (c == OP_REVERSE || c == OP_VREVERSE)
90
{
91
if (number < 0) return code;
92
code += PRIV(OP_lengths)[c];
93
}
94
95
/* Handle capturing bracket */
96
97
else if (c == OP_CBRA || c == OP_SCBRA ||
98
c == OP_CBRAPOS || c == OP_SCBRAPOS)
99
{
100
int n = (int)GET2(code, 1+LINK_SIZE);
101
if (n == number) return code;
102
code += PRIV(OP_lengths)[c];
103
}
104
105
/* Otherwise, we can get the item's length from the table, except that for
106
repeated character types, we have to test for \p and \P, which have an extra
107
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
108
must add in its length. */
109
110
else
111
{
112
switch(c)
113
{
114
case OP_TYPESTAR:
115
case OP_TYPEMINSTAR:
116
case OP_TYPEPLUS:
117
case OP_TYPEMINPLUS:
118
case OP_TYPEQUERY:
119
case OP_TYPEMINQUERY:
120
case OP_TYPEPOSSTAR:
121
case OP_TYPEPOSPLUS:
122
case OP_TYPEPOSQUERY:
123
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
124
break;
125
126
case OP_TYPEUPTO:
127
case OP_TYPEMINUPTO:
128
case OP_TYPEEXACT:
129
case OP_TYPEPOSUPTO:
130
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
131
code += 2;
132
break;
133
134
case OP_MARK:
135
case OP_COMMIT_ARG:
136
case OP_PRUNE_ARG:
137
case OP_SKIP_ARG:
138
case OP_THEN_ARG:
139
code += code[1];
140
break;
141
}
142
143
/* Add in the fixed length from the table */
144
145
code += PRIV(OP_lengths)[c];
146
147
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
148
followed by a multi-byte character. The length in the table is a minimum, so
149
we have to arrange to skip the extra bytes. */
150
151
#ifdef MAYBE_UTF_MULTI
152
if (utf) switch(c)
153
{
154
case OP_CHAR:
155
case OP_CHARI:
156
case OP_NOT:
157
case OP_NOTI:
158
case OP_EXACT:
159
case OP_EXACTI:
160
case OP_NOTEXACT:
161
case OP_NOTEXACTI:
162
case OP_UPTO:
163
case OP_UPTOI:
164
case OP_NOTUPTO:
165
case OP_NOTUPTOI:
166
case OP_MINUPTO:
167
case OP_MINUPTOI:
168
case OP_NOTMINUPTO:
169
case OP_NOTMINUPTOI:
170
case OP_POSUPTO:
171
case OP_POSUPTOI:
172
case OP_NOTPOSUPTO:
173
case OP_NOTPOSUPTOI:
174
case OP_STAR:
175
case OP_STARI:
176
case OP_NOTSTAR:
177
case OP_NOTSTARI:
178
case OP_MINSTAR:
179
case OP_MINSTARI:
180
case OP_NOTMINSTAR:
181
case OP_NOTMINSTARI:
182
case OP_POSSTAR:
183
case OP_POSSTARI:
184
case OP_NOTPOSSTAR:
185
case OP_NOTPOSSTARI:
186
case OP_PLUS:
187
case OP_PLUSI:
188
case OP_NOTPLUS:
189
case OP_NOTPLUSI:
190
case OP_MINPLUS:
191
case OP_MINPLUSI:
192
case OP_NOTMINPLUS:
193
case OP_NOTMINPLUSI:
194
case OP_POSPLUS:
195
case OP_POSPLUSI:
196
case OP_NOTPOSPLUS:
197
case OP_NOTPOSPLUSI:
198
case OP_QUERY:
199
case OP_QUERYI:
200
case OP_NOTQUERY:
201
case OP_NOTQUERYI:
202
case OP_MINQUERY:
203
case OP_MINQUERYI:
204
case OP_NOTMINQUERY:
205
case OP_NOTMINQUERYI:
206
case OP_POSQUERY:
207
case OP_POSQUERYI:
208
case OP_NOTPOSQUERY:
209
case OP_NOTPOSQUERYI:
210
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
211
break;
212
}
213
#else
214
(void)(utf); /* Keep compiler happy by referencing function argument */
215
#endif /* MAYBE_UTF_MULTI */
216
}
217
}
218
}
219
220
/* End of pcre2_find_bracket.c */
221
222