Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_find_bracket.c
21745 views
1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
Original API code Copyright (c) 1997-2012 University of Cambridge
10
New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
* Redistributions of source code must retain the above copyright notice,
17
this list of conditions and the following disclaimer.
18
19
* Redistributions in binary form must reproduce the above copyright
20
notice, this list of conditions and the following disclaimer in the
21
documentation and/or other materials provided with the distribution.
22
23
* Neither the name of the University of Cambridge nor the names of its
24
contributors may be used to endorse or promote products derived from
25
this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
/* This module contains a single function that scans through a compiled pattern
until it finds a capturing bracket with the given number, or, if the number is
negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The
function is called from pcre2_compile.c and also from pcre2_study.c when
finding the minimum matching length. */
47
48
49
#include "pcre2_internal.h"
50
51
52
53
/*************************************************
54
* Scan compiled regex for specific bracket *
55
*************************************************/
56
57
/*
58
Arguments:
59
code points to start of expression
60
utf TRUE in UTF mode
61
number the required bracket number or negative to find a lookbehind
62
63
Returns: pointer to the opcode for the bracket, or NULL if not found
64
*/
65
66
PCRE2_SPTR
67
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
68
{
69
for (;;)
70
{
71
PCRE2_UCHAR c = *code;
72
73
if (c == OP_END) return NULL;
74
75
/* XCLASS is used for classes that cannot be represented just by a bit map.
76
This includes negated single high-valued characters. ECLASS is used for
77
classes that use set operations internally. CALLOUT_STR is used for
78
callouts with string arguments. In each case the length in the table is
79
zero; the actual length is stored in the compiled code. */
80
81
if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
82
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
83
84
/* Handle lookbehind */
85
86
else if (c == OP_REVERSE || c == OP_VREVERSE)
87
{
88
if (number < 0) return code;
89
code += PRIV(OP_lengths)[c];
90
}
91
92
/* Handle capturing bracket */
93
94
else if (c == OP_CBRA || c == OP_SCBRA ||
95
c == OP_CBRAPOS || c == OP_SCBRAPOS)
96
{
97
int n = (int)GET2(code, 1+LINK_SIZE);
98
if (n == number) return code;
99
code += PRIV(OP_lengths)[c];
100
}
101
102
/* Otherwise, we can get the item's length from the table, except that for
103
repeated character types, we have to test for \p and \P, which have an extra
104
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
105
must add in its length. */
106
107
else
108
{
109
switch(c)
110
{
111
case OP_TYPESTAR:
112
case OP_TYPEMINSTAR:
113
case OP_TYPEPLUS:
114
case OP_TYPEMINPLUS:
115
case OP_TYPEQUERY:
116
case OP_TYPEMINQUERY:
117
case OP_TYPEPOSSTAR:
118
case OP_TYPEPOSPLUS:
119
case OP_TYPEPOSQUERY:
120
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
121
break;
122
123
case OP_TYPEUPTO:
124
case OP_TYPEMINUPTO:
125
case OP_TYPEEXACT:
126
case OP_TYPEPOSUPTO:
127
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
128
code += 2;
129
break;
130
131
case OP_MARK:
132
case OP_COMMIT_ARG:
133
case OP_PRUNE_ARG:
134
case OP_SKIP_ARG:
135
case OP_THEN_ARG:
136
code += code[1];
137
break;
138
}
139
140
/* Add in the fixed length from the table */
141
142
code += PRIV(OP_lengths)[c];
143
144
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
145
followed by a multi-byte character. The length in the table is a minimum, so
146
we have to arrange to skip the extra bytes. */
147
148
#ifdef MAYBE_UTF_MULTI
149
if (utf) switch(c)
150
{
151
case OP_CHAR:
152
case OP_CHARI:
153
case OP_NOT:
154
case OP_NOTI:
155
case OP_EXACT:
156
case OP_EXACTI:
157
case OP_NOTEXACT:
158
case OP_NOTEXACTI:
159
case OP_UPTO:
160
case OP_UPTOI:
161
case OP_NOTUPTO:
162
case OP_NOTUPTOI:
163
case OP_MINUPTO:
164
case OP_MINUPTOI:
165
case OP_NOTMINUPTO:
166
case OP_NOTMINUPTOI:
167
case OP_POSUPTO:
168
case OP_POSUPTOI:
169
case OP_NOTPOSUPTO:
170
case OP_NOTPOSUPTOI:
171
case OP_STAR:
172
case OP_STARI:
173
case OP_NOTSTAR:
174
case OP_NOTSTARI:
175
case OP_MINSTAR:
176
case OP_MINSTARI:
177
case OP_NOTMINSTAR:
178
case OP_NOTMINSTARI:
179
case OP_POSSTAR:
180
case OP_POSSTARI:
181
case OP_NOTPOSSTAR:
182
case OP_NOTPOSSTARI:
183
case OP_PLUS:
184
case OP_PLUSI:
185
case OP_NOTPLUS:
186
case OP_NOTPLUSI:
187
case OP_MINPLUS:
188
case OP_MINPLUSI:
189
case OP_NOTMINPLUS:
190
case OP_NOTMINPLUSI:
191
case OP_POSPLUS:
192
case OP_POSPLUSI:
193
case OP_NOTPOSPLUS:
194
case OP_NOTPOSPLUSI:
195
case OP_QUERY:
196
case OP_QUERYI:
197
case OP_NOTQUERY:
198
case OP_NOTQUERYI:
199
case OP_MINQUERY:
200
case OP_MINQUERYI:
201
case OP_NOTMINQUERY:
202
case OP_NOTMINQUERYI:
203
case OP_POSQUERY:
204
case OP_POSQUERYI:
205
case OP_NOTPOSQUERY:
206
case OP_NOTPOSQUERYI:
207
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
208
break;
209
}
210
#else
211
(void)(utf); /* Keep compiler happy by referencing function argument */
212
#endif /* MAYBE_UTF_MULTI */
213
}
214
}
215
}
216
217
/* End of pcre2_find_bracket.c */
218
219