Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_substring.c
21326 views
1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
Original API code Copyright (c) 1997-2012 University of Cambridge
10
New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
* Redistributions of source code must retain the above copyright notice,
17
this list of conditions and the following disclaimer.
18
19
* Redistributions in binary form must reproduce the above copyright
20
notice, this list of conditions and the following disclaimer in the
21
documentation and/or other materials provided with the distribution.
22
23
* Neither the name of the University of Cambridge nor the names of its
24
contributors may be used to endorse or promote products derived from
25
this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#include "pcre2_internal.h"
43
44
45
46
/*************************************************
47
* Copy named captured string to given buffer *
48
*************************************************/
49
50
/* This function copies a single captured substring into a given buffer,
51
identifying it by name. If the regex permits duplicate names, the first
52
substring that is set is chosen.
53
54
Arguments:
55
match_data points to the match data
56
stringname the name of the required substring
57
buffer where to put the substring
58
sizeptr the size of the buffer, updated to the size of the substring
59
60
Returns: if successful: zero
61
if not successful, a negative error code:
62
(1) an error from nametable_scan()
63
(2) an error from copy_bynumber()
64
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
65
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
66
*/
67
68
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
69
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
70
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
71
{
72
PCRE2_SPTR first, last, entry;
73
int failrc, entrysize;
74
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
75
return PCRE2_ERROR_DFA_UFUNC;
76
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
77
&first, &last);
78
if (entrysize < 0) return entrysize;
79
failrc = PCRE2_ERROR_UNAVAILABLE;
80
for (entry = first; entry <= last; entry += entrysize)
81
{
82
uint32_t n = GET2(entry, 0);
83
if (n < match_data->oveccount)
84
{
85
if (match_data->ovector[n*2] != PCRE2_UNSET)
86
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
87
failrc = PCRE2_ERROR_UNSET;
88
}
89
}
90
return failrc;
91
}
92
93
94
95
/*************************************************
96
* Copy numbered captured string to given buffer *
97
*************************************************/
98
99
/* This function copies a single captured substring into a given buffer,
100
identifying it by number.
101
102
Arguments:
103
match_data points to the match data
104
stringnumber the number of the required substring
105
buffer where to put the substring
106
sizeptr the size of the buffer, updated to the size of the substring
107
108
Returns: if successful: 0
109
if not successful, a negative error code:
110
PCRE2_ERROR_NOMEMORY: buffer too small
111
PCRE2_ERROR_NOSUBSTRING: no such substring
112
PCRE2_ERROR_UNAVAILABLE: ovector too small
113
PCRE2_ERROR_UNSET: substring is not set
114
*/
115
116
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
117
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
118
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
119
{
120
int rc;
121
PCRE2_SIZE size;
122
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
123
if (rc < 0) return rc;
124
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
125
if (size != 0) memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
126
CU2BYTES(size));
127
buffer[size] = 0;
128
*sizeptr = size;
129
return 0;
130
}
131
132
133
134
/*************************************************
135
* Extract named captured string *
136
*************************************************/
137
138
/* This function copies a single captured substring, identified by name, into
139
new memory. If the regex permits duplicate names, the first substring that is
140
set is chosen.
141
142
Arguments:
143
match_data pointer to match_data
144
stringname the name of the required substring
145
stringptr where to put the pointer to the new memory
146
sizeptr where to put the length of the substring
147
148
Returns: if successful: zero
149
if not successful, a negative value:
150
(1) an error from nametable_scan()
151
(2) an error from get_bynumber()
152
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
153
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
154
*/
155
156
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
157
pcre2_substring_get_byname(pcre2_match_data *match_data,
158
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
159
{
160
PCRE2_SPTR first, last, entry;
161
int failrc, entrysize;
162
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
163
return PCRE2_ERROR_DFA_UFUNC;
164
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
165
&first, &last);
166
if (entrysize < 0) return entrysize;
167
failrc = PCRE2_ERROR_UNAVAILABLE;
168
for (entry = first; entry <= last; entry += entrysize)
169
{
170
uint32_t n = GET2(entry, 0);
171
if (n < match_data->oveccount)
172
{
173
if (match_data->ovector[n*2] != PCRE2_UNSET)
174
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
175
failrc = PCRE2_ERROR_UNSET;
176
}
177
}
178
return failrc;
179
}
180
181
182
183
/*************************************************
184
* Extract captured string to new memory *
185
*************************************************/
186
187
/* This function copies a single captured substring into a piece of new
188
memory.
189
190
Arguments:
191
match_data points to match data
192
stringnumber the number of the required substring
193
stringptr where to put a pointer to the new memory
194
sizeptr where to put the size of the substring
195
196
Returns: if successful: 0
197
if not successful, a negative error code:
198
PCRE2_ERROR_NOMEMORY: failed to get memory
199
PCRE2_ERROR_NOSUBSTRING: no such substring
200
PCRE2_ERROR_UNAVAILABLE: ovector too small
201
PCRE2_ERROR_UNSET: substring is not set
202
*/
203
204
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
205
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
206
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
207
{
208
int rc;
209
PCRE2_SIZE size;
210
PCRE2_UCHAR *yield;
211
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
212
if (rc < 0) return rc;
213
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
214
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
215
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
216
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
217
if (size != 0) memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
218
CU2BYTES(size));
219
yield[size] = 0;
220
*stringptr = yield;
221
*sizeptr = size;
222
return 0;
223
}
224
225
226
227
/*************************************************
228
* Free memory obtained by get_substring *
229
*************************************************/
230
231
/*
232
Argument: the result of a previous pcre2_substring_get_byxxx()
233
Returns: nothing
234
*/
235
236
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
237
pcre2_substring_free(PCRE2_UCHAR *string)
238
{
239
if (string != NULL)
240
{
241
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
242
memctl->free(memctl, memctl->memory_data);
243
}
244
}
245
246
247
248
/*************************************************
249
* Get length of a named substring *
250
*************************************************/
251
252
/* This function returns the length of a named captured substring. If the regex
253
permits duplicate names, the first substring that is set is chosen.
254
255
Arguments:
256
match_data pointer to match data
257
stringname the name of the required substring
258
sizeptr where to put the length, if not NULL
259
260
Returns: 0 if successful, else a negative error number
261
*/
262
263
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
264
pcre2_substring_length_byname(pcre2_match_data *match_data,
265
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
266
{
267
PCRE2_SPTR first, last, entry;
268
int failrc, entrysize;
269
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
270
return PCRE2_ERROR_DFA_UFUNC;
271
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
272
&first, &last);
273
if (entrysize < 0) return entrysize;
274
failrc = PCRE2_ERROR_UNAVAILABLE;
275
for (entry = first; entry <= last; entry += entrysize)
276
{
277
uint32_t n = GET2(entry, 0);
278
if (n < match_data->oveccount)
279
{
280
if (match_data->ovector[n*2] != PCRE2_UNSET)
281
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
282
failrc = PCRE2_ERROR_UNSET;
283
}
284
}
285
return failrc;
286
}
287
288
289
290
/*************************************************
291
* Get length of a numbered substring *
292
*************************************************/
293
294
/* This function returns the length of a captured substring. If the start is
295
beyond the end (which can happen when \K is used in an assertion), it sets the
296
length to zero.
297
298
Arguments:
299
match_data pointer to match data
300
stringnumber the number of the required substring
301
sizeptr where to put the length, if not NULL
302
303
Returns: if successful: 0
304
if not successful, a negative error code:
305
PCRE2_ERROR_NOSUBSTRING: no such substring
306
PCRE2_ERROR_UNAVAILABLE: ovector is too small
307
PCRE2_ERROR_UNSET: substring is not set
308
PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
309
*/
310
311
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
312
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
313
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
314
{
315
PCRE2_SIZE left, right;
316
int count = match_data->rc;
317
if (count == PCRE2_ERROR_PARTIAL)
318
{
319
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
320
count = 0;
321
}
322
else if (count < 0) return count; /* Match failed */
323
324
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
325
{
326
if (stringnumber > match_data->code->top_bracket)
327
return PCRE2_ERROR_NOSUBSTRING;
328
if (stringnumber >= match_data->oveccount)
329
return PCRE2_ERROR_UNAVAILABLE;
330
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
331
return PCRE2_ERROR_UNSET;
332
}
333
else /* Matched using pcre2_dfa_match() */
334
{
335
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
336
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
337
}
338
339
left = match_data->ovector[stringnumber*2];
340
right = match_data->ovector[stringnumber*2+1];
341
/* LCOV_EXCL_START - this appears to be unreachable, as the ovector and
342
subject_length should always be set consistently, no matter what misbehaviour
343
the caller has committed. */
344
if (left > match_data->subject_length || right > match_data->subject_length)
345
{
346
PCRE2_DEBUG_UNREACHABLE();
347
return PCRE2_ERROR_INVALIDOFFSET;
348
}
349
/* LCOV_EXCL_STOP */
350
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
351
return 0;
352
}
353
354
355
356
/*************************************************
357
* Extract all captured strings to new memory *
358
*************************************************/
359
360
/* This function gets one chunk of memory and builds a list of pointers and all
361
the captured substrings in it. A NULL pointer is put on the end of the list.
362
The substrings are zero-terminated, but also, if the final argument is
363
non-NULL, a list of lengths is also returned. This allows binary data to be
364
handled.
365
366
Arguments:
367
match_data points to the match data
368
listptr set to point to the list of pointers
369
lengthsptr set to point to the list of lengths (may be NULL)
370
371
Returns: if successful: 0
372
if not successful, a negative error code:
373
PCRE2_ERROR_NOMEMORY: failed to get memory,
374
or a match failure code
375
*/
376
377
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
378
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
379
PCRE2_SIZE **lengthsptr)
380
{
381
int i, count, count2;
382
PCRE2_SIZE size;
383
PCRE2_SIZE *lensp;
384
pcre2_memctl *memp;
385
PCRE2_UCHAR **listp;
386
PCRE2_UCHAR *sp;
387
PCRE2_SIZE *ovector;
388
389
if ((count = match_data->rc) < 0) return count; /* Match failed */
390
if (count == 0) count = match_data->oveccount; /* Ovector too small */
391
392
count2 = 2*count;
393
ovector = match_data->ovector;
394
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
395
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
396
397
for (i = 0; i < count2; i += 2)
398
{
399
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
400
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
401
}
402
403
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
404
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
405
406
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
407
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
408
409
if (lengthsptr == NULL)
410
{
411
sp = (PCRE2_UCHAR *)lensp;
412
lensp = NULL;
413
}
414
else
415
{
416
*lengthsptr = lensp;
417
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
418
}
419
420
for (i = 0; i < count2; i += 2)
421
{
422
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
423
424
/* Size == 0 includes the case when the capture is unset. Avoid adding
425
PCRE2_UNSET to match_data->subject because it overflows, even though with
426
zero size calling memcpy() is harmless. */
427
428
if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
429
*listp++ = sp;
430
if (lensp != NULL) *lensp++ = size;
431
sp += size;
432
*sp++ = 0;
433
}
434
435
*listp = NULL;
436
return 0;
437
}
438
439
440
441
/*************************************************
442
* Free memory obtained by substring_list_get *
443
*************************************************/
444
445
/*
446
Argument: the result of a previous pcre2_substring_list_get()
447
Returns: nothing
448
*/
449
450
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
451
pcre2_substring_list_free(PCRE2_UCHAR **list)
452
{
453
if (list != NULL)
454
{
455
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
456
memctl->free(memctl, memctl->memory_data);
457
}
458
}
459
460
461
462
/*************************************************
463
* Find (multiple) entries for named string *
464
*************************************************/
465
466
/* This function scans the nametable for a given name, using binary chop. It
467
returns either two pointers to the entries in the table, or, if no pointers are
468
given, the number of a unique group with the given name. If duplicate names are
469
permitted, and the name is not unique, an error is generated.
470
471
Arguments:
472
code the compiled regex
473
stringname the name whose entries required
474
firstptr where to put the pointer to the first entry
475
lastptr where to put the pointer to the last entry
476
477
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
478
otherwise, if firstptr and lastptr are NULL:
479
a group number for a unique substring
480
else PCRE2_ERROR_NOUNIQUESUBSTRING
481
otherwise:
482
the length of each entry, having set firstptr and lastptr
483
*/
484
485
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
486
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
487
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
488
{
489
uint16_t bot = 0;
490
uint16_t top = code->name_count;
491
uint16_t entrysize = code->name_entry_size;
492
PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));
493
494
while (top > bot)
495
{
496
uint16_t mid = (top + bot) / 2;
497
PCRE2_SPTR entry = nametable + entrysize*mid;
498
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
499
if (c == 0)
500
{
501
PCRE2_SPTR first;
502
PCRE2_SPTR last;
503
PCRE2_SPTR lastentry;
504
lastentry = nametable + entrysize * (code->name_count - 1);
505
first = last = entry;
506
while (first > nametable)
507
{
508
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
509
first -= entrysize;
510
}
511
while (last < lastentry)
512
{
513
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
514
last += entrysize;
515
}
516
if (firstptr == NULL) return (first == last)?
517
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
518
*firstptr = first;
519
*lastptr = last;
520
return entrysize;
521
}
522
if (c > 0) bot = mid + 1; else top = mid;
523
}
524
525
return PCRE2_ERROR_NOSUBSTRING;
526
}
527
528
529
/*************************************************
530
* Find number for named string *
531
*************************************************/
532
533
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
534
when it is known that names are unique. If there are duplicate names, it is not
535
defined which number is returned.
536
537
Arguments:
538
code the compiled regex
539
stringname the name whose number is required
540
541
Returns: the number of the named parenthesis, or a negative number
542
PCRE2_ERROR_NOSUBSTRING if not found
543
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
544
*/
545
546
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
547
pcre2_substring_number_from_name(const pcre2_code *code,
548
PCRE2_SPTR stringname)
549
{
550
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
551
}
552
553
/* End of pcre2_substring.c */
554
555