Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_substring.c
9898 views
1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
Original API code Copyright (c) 1997-2012 University of Cambridge
10
New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
* Redistributions of source code must retain the above copyright notice,
17
this list of conditions and the following disclaimer.
18
19
* Redistributions in binary form must reproduce the above copyright
20
notice, this list of conditions and the following disclaimer in the
21
documentation and/or other materials provided with the distribution.
22
23
* Neither the name of the University of Cambridge nor the names of its
24
contributors may be used to endorse or promote products derived from
25
this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
49
50
/*************************************************
51
* Copy named captured string to given buffer *
52
*************************************************/
53
54
/* This function copies a single captured substring into a given buffer,
55
identifying it by name. If the regex permits duplicate names, the first
56
substring that is set is chosen.
57
58
Arguments:
59
match_data points to the match data
60
stringname the name of the required substring
61
buffer where to put the substring
62
sizeptr the size of the buffer, updated to the size of the substring
63
64
Returns: if successful: zero
65
if not successful, a negative error code:
66
(1) an error from nametable_scan()
67
(2) an error from copy_bynumber()
68
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
69
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
70
*/
71
72
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
73
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
74
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
75
{
76
PCRE2_SPTR first, last, entry;
77
int failrc, entrysize;
78
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
79
return PCRE2_ERROR_DFA_UFUNC;
80
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
81
&first, &last);
82
if (entrysize < 0) return entrysize;
83
failrc = PCRE2_ERROR_UNAVAILABLE;
84
for (entry = first; entry <= last; entry += entrysize)
85
{
86
uint32_t n = GET2(entry, 0);
87
if (n < match_data->oveccount)
88
{
89
if (match_data->ovector[n*2] != PCRE2_UNSET)
90
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
91
failrc = PCRE2_ERROR_UNSET;
92
}
93
}
94
return failrc;
95
}
96
97
98
99
/*************************************************
100
* Copy numbered captured string to given buffer *
101
*************************************************/
102
103
/* This function copies a single captured substring into a given buffer,
104
identifying it by number.
105
106
Arguments:
107
match_data points to the match data
108
stringnumber the number of the required substring
109
buffer where to put the substring
110
sizeptr the size of the buffer, updated to the size of the substring
111
112
Returns: if successful: 0
113
if not successful, a negative error code:
114
PCRE2_ERROR_NOMEMORY: buffer too small
115
PCRE2_ERROR_NOSUBSTRING: no such substring
116
PCRE2_ERROR_UNAVAILABLE: ovector too small
117
PCRE2_ERROR_UNSET: substring is not set
118
*/
119
120
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
121
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
122
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
123
{
124
int rc;
125
PCRE2_SIZE size;
126
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
127
if (rc < 0) return rc;
128
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
129
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
130
CU2BYTES(size));
131
buffer[size] = 0;
132
*sizeptr = size;
133
return 0;
134
}
135
136
137
138
/*************************************************
139
* Extract named captured string *
140
*************************************************/
141
142
/* This function copies a single captured substring, identified by name, into
143
new memory. If the regex permits duplicate names, the first substring that is
144
set is chosen.
145
146
Arguments:
147
match_data pointer to match_data
148
stringname the name of the required substring
149
stringptr where to put the pointer to the new memory
150
sizeptr where to put the length of the substring
151
152
Returns: if successful: zero
153
if not successful, a negative value:
154
(1) an error from nametable_scan()
155
(2) an error from get_bynumber()
156
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
157
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
158
*/
159
160
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
161
pcre2_substring_get_byname(pcre2_match_data *match_data,
162
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
163
{
164
PCRE2_SPTR first, last, entry;
165
int failrc, entrysize;
166
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
167
return PCRE2_ERROR_DFA_UFUNC;
168
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
169
&first, &last);
170
if (entrysize < 0) return entrysize;
171
failrc = PCRE2_ERROR_UNAVAILABLE;
172
for (entry = first; entry <= last; entry += entrysize)
173
{
174
uint32_t n = GET2(entry, 0);
175
if (n < match_data->oveccount)
176
{
177
if (match_data->ovector[n*2] != PCRE2_UNSET)
178
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
179
failrc = PCRE2_ERROR_UNSET;
180
}
181
}
182
return failrc;
183
}
184
185
186
187
/*************************************************
188
* Extract captured string to new memory *
189
*************************************************/
190
191
/* This function copies a single captured substring into a piece of new
192
memory.
193
194
Arguments:
195
match_data points to match data
196
stringnumber the number of the required substring
197
stringptr where to put a pointer to the new memory
198
sizeptr where to put the size of the substring
199
200
Returns: if successful: 0
201
if not successful, a negative error code:
202
PCRE2_ERROR_NOMEMORY: failed to get memory
203
PCRE2_ERROR_NOSUBSTRING: no such substring
204
PCRE2_ERROR_UNAVAILABLE: ovector too small
205
PCRE2_ERROR_UNSET: substring is not set
206
*/
207
208
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
209
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
210
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
211
{
212
int rc;
213
PCRE2_SIZE size;
214
PCRE2_UCHAR *yield;
215
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
216
if (rc < 0) return rc;
217
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
218
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
219
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
220
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
221
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
222
CU2BYTES(size));
223
yield[size] = 0;
224
*stringptr = yield;
225
*sizeptr = size;
226
return 0;
227
}
228
229
230
231
/*************************************************
232
* Free memory obtained by get_substring *
233
*************************************************/
234
235
/*
236
Argument: the result of a previous pcre2_substring_get_byxxx()
237
Returns: nothing
238
*/
239
240
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
241
pcre2_substring_free(PCRE2_UCHAR *string)
242
{
243
if (string != NULL)
244
{
245
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
246
memctl->free(memctl, memctl->memory_data);
247
}
248
}
249
250
251
252
/*************************************************
253
* Get length of a named substring *
254
*************************************************/
255
256
/* This function returns the length of a named captured substring. If the regex
257
permits duplicate names, the first substring that is set is chosen.
258
259
Arguments:
260
match_data pointer to match data
261
stringname the name of the required substring
262
sizeptr where to put the length
263
264
Returns: 0 if successful, else a negative error number
265
*/
266
267
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
268
pcre2_substring_length_byname(pcre2_match_data *match_data,
269
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
270
{
271
PCRE2_SPTR first, last, entry;
272
int failrc, entrysize;
273
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
274
return PCRE2_ERROR_DFA_UFUNC;
275
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
276
&first, &last);
277
if (entrysize < 0) return entrysize;
278
failrc = PCRE2_ERROR_UNAVAILABLE;
279
for (entry = first; entry <= last; entry += entrysize)
280
{
281
uint32_t n = GET2(entry, 0);
282
if (n < match_data->oveccount)
283
{
284
if (match_data->ovector[n*2] != PCRE2_UNSET)
285
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
286
failrc = PCRE2_ERROR_UNSET;
287
}
288
}
289
return failrc;
290
}
291
292
293
294
/*************************************************
295
* Get length of a numbered substring *
296
*************************************************/
297
298
/* This function returns the length of a captured substring. If the start is
299
beyond the end (which can happen when \K is used in an assertion), it sets the
300
length to zero.
301
302
Arguments:
303
match_data pointer to match data
304
stringnumber the number of the required substring
305
sizeptr where to put the length, if not NULL
306
307
Returns: if successful: 0
308
if not successful, a negative error code:
309
PCRE2_ERROR_NOSUBSTRING: no such substring
310
PCRE2_ERROR_UNAVAILABLE: ovector is too small
311
PCRE2_ERROR_UNSET: substring is not set
312
PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
313
*/
314
315
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
316
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
317
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
318
{
319
PCRE2_SIZE left, right;
320
int count = match_data->rc;
321
if (count == PCRE2_ERROR_PARTIAL)
322
{
323
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
324
count = 0;
325
}
326
else if (count < 0) return count; /* Match failed */
327
328
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
329
{
330
if (stringnumber > match_data->code->top_bracket)
331
return PCRE2_ERROR_NOSUBSTRING;
332
if (stringnumber >= match_data->oveccount)
333
return PCRE2_ERROR_UNAVAILABLE;
334
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
335
return PCRE2_ERROR_UNSET;
336
}
337
else /* Matched using pcre2_dfa_match() */
338
{
339
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
340
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
341
}
342
343
left = match_data->ovector[stringnumber*2];
344
right = match_data->ovector[stringnumber*2+1];
345
if (left > match_data->subject_length || right > match_data->subject_length)
346
return PCRE2_ERROR_INVALIDOFFSET;
347
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
348
return 0;
349
}
350
351
352
353
/*************************************************
354
* Extract all captured strings to new memory *
355
*************************************************/
356
357
/* This function gets one chunk of memory and builds a list of pointers and all
358
the captured substrings in it. A NULL pointer is put on the end of the list.
359
The substrings are zero-terminated, but also, if the final argument is
360
non-NULL, a list of lengths is also returned. This allows binary data to be
361
handled.
362
363
Arguments:
364
match_data points to the match data
365
listptr set to point to the list of pointers
366
lengthsptr set to point to the list of lengths (may be NULL)
367
368
Returns: if successful: 0
369
if not successful, a negative error code:
370
PCRE2_ERROR_NOMEMORY: failed to get memory,
371
or a match failure code
372
*/
373
374
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
375
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
376
PCRE2_SIZE **lengthsptr)
377
{
378
int i, count, count2;
379
PCRE2_SIZE size;
380
PCRE2_SIZE *lensp;
381
pcre2_memctl *memp;
382
PCRE2_UCHAR **listp;
383
PCRE2_UCHAR *sp;
384
PCRE2_SIZE *ovector;
385
386
if ((count = match_data->rc) < 0) return count; /* Match failed */
387
if (count == 0) count = match_data->oveccount; /* Ovector too small */
388
389
count2 = 2*count;
390
ovector = match_data->ovector;
391
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
392
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
393
394
for (i = 0; i < count2; i += 2)
395
{
396
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
397
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
398
}
399
400
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
401
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
402
403
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
404
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
405
406
if (lengthsptr == NULL)
407
{
408
sp = (PCRE2_UCHAR *)lensp;
409
lensp = NULL;
410
}
411
else
412
{
413
*lengthsptr = lensp;
414
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
415
}
416
417
for (i = 0; i < count2; i += 2)
418
{
419
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
420
421
/* Size == 0 includes the case when the capture is unset. Avoid adding
422
PCRE2_UNSET to match_data->subject because it overflows, even though with
423
zero size calling memcpy() is harmless. */
424
425
if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
426
*listp++ = sp;
427
if (lensp != NULL) *lensp++ = size;
428
sp += size;
429
*sp++ = 0;
430
}
431
432
*listp = NULL;
433
return 0;
434
}
435
436
437
438
/*************************************************
439
* Free memory obtained by substring_list_get *
440
*************************************************/
441
442
/*
443
Argument: the result of a previous pcre2_substring_list_get()
444
Returns: nothing
445
*/
446
447
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
448
pcre2_substring_list_free(PCRE2_UCHAR **list)
449
{
450
if (list != NULL)
451
{
452
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
453
memctl->free(memctl, memctl->memory_data);
454
}
455
}
456
457
458
459
/*************************************************
460
* Find (multiple) entries for named string *
461
*************************************************/
462
463
/* This function scans the nametable for a given name, using binary chop. It
464
returns either two pointers to the entries in the table, or, if no pointers are
465
given, the number of a unique group with the given name. If duplicate names are
466
permitted, and the name is not unique, an error is generated.
467
468
Arguments:
469
code the compiled regex
470
stringname the name whose entries required
471
firstptr where to put the pointer to the first entry
472
lastptr where to put the pointer to the last entry
473
474
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
475
otherwise, if firstptr and lastptr are NULL:
476
a group number for a unique substring
477
else PCRE2_ERROR_NOUNIQUESUBSTRING
478
otherwise:
479
the length of each entry, having set firstptr and lastptr
480
*/
481
482
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
483
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
484
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
485
{
486
uint16_t bot = 0;
487
uint16_t top = code->name_count;
488
uint16_t entrysize = code->name_entry_size;
489
PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));
490
491
while (top > bot)
492
{
493
uint16_t mid = (top + bot) / 2;
494
PCRE2_SPTR entry = nametable + entrysize*mid;
495
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
496
if (c == 0)
497
{
498
PCRE2_SPTR first;
499
PCRE2_SPTR last;
500
PCRE2_SPTR lastentry;
501
lastentry = nametable + entrysize * (code->name_count - 1);
502
first = last = entry;
503
while (first > nametable)
504
{
505
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
506
first -= entrysize;
507
}
508
while (last < lastentry)
509
{
510
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
511
last += entrysize;
512
}
513
if (firstptr == NULL) return (first == last)?
514
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
515
*firstptr = first;
516
*lastptr = last;
517
return entrysize;
518
}
519
if (c > 0) bot = mid + 1; else top = mid;
520
}
521
522
return PCRE2_ERROR_NOSUBSTRING;
523
}
524
525
526
/*************************************************
527
* Find number for named string *
528
*************************************************/
529
530
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
531
when it is known that names are unique. If there are duplicate names, it is not
532
defined which number is returned.
533
534
Arguments:
535
code the compiled regex
536
stringname the name whose number is required
537
538
Returns: the number of the named parenthesis, or a negative number
539
PCRE2_ERROR_NOSUBSTRING if not found
540
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
541
*/
542
543
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
544
pcre2_substring_number_from_name(const pcre2_code *code,
545
PCRE2_SPTR stringname)
546
{
547
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
548
}
549
550
/* End of pcre2_substring.c */
551
552