Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_convert.c
21648 views
1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
Original API code Copyright (c) 1997-2012 University of Cambridge
10
New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
* Redistributions of source code must retain the above copyright notice,
17
this list of conditions and the following disclaimer.
18
19
* Redistributions in binary form must reproduce the above copyright
20
notice, this list of conditions and the following disclaimer in the
21
documentation and/or other materials provided with the distribution.
22
23
* Neither the name of the University of Cambridge nor the names of its
24
contributors may be used to endorse or promote products derived from
25
this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#include "pcre2_internal.h"
43
44
45
46
/* The pattern-type bits. pcre2_pattern_convert() rejects a call unless exactly
one of them is set. */

#define TYPE_OPTIONS \
  (PCRE2_CONVERT_GLOB | \
   PCRE2_CONVERT_POSIX_BASIC | \
   PCRE2_CONVERT_POSIX_EXTENDED)

/* Every option bit that pcre2_pattern_convert() accepts; any other bit causes
PCRE2_ERROR_BADOPTION. */

#define ALL_OPTIONS \
  (TYPE_OPTIONS | \
   PCRE2_CONVERT_UTF | \
   PCRE2_CONVERT_NO_UTF_CHECK | \
   PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR | \
   PCRE2_CONVERT_GLOB_NO_STARSTAR)

/* Size, in code units, of the on-stack buffer used when the caller supplies
no output buffer. */

#define DUMMY_BUFFER_SIZE 100
55
56
/* Fragments of generated patterns. Each one is the concatenation of
single-character STR_xxx string macros that are defined outside this file. */

#define STR_BACKSLASH_A \
  STR_BACKSLASH STR_A

#define STR_BACKSLASH_z \
  STR_BACKSLASH STR_z

#define STR_COLON_RIGHT_SQUARE_BRACKET \
  STR_COLON STR_RIGHT_SQUARE_BRACKET

#define STR_DOT_STAR_LOOKBEHIND \
  STR_DOT STR_ASTERISK \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN

#define STR_LOOKAHEAD_NOT_DOT \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS

#define STR_QUERY_s \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS

/* This one is written at the start of every converted POSIX pattern. */

#define STR_STAR_NUL \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
65
66
/* States for POSIX processing. The order matters: convert_posix() tests
"state >= POSIX_CLASS_NOT_STARTED" to detect that it is inside a bracket
expression and "state < POSIX_NOT_BRACKET" to detect that no ordinary item
has been processed yet. */

enum {
  POSIX_START_REGEX,        /* Initial state, nothing processed yet */
  POSIX_ANCHORED,           /* Set after an anchoring circumflex (basic) */
  POSIX_NOT_BRACKET,        /* Outside a bracket expression */
  POSIX_CLASS_NOT_STARTED,  /* Inside [...], not inside a [: item */
  POSIX_CLASS_STARTING,     /* Inside [...], just seen an inner '[' */
  POSIX_CLASS_STARTED       /* Inside [...], seen "[:" and reading a name */
};
70
71
/* Macro to add a character string to the output buffer, checking for overflow.

The macro refers to two variables of the enclosing function by name: "p", the
current output pointer, and "endp", the first position that must not be
written. If the string does not fit, the ENCLOSING FUNCTION returns
PCRE2_ERROR_NOMEMORY; characters copied before the overflow was detected
remain in the buffer.

The body is wrapped in do { } while (0) so that an invocation followed by a
semicolon forms exactly one statement and can be used safely as the body of an
if/else. */

#define PUTCHARS(string) \
  do \
    { \
    for (const char *s = (string); *s != 0; s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *s; \
      } \
    } \
  while (0)
81
82
/* Macro to check for lowercase characters. The argument is evaluated more
than once, so it must not have side effects. In the EBCDIC variant the test
is split into three separate ranges (a-i, j-r, s-z). */

#ifndef EBCDIC
#define ISLOWER(c) ((c) >= CHAR_a && (c) <= CHAR_z)
#else
#define ISLOWER(c) \
  (((c) >= CHAR_a && (c) <= CHAR_i) || \
   ((c) >= CHAR_j && (c) <= CHAR_r) || \
   ((c) >= CHAR_s && (c) <= CHAR_z))
#endif
91
92
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
93
94
static const char *pcre2_escaped_literals =
95
STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
96
STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
97
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
98
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
99
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
100
101
/* Recognized escaped metacharacters in POSIX basic patterns. */
102
103
static const char *posix_meta_escapes =
104
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
105
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
106
STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
107
108
/* Recognized POSIX classes, colon-separated. */
109
110
static const char *posix_classes =
111
STR_a STR_l STR_p STR_h STR_a STR_COLON
112
STR_l STR_o STR_w STR_e STR_r STR_COLON
113
STR_u STR_p STR_p STR_e STR_r STR_COLON
114
STR_a STR_l STR_n STR_u STR_m STR_COLON
115
STR_a STR_s STR_c STR_i STR_i STR_COLON
116
STR_b STR_l STR_a STR_n STR_k STR_COLON
117
STR_c STR_n STR_t STR_r STR_l STR_COLON
118
STR_d STR_i STR_g STR_i STR_t STR_COLON
119
STR_g STR_r STR_a STR_p STR_h STR_COLON
120
STR_p STR_r STR_i STR_n STR_t STR_COLON
121
STR_p STR_u STR_n STR_c STR_t STR_COLON
122
STR_s STR_p STR_a STR_c STR_e STR_COLON
123
STR_w STR_o STR_r STR_d STR_COLON
124
STR_x STR_d STR_i STR_g STR_i STR_t STR_COLON;
125
126
127
128
/*************************************************
129
* Convert a POSIX pattern *
130
*************************************************/
131
132
/* This function handles both basic and extended POSIX patterns.
133
134
Arguments:
135
pattype the pattern type
136
pattern the pattern
137
plength length in code units
138
utf TRUE if UTF
139
use_buffer where to put the output
140
use_length length of use_buffer
141
bufflenptr where to put the used length
142
dummyrun TRUE if a dummy run
143
ccontext the convert context
144
145
Returns: 0 => success
146
!0 => error code
147
*/
148
149
static int
150
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
151
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
152
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
153
{
154
PCRE2_SPTR posix = pattern;
155
PCRE2_UCHAR *p = use_buffer;
156
PCRE2_UCHAR *pp = p;
157
PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */
158
PCRE2_SIZE convlength = 0;
159
160
uint32_t bracount = 0;
161
uint32_t posix_state = POSIX_START_REGEX;
162
uint32_t lastspecial = 0;
163
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
164
BOOL nextisliteral = FALSE;
165
166
(void)utf; /* Not used when Unicode not supported */
167
(void)ccontext; /* Not currently used */
168
169
/* Initialize default for error offset as end of input. */
170
171
*bufflenptr = plength;
172
PUTCHARS(STR_STAR_NUL);
173
174
/* Now scan the input. */
175
176
while (plength > 0)
177
{
178
uint32_t c, sc;
179
int clength = 1;
180
181
/* Add in the length of the last item, then, if in the dummy run, pull the
182
pointer back to the start of the (temporary) buffer and then remember the
183
start of the next item. */
184
185
convlength += p - pp;
186
if (dummyrun) p = use_buffer;
187
pp = p;
188
189
/* Pick up the next character */
190
191
#ifndef SUPPORT_UNICODE
192
c = *posix;
193
#else
194
GETCHARLENTEST(c, posix, clength);
195
#endif
196
posix += clength;
197
plength -= clength;
198
199
sc = nextisliteral? 0 : c;
200
nextisliteral = FALSE;
201
202
/* Handle a character within a class. */
203
204
if (posix_state >= POSIX_CLASS_NOT_STARTED)
205
{
206
if (c == CHAR_RIGHT_SQUARE_BRACKET)
207
{
208
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
209
posix_state = POSIX_NOT_BRACKET;
210
}
211
212
/* Not the end of the class */
213
214
else
215
{
216
switch (posix_state)
217
{
218
case POSIX_CLASS_STARTED:
219
if (ISLOWER(c)) break; /* Remain in started state */
220
posix_state = POSIX_CLASS_NOT_STARTED;
221
if (c == CHAR_COLON && plength > 0 &&
222
*posix == CHAR_RIGHT_SQUARE_BRACKET)
223
{
224
PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
225
plength--;
226
posix++;
227
continue; /* With next character after :] */
228
}
229
PCRE2_FALLTHROUGH /* Fall through */
230
231
case POSIX_CLASS_NOT_STARTED:
232
if (c == CHAR_LEFT_SQUARE_BRACKET)
233
posix_state = POSIX_CLASS_STARTING;
234
break;
235
236
case POSIX_CLASS_STARTING:
237
if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
238
break;
239
}
240
241
if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
242
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
243
memcpy(p, posix - clength, CU2BYTES(clength));
244
p += clength;
245
}
246
}
247
248
/* Handle a character not within a class. */
249
250
else switch(sc)
251
{
252
case CHAR_LEFT_SQUARE_BRACKET:
253
PUTCHARS(STR_LEFT_SQUARE_BRACKET);
254
255
#ifdef NEVER
256
/* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
257
support) but they are not part of POSIX 1003.1. */
258
259
if (plength >= 6)
260
{
261
if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
262
posix[1] == CHAR_COLON &&
263
(posix[2] == CHAR_LESS_THAN_SIGN ||
264
posix[2] == CHAR_GREATER_THAN_SIGN) &&
265
posix[3] == CHAR_COLON &&
266
posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
267
posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
268
{
269
if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
270
memcpy(p, posix, CU2BYTES(6));
271
p += 6;
272
posix += 6;
273
plength -= 6;
274
continue; /* With next character */
275
}
276
}
277
#endif
278
279
/* Handle start of "normal" character classes */
280
281
posix_state = POSIX_CLASS_NOT_STARTED;
282
283
/* Handle ^ and ] as first characters */
284
285
if (plength > 0)
286
{
287
if (*posix == CHAR_CIRCUMFLEX_ACCENT)
288
{
289
posix++;
290
plength--;
291
PUTCHARS(STR_CIRCUMFLEX_ACCENT);
292
}
293
if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
294
{
295
posix++;
296
plength--;
297
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
298
}
299
}
300
break;
301
302
case CHAR_BACKSLASH:
303
if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
304
if (extended) nextisliteral = TRUE; else
305
{
306
if (*posix < 255 && strchr(posix_meta_escapes, *posix) != NULL)
307
{
308
if (*posix >= CHAR_0 && *posix <= CHAR_9) PUTCHARS(STR_BACKSLASH);
309
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
310
lastspecial = *p++ = *posix++;
311
plength--;
312
}
313
else nextisliteral = TRUE;
314
}
315
break;
316
317
case CHAR_RIGHT_PARENTHESIS:
318
if (!extended || bracount == 0) goto ESCAPE_LITERAL;
319
bracount--;
320
goto COPY_SPECIAL;
321
322
case CHAR_LEFT_PARENTHESIS:
323
bracount++;
324
PCRE2_FALLTHROUGH /* Fall through */
325
326
case CHAR_QUESTION_MARK:
327
case CHAR_PLUS:
328
case CHAR_LEFT_CURLY_BRACKET:
329
case CHAR_RIGHT_CURLY_BRACKET:
330
case CHAR_VERTICAL_LINE:
331
if (!extended) goto ESCAPE_LITERAL;
332
PCRE2_FALLTHROUGH /* Fall through */
333
334
case CHAR_DOT:
335
case CHAR_DOLLAR_SIGN:
336
posix_state = POSIX_NOT_BRACKET;
337
COPY_SPECIAL:
338
lastspecial = c;
339
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
340
*p++ = c;
341
break;
342
343
case CHAR_ASTERISK:
344
if (lastspecial != CHAR_ASTERISK)
345
{
346
if (!extended && (posix_state < POSIX_NOT_BRACKET ||
347
lastspecial == CHAR_LEFT_PARENTHESIS))
348
goto ESCAPE_LITERAL;
349
goto COPY_SPECIAL;
350
}
351
break; /* Ignore second and subsequent asterisks */
352
353
case CHAR_CIRCUMFLEX_ACCENT:
354
if (extended) goto COPY_SPECIAL;
355
if (posix_state == POSIX_START_REGEX ||
356
lastspecial == CHAR_LEFT_PARENTHESIS)
357
{
358
posix_state = POSIX_ANCHORED;
359
goto COPY_SPECIAL;
360
}
361
PCRE2_FALLTHROUGH /* Fall through */
362
363
default:
364
if (c < 255 && strchr(pcre2_escaped_literals, c) != NULL)
365
{
366
ESCAPE_LITERAL:
367
PUTCHARS(STR_BACKSLASH);
368
}
369
lastspecial = 0xff; /* Indicates nothing special */
370
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
371
memcpy(p, posix - clength, CU2BYTES(clength));
372
p += clength;
373
posix_state = POSIX_NOT_BRACKET;
374
break;
375
}
376
}
377
378
if (posix_state >= POSIX_CLASS_NOT_STARTED)
379
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
380
convlength += p - pp; /* Final segment */
381
*bufflenptr = convlength;
382
*p++ = 0;
383
return 0;
384
}
385
386
387
/*************************************************
388
* Convert a glob pattern *
389
*************************************************/
390
391
/* Context for writing the output into a buffer. */
392
393
typedef struct pcre2_output_context {
394
PCRE2_UCHAR *output; /* current output position */
395
PCRE2_SPTR output_end; /* output end */
396
PCRE2_SIZE output_size; /* size of the output */
397
uint8_t out_str[8]; /* string copied to the output */
398
} pcre2_output_context;
399
400
401
/* Write a character into the output.
402
403
Arguments:
404
out output context
405
chr the next character
406
*/
407
408
static void
409
convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
410
{
411
out->output_size++;
412
413
if (out->output < out->output_end)
414
*out->output++ = chr;
415
}
416
417
418
/* Write a string into the output.
419
420
Arguments:
421
out output context
422
length length of out->out_str
423
*/
424
425
static void
426
convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
427
{
428
uint8_t *out_str = out->out_str;
429
PCRE2_UCHAR *output = out->output;
430
PCRE2_SPTR output_end = out->output_end;
431
PCRE2_SIZE output_size = out->output_size;
432
433
do
434
{
435
output_size++;
436
437
if (output < output_end)
438
*output++ = *out_str++;
439
}
440
while (--length != 0);
441
442
out->output = output;
443
out->output_size = output_size;
444
}
445
446
447
/* Prints the separator into the output.
448
449
Arguments:
450
out output context
451
separator glob separator
452
with_escape backslash is needed before separator
453
*/
454
455
static void
456
convert_glob_print_separator(pcre2_output_context *out,
457
PCRE2_UCHAR separator, BOOL with_escape)
458
{
459
if (with_escape)
460
convert_glob_write(out, CHAR_BACKSLASH);
461
462
convert_glob_write(out, separator);
463
}
464
465
466
/* Prints a wildcard into the output.
467
468
Arguments:
469
out output context
470
separator glob separator
471
with_escape backslash is needed before separator
472
*/
473
474
static void
475
convert_glob_print_wildcard(pcre2_output_context *out,
476
PCRE2_UCHAR separator, BOOL with_escape)
477
{
478
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
479
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
480
convert_glob_write_str(out, 2);
481
482
convert_glob_print_separator(out, separator, with_escape);
483
484
convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
485
}
486
487
488
/* Parse a posix class.
489
490
Arguments:
491
from starting point of scanning the range
492
pattern_end end of pattern
493
out output context
494
495
Returns: >0 => class index
496
0 => malformed class
497
*/
498
499
static int
500
convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
501
pcre2_output_context *out)
502
{
503
PCRE2_SPTR start = *from + 1;
504
PCRE2_SPTR pattern = start;
505
const char *class_ptr;
506
PCRE2_UCHAR c;
507
int class_index;
508
509
while (TRUE)
510
{
511
if (pattern >= pattern_end) return 0;
512
513
c = *pattern++;
514
515
if (c < CHAR_a || c > CHAR_z) break;
516
}
517
518
if (c != CHAR_COLON || pattern >= pattern_end ||
519
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
520
return 0;
521
522
class_ptr = posix_classes;
523
class_index = 1;
524
525
while (TRUE)
526
{
527
if (*class_ptr == 0) return 0;
528
529
pattern = start;
530
531
while (*pattern == (PCRE2_UCHAR) *class_ptr)
532
{
533
if (*pattern == CHAR_COLON)
534
{
535
pattern += 2;
536
start -= 2;
537
538
do convert_glob_write(out, *start++); while (start < pattern);
539
540
*from = pattern;
541
return class_index;
542
}
543
pattern++;
544
class_ptr++;
545
}
546
547
while (*class_ptr != CHAR_COLON) class_ptr++;
548
class_ptr++;
549
class_index++;
550
}
551
}
552
553
/* Checks whether the character is in the class.
554
555
Arguments:
556
class_index class index
557
c character
558
559
Returns: !0 => character is found in the class
560
0 => otherwise
561
*/
562
563
static BOOL
564
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
565
{
566
const uint8_t *cbits = PRIV(default_tables) + cbits_offset;
567
int cbit;
568
569
#if PCRE2_CODE_UNIT_WIDTH != 8
570
if (c > 0xff)
571
{
572
/* Can't access the character tables for c > 0xff */
573
return FALSE;
574
}
575
#endif
576
577
/* See posix_class_maps. This is a small local clone of that.
578
Note that we don't know exactly what character tables will be used at
579
match time, but, for the purposes of pattern conversion, it should be
580
sufficient to use PCRE2's built-in default tables. */
581
582
switch (class_index)
583
{
584
case 1: /* alpha */
585
if (c == CHAR_UNDERSCORE) return FALSE;
586
if (((cbits + cbit_digit)[c/8] & (1u << (c&7))) != 0) return FALSE;
587
cbit = cbit_word;
588
break;
589
590
case 2: cbit = cbit_lower; break; /* lower */
591
case 3: cbit = cbit_upper; break; /* upper */
592
593
case 4: /* alnum */
594
if (c == CHAR_UNDERSCORE) return FALSE;
595
cbit = cbit_word;
596
break;
597
598
case 5: /* ascii */
599
if (((cbits + cbit_cntrl)[c/8] & (1u << (c&7))) != 0) return TRUE;
600
cbit = cbit_print;
601
break;
602
603
case 6: /* blank */
604
if (c == CHAR_LF || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
605
return FALSE;
606
cbit = cbit_space;
607
break;
608
609
case 7: cbit = cbit_cntrl; break; /* cntrl */
610
case 8: cbit = cbit_digit; break; /* digit */
611
case 9: cbit = cbit_graph; break; /* graph */
612
case 10: cbit = cbit_print; break; /* print */
613
case 11: cbit = cbit_punct; break; /* punct */
614
case 12: cbit = cbit_space; break; /* space */
615
case 13: cbit = cbit_word; break; /* word */
616
case 14: cbit = cbit_xdigit; break; /* xdigit */
617
default: return FALSE;
618
}
619
620
return ((cbits + cbit)[c/8] & (1u << (c&7))) != 0;
621
}
622
623
/* Parse a range of characters.
624
625
Arguments:
626
from starting point of scanning the range
627
pattern_end end of pattern
628
out output context
629
separator glob separator
630
with_escape backslash is needed before separator
631
632
Returns: 0 => success
633
!0 => error code
634
*/
635
636
static int
637
convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
638
pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
639
BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
640
{
641
BOOL is_negative = FALSE;
642
BOOL separator_seen = FALSE;
643
BOOL has_prev_c;
644
PCRE2_SPTR pattern = *from;
645
PCRE2_SPTR char_start = NULL;
646
uint32_t c, prev_c;
647
int len, class_index;
648
649
(void)utf; /* Avoid compiler warning. */
650
651
if (pattern >= pattern_end)
652
{
653
*from = pattern;
654
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
655
}
656
657
if (*pattern == CHAR_EXCLAMATION_MARK
658
|| *pattern == CHAR_CIRCUMFLEX_ACCENT)
659
{
660
pattern++;
661
662
if (pattern >= pattern_end)
663
{
664
*from = pattern;
665
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
666
}
667
668
is_negative = TRUE;
669
670
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
671
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
672
len = 2;
673
674
if (!no_wildsep)
675
{
676
if (with_escape)
677
{
678
out->out_str[len] = CHAR_BACKSLASH;
679
len++;
680
}
681
out->out_str[len] = (uint8_t) separator;
682
}
683
684
convert_glob_write_str(out, len + 1);
685
}
686
else
687
convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
688
689
has_prev_c = FALSE;
690
prev_c = 0;
691
692
if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
693
{
694
out->out_str[0] = CHAR_BACKSLASH;
695
out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
696
convert_glob_write_str(out, 2);
697
has_prev_c = TRUE;
698
prev_c = CHAR_RIGHT_SQUARE_BRACKET;
699
pattern++;
700
}
701
702
while (pattern < pattern_end)
703
{
704
char_start = pattern;
705
GETCHARINCTEST(c, pattern);
706
707
if (c == CHAR_RIGHT_SQUARE_BRACKET)
708
{
709
convert_glob_write(out, c);
710
711
if (!is_negative && !no_wildsep && separator_seen)
712
{
713
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
714
out->out_str[1] = CHAR_QUESTION_MARK;
715
out->out_str[2] = CHAR_LESS_THAN_SIGN;
716
out->out_str[3] = CHAR_EXCLAMATION_MARK;
717
convert_glob_write_str(out, 4);
718
719
convert_glob_print_separator(out, separator, with_escape);
720
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
721
}
722
723
*from = pattern;
724
return 0;
725
}
726
727
if (pattern >= pattern_end) break;
728
729
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
730
{
731
*from = pattern;
732
class_index = convert_glob_parse_class(from, pattern_end, out);
733
734
if (class_index != 0)
735
{
736
pattern = *from;
737
738
has_prev_c = FALSE;
739
prev_c = 0;
740
741
if (!is_negative &&
742
convert_glob_char_in_class (class_index, separator))
743
separator_seen = TRUE;
744
continue;
745
}
746
}
747
else if (c == CHAR_MINUS && has_prev_c &&
748
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
749
{
750
convert_glob_write(out, CHAR_MINUS);
751
752
char_start = pattern;
753
GETCHARINCTEST(c, pattern);
754
755
if (pattern >= pattern_end) break;
756
757
if (escape != 0 && c == escape)
758
{
759
char_start = pattern;
760
GETCHARINCTEST(c, pattern);
761
}
762
else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
763
{
764
*from = pattern;
765
return PCRE2_ERROR_CONVERT_SYNTAX;
766
}
767
768
if (prev_c > c)
769
{
770
*from = pattern;
771
return PCRE2_ERROR_CONVERT_SYNTAX;
772
}
773
774
if (prev_c < separator && separator < c) separator_seen = TRUE;
775
776
has_prev_c = FALSE;
777
prev_c = 0;
778
}
779
else
780
{
781
if (escape != 0 && c == escape)
782
{
783
char_start = pattern;
784
GETCHARINCTEST(c, pattern);
785
786
if (pattern >= pattern_end) break;
787
}
788
789
has_prev_c = TRUE;
790
prev_c = c;
791
}
792
793
if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
794
c == CHAR_BACKSLASH || c == CHAR_MINUS)
795
convert_glob_write(out, CHAR_BACKSLASH);
796
797
if (c == separator) separator_seen = TRUE;
798
799
do convert_glob_write(out, *char_start++); while (char_start < pattern);
800
}
801
802
*from = pattern;
803
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
804
}
805
806
807
/* Prints a (*COMMIT) into the output.
808
809
Arguments:
810
out output context
811
*/
812
813
static void
814
convert_glob_print_commit(pcre2_output_context *out)
815
{
816
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
817
out->out_str[1] = CHAR_ASTERISK;
818
out->out_str[2] = CHAR_C;
819
out->out_str[3] = CHAR_O;
820
out->out_str[4] = CHAR_M;
821
out->out_str[5] = CHAR_M;
822
out->out_str[6] = CHAR_I;
823
out->out_str[7] = CHAR_T;
824
convert_glob_write_str(out, 8);
825
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
826
}
827
828
829
/* Bash glob converter.
830
831
Arguments:
832
pattype the pattern type
833
pattern the pattern
834
plength length in code units
835
utf TRUE if UTF
836
use_buffer where to put the output
837
use_length length of use_buffer
838
bufflenptr where to put the used length
839
dummyrun TRUE if a dummy run
840
ccontext the convert context
841
842
Returns: 0 => success
843
!0 => error code
844
*/
845
846
static int
847
convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
848
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
849
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
850
{
851
pcre2_output_context out;
852
PCRE2_SPTR pattern_start = pattern;
853
PCRE2_SPTR pattern_end = pattern + plength;
854
PCRE2_UCHAR separator = ccontext->glob_separator;
855
PCRE2_UCHAR escape = ccontext->glob_escape;
856
PCRE2_UCHAR c;
857
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
858
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
859
BOOL in_atomic = FALSE;
860
BOOL after_starstar = FALSE;
861
BOOL no_slash_z = FALSE;
862
BOOL with_escape, is_start, after_separator;
863
int result = 0;
864
865
(void)utf; /* Avoid compiler warning. */
866
867
#ifdef SUPPORT_UNICODE
868
if (utf && (separator >= 128 || escape >= 128))
869
{
870
/* Currently only ASCII characters are supported. */
871
*bufflenptr = 0;
872
return PCRE2_ERROR_CONVERT_SYNTAX;
873
}
874
#endif
875
876
with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
877
878
/* Initialize default for error offset as end of input. */
879
out.output = use_buffer;
880
out.output_end = use_buffer + use_length;
881
out.output_size = 0;
882
883
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
884
out.out_str[1] = CHAR_QUESTION_MARK;
885
out.out_str[2] = CHAR_s;
886
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
887
convert_glob_write_str(&out, 4);
888
889
is_start = TRUE;
890
891
if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
892
{
893
if (no_wildsep)
894
is_start = FALSE;
895
else if (!no_starstar && pattern + 1 < pattern_end &&
896
pattern[1] == CHAR_ASTERISK)
897
is_start = FALSE;
898
}
899
900
if (is_start)
901
{
902
out.out_str[0] = CHAR_BACKSLASH;
903
out.out_str[1] = CHAR_A;
904
convert_glob_write_str(&out, 2);
905
}
906
907
while (pattern < pattern_end)
908
{
909
c = *pattern++;
910
911
if (c == CHAR_ASTERISK)
912
{
913
is_start = pattern == pattern_start + 1;
914
915
if (in_atomic)
916
{
917
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
918
in_atomic = FALSE;
919
}
920
921
if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
922
{
923
after_separator = is_start || (pattern[-2] == separator);
924
925
do pattern++; while (pattern < pattern_end &&
926
*pattern == CHAR_ASTERISK);
927
928
if (pattern >= pattern_end)
929
{
930
no_slash_z = TRUE;
931
break;
932
}
933
934
after_starstar = TRUE;
935
936
if (after_separator && escape != 0 && *pattern == escape &&
937
pattern + 1 < pattern_end && pattern[1] == separator)
938
pattern++;
939
940
if (is_start)
941
{
942
if (*pattern != separator) continue;
943
944
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
945
out.out_str[1] = CHAR_QUESTION_MARK;
946
out.out_str[2] = CHAR_COLON;
947
out.out_str[3] = CHAR_BACKSLASH;
948
out.out_str[4] = CHAR_A;
949
out.out_str[5] = CHAR_VERTICAL_LINE;
950
convert_glob_write_str(&out, 6);
951
952
convert_glob_print_separator(&out, separator, with_escape);
953
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
954
955
pattern++;
956
continue;
957
}
958
959
convert_glob_print_commit(&out);
960
961
if (!after_separator || *pattern != separator)
962
{
963
out.out_str[0] = CHAR_DOT;
964
out.out_str[1] = CHAR_ASTERISK;
965
out.out_str[2] = CHAR_QUESTION_MARK;
966
convert_glob_write_str(&out, 3);
967
continue;
968
}
969
970
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
971
out.out_str[1] = CHAR_QUESTION_MARK;
972
out.out_str[2] = CHAR_COLON;
973
out.out_str[3] = CHAR_DOT;
974
out.out_str[4] = CHAR_ASTERISK;
975
out.out_str[5] = CHAR_QUESTION_MARK;
976
977
convert_glob_write_str(&out, 6);
978
979
convert_glob_print_separator(&out, separator, with_escape);
980
981
out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
982
out.out_str[1] = CHAR_QUESTION_MARK;
983
out.out_str[2] = CHAR_QUESTION_MARK;
984
convert_glob_write_str(&out, 3);
985
986
pattern++;
987
continue;
988
}
989
990
if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
991
{
992
do pattern++; while (pattern < pattern_end &&
993
*pattern == CHAR_ASTERISK);
994
}
995
996
if (no_wildsep)
997
{
998
if (pattern >= pattern_end)
999
{
1000
no_slash_z = TRUE;
1001
break;
1002
}
1003
1004
/* Start check must be after the end check. */
1005
if (is_start) continue;
1006
}
1007
1008
if (!is_start)
1009
{
1010
if (after_starstar)
1011
{
1012
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
1013
out.out_str[1] = CHAR_QUESTION_MARK;
1014
out.out_str[2] = CHAR_GREATER_THAN_SIGN;
1015
convert_glob_write_str(&out, 3);
1016
in_atomic = TRUE;
1017
}
1018
else
1019
convert_glob_print_commit(&out);
1020
}
1021
1022
if (no_wildsep)
1023
convert_glob_write(&out, CHAR_DOT);
1024
else
1025
convert_glob_print_wildcard(&out, separator, with_escape);
1026
1027
out.out_str[0] = CHAR_ASTERISK;
1028
out.out_str[1] = CHAR_QUESTION_MARK;
1029
if (pattern >= pattern_end)
1030
out.out_str[1] = CHAR_PLUS;
1031
convert_glob_write_str(&out, 2);
1032
continue;
1033
}
1034
1035
if (c == CHAR_QUESTION_MARK)
1036
{
1037
if (no_wildsep)
1038
convert_glob_write(&out, CHAR_DOT);
1039
else
1040
convert_glob_print_wildcard(&out, separator, with_escape);
1041
continue;
1042
}
1043
1044
if (c == CHAR_LEFT_SQUARE_BRACKET)
1045
{
1046
result = convert_glob_parse_range(&pattern, pattern_end,
1047
&out, utf, separator, with_escape, escape, no_wildsep);
1048
if (result != 0) break;
1049
continue;
1050
}
1051
1052
if (escape != 0 && c == escape)
1053
{
1054
if (pattern >= pattern_end)
1055
{
1056
result = PCRE2_ERROR_CONVERT_SYNTAX;
1057
break;
1058
}
1059
c = *pattern++;
1060
}
1061
1062
if (c < 255 && strchr(pcre2_escaped_literals, c) != NULL)
1063
convert_glob_write(&out, CHAR_BACKSLASH);
1064
1065
convert_glob_write(&out, c);
1066
}
1067
1068
if (result == 0)
1069
{
1070
if (!no_slash_z)
1071
{
1072
out.out_str[0] = CHAR_BACKSLASH;
1073
out.out_str[1] = CHAR_z;
1074
convert_glob_write_str(&out, 2);
1075
}
1076
1077
if (in_atomic)
1078
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
1079
1080
convert_glob_write(&out, CHAR_NUL);
1081
1082
if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
1083
result = PCRE2_ERROR_NOMEMORY;
1084
}
1085
1086
if (result != 0)
1087
{
1088
*bufflenptr = pattern - pattern_start;
1089
return result;
1090
}
1091
1092
*bufflenptr = out.output_size - 1;
1093
return 0;
1094
}
1095
1096
1097
/*************************************************
1098
* Convert pattern *
1099
*************************************************/
1100
1101
/* This is the external-facing function for converting other forms of pattern
1102
into PCRE2 regular expression patterns. On error, the bufflenptr argument is
1103
used to return an offset in the original pattern.
1104
1105
Arguments:
1106
pattern the input pattern
1107
plength length of input, or PCRE2_ZERO_TERMINATED
1108
options options bits
1109
buffptr pointer to pointer to output buffer
1110
bufflenptr pointer to length of output buffer
1111
ccontext convert context or NULL
1112
1113
Returns: 0 for success, else an error code (+ve or -ve)
1114
*/
1115
1116
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
1117
pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
1118
PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
1119
pcre2_convert_context *ccontext)
1120
{
1121
int rc;
1122
PCRE2_UCHAR null_str[1] = { 0xcd };
1123
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
1124
PCRE2_UCHAR *use_buffer = dummy_buffer;
1125
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
1126
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
1127
uint32_t pattype = options & TYPE_OPTIONS;
1128
1129
if (pattern == NULL && plength == 0)
1130
pattern = null_str;
1131
1132
if (pattern == NULL || bufflenptr == NULL)
1133
{
1134
if (bufflenptr != NULL) *bufflenptr = 0; /* Error offset */
1135
return PCRE2_ERROR_NULL;
1136
}
1137
1138
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
1139
(pattype & (~pattype+1)) != pattype || /* More than one type set */
1140
pattype == 0) /* No type set */
1141
{
1142
*bufflenptr = 0; /* Error offset */
1143
return PCRE2_ERROR_BADOPTION;
1144
}
1145
1146
if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
1147
if (ccontext == NULL) ccontext =
1148
(pcre2_convert_context *)(&PRIV(default_convert_context));
1149
1150
/* Check UTF if required. */
1151
1152
#ifndef SUPPORT_UNICODE
1153
if (utf)
1154
{
1155
*bufflenptr = 0; /* Error offset */
1156
return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
1157
}
1158
#else
1159
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
1160
{
1161
PCRE2_SIZE erroroffset;
1162
rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
1163
if (rc != 0)
1164
{
1165
*bufflenptr = erroroffset;
1166
return rc;
1167
}
1168
}
1169
#endif
1170
1171
/* If buffptr is not NULL, and what it points to is not NULL, we are being
1172
provided with a buffer and a length, so set them as the buffer to use. */
1173
1174
if (buffptr != NULL && *buffptr != NULL)
1175
{
1176
use_buffer = *buffptr;
1177
use_length = *bufflenptr;
1178
}
1179
1180
/* Call an individual converter, either just once (if a buffer was provided or
1181
just the length is needed), or twice (if a memory allocation is required). */
1182
1183
for (int i = 0; i < 2; i++)
1184
{
1185
PCRE2_UCHAR *allocated;
1186
BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
1187
1188
switch(pattype)
1189
{
1190
case PCRE2_CONVERT_GLOB:
1191
rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
1192
use_buffer, use_length, bufflenptr, dummyrun, ccontext);
1193
break;
1194
1195
case PCRE2_CONVERT_POSIX_BASIC:
1196
case PCRE2_CONVERT_POSIX_EXTENDED:
1197
rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
1198
bufflenptr, dummyrun, ccontext);
1199
break;
1200
1201
/* We have already validated pattype. */
1202
/* LCOV_EXCL_START */
1203
default:
1204
PCRE2_DEBUG_UNREACHABLE();
1205
*bufflenptr = 0; /* Error offset */
1206
return PCRE2_ERROR_INTERNAL;
1207
/* LCOV_EXCL_STOP */
1208
}
1209
1210
if (rc != 0 || /* Error */
1211
buffptr == NULL || /* Just the length is required */
1212
*buffptr != NULL) /* Buffer was provided or allocated */
1213
return rc;
1214
1215
/* Allocate memory for the buffer, with hidden space for an allocator at
1216
the start. The next time round the loop runs the conversion for real. */
1217
1218
allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
1219
(*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
1220
if (allocated == NULL)
1221
{
1222
*bufflenptr = 0; /* Error offset */
1223
return PCRE2_ERROR_NOMEMORY;
1224
}
1225
*buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
1226
1227
use_buffer = *buffptr;
1228
use_length = *bufflenptr + 1;
1229
}
1230
1231
/* Running the loop above ought to have succeeded the second time. */
1232
/* LCOV_EXCL_START */
1233
PCRE2_DEBUG_UNREACHABLE();
1234
*bufflenptr = 0; /* Error offset */
1235
return PCRE2_ERROR_INTERNAL;
1236
/* LCOV_EXCL_STOP */
1237
}
1238
1239
1240
/*************************************************
1241
* Free converted pattern *
1242
*************************************************/
1243
1244
/* This frees a converted pattern that was put in newly-allocated memory.
1245
1246
Argument: the converted pattern
1247
Returns: nothing
1248
*/
1249
1250
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
1251
pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
1252
{
1253
if (converted != NULL)
1254
{
1255
pcre2_memctl *memctl =
1256
(pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
1257
memctl->free(memctl, memctl->memory_data);
1258
}
1259
}
1260
1261
/* End of pcre2_convert.c */
1262
1263