/* Source: GitHub repository godotengine/godot,
   path blob/master/thirdparty/pcre2/src/pcre2_convert.c */

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "pcre2_internal.h"

/* Option bits that select the kind of input pattern. pcre2_pattern_convert()
rejects a call unless exactly one of these bits is set. */

#define TYPE_OPTIONS (PCRE2_CONVERT_GLOB| \
  PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)

/* Every option bit that pcre2_pattern_convert() accepts; a call with any
other bit set is rejected with PCRE2_ERROR_BADOPTION. */

#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
  PCRE2_CONVERT_GLOB_NO_STARSTAR| \
  TYPE_OPTIONS)

/* Size, in code units, of the on-stack buffer that pcre2_pattern_convert()
uses when the caller does not supply an output buffer. */

#define DUMMY_BUFFER_SIZE 100

/* Generated pattern fragments. Each one is built by concatenating the
single-character STR_xxx string macros. */

#define STR_BACKSLASH_A STR_BACKSLASH STR_A
#define STR_BACKSLASH_z STR_BACKSLASH STR_z
#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET
#define STR_DOT_STAR_LOOKBEHIND \
  STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK \
  STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS

/* States for POSIX processing. The order matters: convert_posix() uses
relational tests on the state (">= POSIX_CLASS_NOT_STARTED" means "inside a
bracket expression", "< POSIX_NOT_BRACKET" means "nothing but an optional
anchor has been seen yet"). */

enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
       POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };

/* Macro to add a character string to the output buffer, checking for overflow.
It relies on two variables in the calling function: "p", the current output
pointer, and "endp", the limit that "p" must stay below. If the string does not
fit, the macro makes the CALLING function return PCRE2_ERROR_NOMEMORY. The body
is wrapped in do { } while (0) so that "PUTCHARS(x);" is a single statement and
can be used safely in an unbraced if/else. */

#define PUTCHARS(string) \
  do \
    { \
    for (const char *putchars_s = (string); *putchars_s != 0; putchars_s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *putchars_s; \
      } \
    } \
  while (0)

/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
85
86
static const char *pcre2_escaped_literals =
87
STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
88
STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
89
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
90
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
91
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
92
93
/* Recognized escaped metacharacters in POSIX basic patterns. */
94
95
static const char *posix_meta_escapes =
96
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
97
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
98
STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
99
100
101
102
/*************************************************
103
* Convert a POSIX pattern *
104
*************************************************/
105
106
/* This function handles both basic and extended POSIX patterns.
107
108
Arguments:
109
pattype the pattern type
110
pattern the pattern
111
plength length in code units
112
utf TRUE if UTF
113
use_buffer where to put the output
114
use_length length of use_buffer
115
bufflenptr where to put the used length
116
dummyrun TRUE if a dummy run
117
ccontext the convert context
118
119
Returns: 0 => success
120
!0 => error code
121
*/
122
123
static int
124
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
125
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
126
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
127
{
128
PCRE2_SPTR posix = pattern;
129
PCRE2_UCHAR *p = use_buffer;
130
PCRE2_UCHAR *pp = p;
131
PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */
132
PCRE2_SIZE convlength = 0;
133
134
uint32_t bracount = 0;
135
uint32_t posix_state = POSIX_START_REGEX;
136
uint32_t lastspecial = 0;
137
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
138
BOOL nextisliteral = FALSE;
139
140
(void)utf; /* Not used when Unicode not supported */
141
(void)ccontext; /* Not currently used */
142
143
/* Initialize default for error offset as end of input. */
144
145
*bufflenptr = plength;
146
PUTCHARS(STR_STAR_NUL);
147
148
/* Now scan the input. */
149
150
while (plength > 0)
151
{
152
uint32_t c, sc;
153
int clength = 1;
154
155
/* Add in the length of the last item, then, if in the dummy run, pull the
156
pointer back to the start of the (temporary) buffer and then remember the
157
start of the next item. */
158
159
convlength += p - pp;
160
if (dummyrun) p = use_buffer;
161
pp = p;
162
163
/* Pick up the next character */
164
165
#ifndef SUPPORT_UNICODE
166
c = *posix;
167
#else
168
GETCHARLENTEST(c, posix, clength);
169
#endif
170
posix += clength;
171
plength -= clength;
172
173
sc = nextisliteral? 0 : c;
174
nextisliteral = FALSE;
175
176
/* Handle a character within a class. */
177
178
if (posix_state >= POSIX_CLASS_NOT_STARTED)
179
{
180
if (c == CHAR_RIGHT_SQUARE_BRACKET)
181
{
182
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
183
posix_state = POSIX_NOT_BRACKET;
184
}
185
186
/* Not the end of the class */
187
188
else
189
{
190
switch (posix_state)
191
{
192
case POSIX_CLASS_STARTED:
193
if (c <= 127 && islower(c)) break; /* Remain in started state */
194
posix_state = POSIX_CLASS_NOT_STARTED;
195
if (c == CHAR_COLON && plength > 0 &&
196
*posix == CHAR_RIGHT_SQUARE_BRACKET)
197
{
198
PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
199
plength--;
200
posix++;
201
continue; /* With next character after :] */
202
}
203
/* Fall through */
204
205
case POSIX_CLASS_NOT_STARTED:
206
if (c == CHAR_LEFT_SQUARE_BRACKET)
207
posix_state = POSIX_CLASS_STARTING;
208
break;
209
210
case POSIX_CLASS_STARTING:
211
if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
212
break;
213
}
214
215
if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
216
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
217
memcpy(p, posix - clength, CU2BYTES(clength));
218
p += clength;
219
}
220
}
221
222
/* Handle a character not within a class. */
223
224
else switch(sc)
225
{
226
case CHAR_LEFT_SQUARE_BRACKET:
227
PUTCHARS(STR_LEFT_SQUARE_BRACKET);
228
229
#ifdef NEVER
230
/* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
231
support) but they are not part of POSIX 1003.1. */
232
233
if (plength >= 6)
234
{
235
if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
236
posix[1] == CHAR_COLON &&
237
(posix[2] == CHAR_LESS_THAN_SIGN ||
238
posix[2] == CHAR_GREATER_THAN_SIGN) &&
239
posix[3] == CHAR_COLON &&
240
posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
241
posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
242
{
243
if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
244
memcpy(p, posix, CU2BYTES(6));
245
p += 6;
246
posix += 6;
247
plength -= 6;
248
continue; /* With next character */
249
}
250
}
251
#endif
252
253
/* Handle start of "normal" character classes */
254
255
posix_state = POSIX_CLASS_NOT_STARTED;
256
257
/* Handle ^ and ] as first characters */
258
259
if (plength > 0)
260
{
261
if (*posix == CHAR_CIRCUMFLEX_ACCENT)
262
{
263
posix++;
264
plength--;
265
PUTCHARS(STR_CIRCUMFLEX_ACCENT);
266
}
267
if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
268
{
269
posix++;
270
plength--;
271
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
272
}
273
}
274
break;
275
276
case CHAR_BACKSLASH:
277
if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
278
if (extended) nextisliteral = TRUE; else
279
{
280
if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
281
{
282
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
283
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
284
lastspecial = *p++ = *posix++;
285
plength--;
286
}
287
else nextisliteral = TRUE;
288
}
289
break;
290
291
case CHAR_RIGHT_PARENTHESIS:
292
if (!extended || bracount == 0) goto ESCAPE_LITERAL;
293
bracount--;
294
goto COPY_SPECIAL;
295
296
case CHAR_LEFT_PARENTHESIS:
297
bracount++;
298
/* Fall through */
299
300
case CHAR_QUESTION_MARK:
301
case CHAR_PLUS:
302
case CHAR_LEFT_CURLY_BRACKET:
303
case CHAR_RIGHT_CURLY_BRACKET:
304
case CHAR_VERTICAL_LINE:
305
if (!extended) goto ESCAPE_LITERAL;
306
/* Fall through */
307
308
case CHAR_DOT:
309
case CHAR_DOLLAR_SIGN:
310
posix_state = POSIX_NOT_BRACKET;
311
COPY_SPECIAL:
312
lastspecial = c;
313
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
314
*p++ = c;
315
break;
316
317
case CHAR_ASTERISK:
318
if (lastspecial != CHAR_ASTERISK)
319
{
320
if (!extended && (posix_state < POSIX_NOT_BRACKET ||
321
lastspecial == CHAR_LEFT_PARENTHESIS))
322
goto ESCAPE_LITERAL;
323
goto COPY_SPECIAL;
324
}
325
break; /* Ignore second and subsequent asterisks */
326
327
case CHAR_CIRCUMFLEX_ACCENT:
328
if (extended) goto COPY_SPECIAL;
329
if (posix_state == POSIX_START_REGEX ||
330
lastspecial == CHAR_LEFT_PARENTHESIS)
331
{
332
posix_state = POSIX_ANCHORED;
333
goto COPY_SPECIAL;
334
}
335
/* Fall through */
336
337
default:
338
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
339
{
340
ESCAPE_LITERAL:
341
PUTCHARS(STR_BACKSLASH);
342
}
343
lastspecial = 0xff; /* Indicates nothing special */
344
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
345
memcpy(p, posix - clength, CU2BYTES(clength));
346
p += clength;
347
posix_state = POSIX_NOT_BRACKET;
348
break;
349
}
350
}
351
352
if (posix_state >= POSIX_CLASS_NOT_STARTED)
353
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
354
convlength += p - pp; /* Final segment */
355
*bufflenptr = convlength;
356
*p++ = 0;
357
return 0;
358
}
359
360
361
/*************************************************
362
* Convert a glob pattern *
363
*************************************************/
364
365
/* Context for writing the output into a buffer. */
366
367
typedef struct pcre2_output_context {
368
PCRE2_UCHAR *output; /* current output position */
369
PCRE2_SPTR output_end; /* output end */
370
PCRE2_SIZE output_size; /* size of the output */
371
uint8_t out_str[8]; /* string copied to the output */
372
} pcre2_output_context;
373
374
375
/* Write a character into the output.
376
377
Arguments:
378
out output context
379
chr the next character
380
*/
381
382
static void
383
convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
384
{
385
out->output_size++;
386
387
if (out->output < out->output_end)
388
*out->output++ = chr;
389
}
390
391
392
/* Write a string into the output.
393
394
Arguments:
395
out output context
396
length length of out->out_str
397
*/
398
399
static void
400
convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
401
{
402
uint8_t *out_str = out->out_str;
403
PCRE2_UCHAR *output = out->output;
404
PCRE2_SPTR output_end = out->output_end;
405
PCRE2_SIZE output_size = out->output_size;
406
407
do
408
{
409
output_size++;
410
411
if (output < output_end)
412
*output++ = *out_str++;
413
}
414
while (--length != 0);
415
416
out->output = output;
417
out->output_size = output_size;
418
}
419
420
421
/* Prints the separator into the output.
422
423
Arguments:
424
out output context
425
separator glob separator
426
with_escape backslash is needed before separator
427
*/
428
429
static void
430
convert_glob_print_separator(pcre2_output_context *out,
431
PCRE2_UCHAR separator, BOOL with_escape)
432
{
433
if (with_escape)
434
convert_glob_write(out, CHAR_BACKSLASH);
435
436
convert_glob_write(out, separator);
437
}
438
439
440
/* Prints a wildcard into the output.
441
442
Arguments:
443
out output context
444
separator glob separator
445
with_escape backslash is needed before separator
446
*/
447
448
static void
449
convert_glob_print_wildcard(pcre2_output_context *out,
450
PCRE2_UCHAR separator, BOOL with_escape)
451
{
452
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
453
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
454
convert_glob_write_str(out, 2);
455
456
convert_glob_print_separator(out, separator, with_escape);
457
458
convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
459
}
460
461
462
/* Parse a posix class.
463
464
Arguments:
465
from starting point of scanning the range
466
pattern_end end of pattern
467
out output context
468
469
Returns: >0 => class index
470
0 => malformed class
471
*/
472
473
static int
474
convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
475
pcre2_output_context *out)
476
{
477
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
478
"graph:lower:print:punct:space:upper:word:xdigit:";
479
PCRE2_SPTR start = *from + 1;
480
PCRE2_SPTR pattern = start;
481
const char *class_ptr;
482
PCRE2_UCHAR c;
483
int class_index;
484
485
while (TRUE)
486
{
487
if (pattern >= pattern_end) return 0;
488
489
c = *pattern++;
490
491
if (c < CHAR_a || c > CHAR_z) break;
492
}
493
494
if (c != CHAR_COLON || pattern >= pattern_end ||
495
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
496
return 0;
497
498
class_ptr = posix_classes;
499
class_index = 1;
500
501
while (TRUE)
502
{
503
if (*class_ptr == CHAR_NUL) return 0;
504
505
pattern = start;
506
507
while (*pattern == (PCRE2_UCHAR) *class_ptr)
508
{
509
if (*pattern == CHAR_COLON)
510
{
511
pattern += 2;
512
start -= 2;
513
514
do convert_glob_write(out, *start++); while (start < pattern);
515
516
*from = pattern;
517
return class_index;
518
}
519
pattern++;
520
class_ptr++;
521
}
522
523
while (*class_ptr != CHAR_COLON) class_ptr++;
524
class_ptr++;
525
class_index++;
526
}
527
}
528
529
/* Checks whether the character is in the class.
530
531
Arguments:
532
class_index class index
533
c character
534
535
Returns: !0 => character is found in the class
536
0 => otherwise
537
*/
538
539
static BOOL
540
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
541
{
542
#if PCRE2_CODE_UNIT_WIDTH != 8
543
if (c > 0xff)
544
{
545
/* ctype functions are not sane for c > 0xff */
546
return 0;
547
}
548
#endif
549
550
switch (class_index)
551
{
552
case 1: return isalnum(c);
553
case 2: return isalpha(c);
554
case 3: return 1;
555
case 4: return c == CHAR_HT || c == CHAR_SPACE;
556
case 5: return iscntrl(c);
557
case 6: return isdigit(c);
558
case 7: return isgraph(c);
559
case 8: return islower(c);
560
case 9: return isprint(c);
561
case 10: return ispunct(c);
562
case 11: return isspace(c);
563
case 12: return isupper(c);
564
case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
565
default: return isxdigit(c);
566
}
567
}
568
569
/* Parse a range of characters.
570
571
Arguments:
572
from starting point of scanning the range
573
pattern_end end of pattern
574
out output context
575
separator glob separator
576
with_escape backslash is needed before separator
577
578
Returns: 0 => success
579
!0 => error code
580
*/
581
582
static int
583
convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
584
pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
585
BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
586
{
587
BOOL is_negative = FALSE;
588
BOOL separator_seen = FALSE;
589
BOOL has_prev_c;
590
PCRE2_SPTR pattern = *from;
591
PCRE2_SPTR char_start = NULL;
592
uint32_t c, prev_c;
593
int len, class_index;
594
595
(void)utf; /* Avoid compiler warning. */
596
597
if (pattern >= pattern_end)
598
{
599
*from = pattern;
600
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
601
}
602
603
if (*pattern == CHAR_EXCLAMATION_MARK
604
|| *pattern == CHAR_CIRCUMFLEX_ACCENT)
605
{
606
pattern++;
607
608
if (pattern >= pattern_end)
609
{
610
*from = pattern;
611
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
612
}
613
614
is_negative = TRUE;
615
616
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
617
out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
618
len = 2;
619
620
if (!no_wildsep)
621
{
622
if (with_escape)
623
{
624
out->out_str[len] = CHAR_BACKSLASH;
625
len++;
626
}
627
out->out_str[len] = (uint8_t) separator;
628
}
629
630
convert_glob_write_str(out, len + 1);
631
}
632
else
633
convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
634
635
has_prev_c = FALSE;
636
prev_c = 0;
637
638
if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
639
{
640
out->out_str[0] = CHAR_BACKSLASH;
641
out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
642
convert_glob_write_str(out, 2);
643
has_prev_c = TRUE;
644
prev_c = CHAR_RIGHT_SQUARE_BRACKET;
645
pattern++;
646
}
647
648
while (pattern < pattern_end)
649
{
650
char_start = pattern;
651
GETCHARINCTEST(c, pattern);
652
653
if (c == CHAR_RIGHT_SQUARE_BRACKET)
654
{
655
convert_glob_write(out, c);
656
657
if (!is_negative && !no_wildsep && separator_seen)
658
{
659
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
660
out->out_str[1] = CHAR_QUESTION_MARK;
661
out->out_str[2] = CHAR_LESS_THAN_SIGN;
662
out->out_str[3] = CHAR_EXCLAMATION_MARK;
663
convert_glob_write_str(out, 4);
664
665
convert_glob_print_separator(out, separator, with_escape);
666
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
667
}
668
669
*from = pattern;
670
return 0;
671
}
672
673
if (pattern >= pattern_end) break;
674
675
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
676
{
677
*from = pattern;
678
class_index = convert_glob_parse_class(from, pattern_end, out);
679
680
if (class_index != 0)
681
{
682
pattern = *from;
683
684
has_prev_c = FALSE;
685
prev_c = 0;
686
687
if (!is_negative &&
688
convert_glob_char_in_class (class_index, separator))
689
separator_seen = TRUE;
690
continue;
691
}
692
}
693
else if (c == CHAR_MINUS && has_prev_c &&
694
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
695
{
696
convert_glob_write(out, CHAR_MINUS);
697
698
char_start = pattern;
699
GETCHARINCTEST(c, pattern);
700
701
if (pattern >= pattern_end) break;
702
703
if (escape != 0 && c == escape)
704
{
705
char_start = pattern;
706
GETCHARINCTEST(c, pattern);
707
}
708
else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
709
{
710
*from = pattern;
711
return PCRE2_ERROR_CONVERT_SYNTAX;
712
}
713
714
if (prev_c > c)
715
{
716
*from = pattern;
717
return PCRE2_ERROR_CONVERT_SYNTAX;
718
}
719
720
if (prev_c < separator && separator < c) separator_seen = TRUE;
721
722
has_prev_c = FALSE;
723
prev_c = 0;
724
}
725
else
726
{
727
if (escape != 0 && c == escape)
728
{
729
char_start = pattern;
730
GETCHARINCTEST(c, pattern);
731
732
if (pattern >= pattern_end) break;
733
}
734
735
has_prev_c = TRUE;
736
prev_c = c;
737
}
738
739
if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
740
c == CHAR_BACKSLASH || c == CHAR_MINUS)
741
convert_glob_write(out, CHAR_BACKSLASH);
742
743
if (c == separator) separator_seen = TRUE;
744
745
do convert_glob_write(out, *char_start++); while (char_start < pattern);
746
}
747
748
*from = pattern;
749
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
750
}
751
752
753
/* Prints a (*COMMIT) into the output.
754
755
Arguments:
756
out output context
757
*/
758
759
static void
760
convert_glob_print_commit(pcre2_output_context *out)
761
{
762
out->out_str[0] = CHAR_LEFT_PARENTHESIS;
763
out->out_str[1] = CHAR_ASTERISK;
764
out->out_str[2] = CHAR_C;
765
out->out_str[3] = CHAR_O;
766
out->out_str[4] = CHAR_M;
767
out->out_str[5] = CHAR_M;
768
out->out_str[6] = CHAR_I;
769
out->out_str[7] = CHAR_T;
770
convert_glob_write_str(out, 8);
771
convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
772
}
773
774
775
/* Bash glob converter.
776
777
Arguments:
778
pattype the pattern type
779
pattern the pattern
780
plength length in code units
781
utf TRUE if UTF
782
use_buffer where to put the output
783
use_length length of use_buffer
784
bufflenptr where to put the used length
785
dummyrun TRUE if a dummy run
786
ccontext the convert context
787
788
Returns: 0 => success
789
!0 => error code
790
*/
791
792
static int
793
convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
794
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
795
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
796
{
797
pcre2_output_context out;
798
PCRE2_SPTR pattern_start = pattern;
799
PCRE2_SPTR pattern_end = pattern + plength;
800
PCRE2_UCHAR separator = ccontext->glob_separator;
801
PCRE2_UCHAR escape = ccontext->glob_escape;
802
PCRE2_UCHAR c;
803
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
804
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
805
BOOL in_atomic = FALSE;
806
BOOL after_starstar = FALSE;
807
BOOL no_slash_z = FALSE;
808
BOOL with_escape, is_start, after_separator;
809
int result = 0;
810
811
(void)utf; /* Avoid compiler warning. */
812
813
#ifdef SUPPORT_UNICODE
814
if (utf && (separator >= 128 || escape >= 128))
815
{
816
/* Currently only ASCII characters are supported. */
817
*bufflenptr = 0;
818
return PCRE2_ERROR_CONVERT_SYNTAX;
819
}
820
#endif
821
822
with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
823
824
/* Initialize default for error offset as end of input. */
825
out.output = use_buffer;
826
out.output_end = use_buffer + use_length;
827
out.output_size = 0;
828
829
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
830
out.out_str[1] = CHAR_QUESTION_MARK;
831
out.out_str[2] = CHAR_s;
832
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
833
convert_glob_write_str(&out, 4);
834
835
is_start = TRUE;
836
837
if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
838
{
839
if (no_wildsep)
840
is_start = FALSE;
841
else if (!no_starstar && pattern + 1 < pattern_end &&
842
pattern[1] == CHAR_ASTERISK)
843
is_start = FALSE;
844
}
845
846
if (is_start)
847
{
848
out.out_str[0] = CHAR_BACKSLASH;
849
out.out_str[1] = CHAR_A;
850
convert_glob_write_str(&out, 2);
851
}
852
853
while (pattern < pattern_end)
854
{
855
c = *pattern++;
856
857
if (c == CHAR_ASTERISK)
858
{
859
is_start = pattern == pattern_start + 1;
860
861
if (in_atomic)
862
{
863
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
864
in_atomic = FALSE;
865
}
866
867
if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
868
{
869
after_separator = is_start || (pattern[-2] == separator);
870
871
do pattern++; while (pattern < pattern_end &&
872
*pattern == CHAR_ASTERISK);
873
874
if (pattern >= pattern_end)
875
{
876
no_slash_z = TRUE;
877
break;
878
}
879
880
after_starstar = TRUE;
881
882
if (after_separator && escape != 0 && *pattern == escape &&
883
pattern + 1 < pattern_end && pattern[1] == separator)
884
pattern++;
885
886
if (is_start)
887
{
888
if (*pattern != separator) continue;
889
890
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
891
out.out_str[1] = CHAR_QUESTION_MARK;
892
out.out_str[2] = CHAR_COLON;
893
out.out_str[3] = CHAR_BACKSLASH;
894
out.out_str[4] = CHAR_A;
895
out.out_str[5] = CHAR_VERTICAL_LINE;
896
convert_glob_write_str(&out, 6);
897
898
convert_glob_print_separator(&out, separator, with_escape);
899
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
900
901
pattern++;
902
continue;
903
}
904
905
convert_glob_print_commit(&out);
906
907
if (!after_separator || *pattern != separator)
908
{
909
out.out_str[0] = CHAR_DOT;
910
out.out_str[1] = CHAR_ASTERISK;
911
out.out_str[2] = CHAR_QUESTION_MARK;
912
convert_glob_write_str(&out, 3);
913
continue;
914
}
915
916
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
917
out.out_str[1] = CHAR_QUESTION_MARK;
918
out.out_str[2] = CHAR_COLON;
919
out.out_str[3] = CHAR_DOT;
920
out.out_str[4] = CHAR_ASTERISK;
921
out.out_str[5] = CHAR_QUESTION_MARK;
922
923
convert_glob_write_str(&out, 6);
924
925
convert_glob_print_separator(&out, separator, with_escape);
926
927
out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
928
out.out_str[1] = CHAR_QUESTION_MARK;
929
out.out_str[2] = CHAR_QUESTION_MARK;
930
convert_glob_write_str(&out, 3);
931
932
pattern++;
933
continue;
934
}
935
936
if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
937
{
938
do pattern++; while (pattern < pattern_end &&
939
*pattern == CHAR_ASTERISK);
940
}
941
942
if (no_wildsep)
943
{
944
if (pattern >= pattern_end)
945
{
946
no_slash_z = TRUE;
947
break;
948
}
949
950
/* Start check must be after the end check. */
951
if (is_start) continue;
952
}
953
954
if (!is_start)
955
{
956
if (after_starstar)
957
{
958
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
959
out.out_str[1] = CHAR_QUESTION_MARK;
960
out.out_str[2] = CHAR_GREATER_THAN_SIGN;
961
convert_glob_write_str(&out, 3);
962
in_atomic = TRUE;
963
}
964
else
965
convert_glob_print_commit(&out);
966
}
967
968
if (no_wildsep)
969
convert_glob_write(&out, CHAR_DOT);
970
else
971
convert_glob_print_wildcard(&out, separator, with_escape);
972
973
out.out_str[0] = CHAR_ASTERISK;
974
out.out_str[1] = CHAR_QUESTION_MARK;
975
if (pattern >= pattern_end)
976
out.out_str[1] = CHAR_PLUS;
977
convert_glob_write_str(&out, 2);
978
continue;
979
}
980
981
if (c == CHAR_QUESTION_MARK)
982
{
983
if (no_wildsep)
984
convert_glob_write(&out, CHAR_DOT);
985
else
986
convert_glob_print_wildcard(&out, separator, with_escape);
987
continue;
988
}
989
990
if (c == CHAR_LEFT_SQUARE_BRACKET)
991
{
992
result = convert_glob_parse_range(&pattern, pattern_end,
993
&out, utf, separator, with_escape, escape, no_wildsep);
994
if (result != 0) break;
995
continue;
996
}
997
998
if (escape != 0 && c == escape)
999
{
1000
if (pattern >= pattern_end)
1001
{
1002
result = PCRE2_ERROR_CONVERT_SYNTAX;
1003
break;
1004
}
1005
c = *pattern++;
1006
}
1007
1008
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
1009
convert_glob_write(&out, CHAR_BACKSLASH);
1010
1011
convert_glob_write(&out, c);
1012
}
1013
1014
if (result == 0)
1015
{
1016
if (!no_slash_z)
1017
{
1018
out.out_str[0] = CHAR_BACKSLASH;
1019
out.out_str[1] = CHAR_z;
1020
convert_glob_write_str(&out, 2);
1021
}
1022
1023
if (in_atomic)
1024
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
1025
1026
convert_glob_write(&out, CHAR_NUL);
1027
1028
if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
1029
result = PCRE2_ERROR_NOMEMORY;
1030
}
1031
1032
if (result != 0)
1033
{
1034
*bufflenptr = pattern - pattern_start;
1035
return result;
1036
}
1037
1038
*bufflenptr = out.output_size - 1;
1039
return 0;
1040
}
1041
1042
1043
/*************************************************
1044
* Convert pattern *
1045
*************************************************/
1046
1047
/* This is the external-facing function for converting other forms of pattern
1048
into PCRE2 regular expression patterns. On error, the bufflenptr argument is
1049
used to return an offset in the original pattern.
1050
1051
Arguments:
1052
pattern the input pattern
1053
plength length of input, or PCRE2_ZERO_TERMINATED
1054
options options bits
1055
buffptr pointer to pointer to output buffer
1056
bufflenptr pointer to length of output buffer
1057
ccontext convert context or NULL
1058
1059
Returns: 0 for success, else an error code (+ve or -ve)
1060
*/
1061
1062
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
1063
pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
1064
PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
1065
pcre2_convert_context *ccontext)
1066
{
1067
int rc;
1068
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
1069
PCRE2_UCHAR *use_buffer = dummy_buffer;
1070
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
1071
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
1072
uint32_t pattype = options & TYPE_OPTIONS;
1073
1074
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
1075
1076
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
1077
(pattype & (~pattype+1)) != pattype || /* More than one type set */
1078
pattype == 0) /* No type set */
1079
{
1080
*bufflenptr = 0; /* Error offset */
1081
return PCRE2_ERROR_BADOPTION;
1082
}
1083
1084
if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
1085
if (ccontext == NULL) ccontext =
1086
(pcre2_convert_context *)(&PRIV(default_convert_context));
1087
1088
/* Check UTF if required. */
1089
1090
#ifndef SUPPORT_UNICODE
1091
if (utf)
1092
{
1093
*bufflenptr = 0; /* Error offset */
1094
return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
1095
}
1096
#else
1097
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
1098
{
1099
PCRE2_SIZE erroroffset;
1100
rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
1101
if (rc != 0)
1102
{
1103
*bufflenptr = erroroffset;
1104
return rc;
1105
}
1106
}
1107
#endif
1108
1109
/* If buffptr is not NULL, and what it points to is not NULL, we are being
1110
provided with a buffer and a length, so set them as the buffer to use. */
1111
1112
if (buffptr != NULL && *buffptr != NULL)
1113
{
1114
use_buffer = *buffptr;
1115
use_length = *bufflenptr;
1116
}
1117
1118
/* Call an individual converter, either just once (if a buffer was provided or
1119
just the length is needed), or twice (if a memory allocation is required). */
1120
1121
for (int i = 0; i < 2; i++)
1122
{
1123
PCRE2_UCHAR *allocated;
1124
BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
1125
1126
switch(pattype)
1127
{
1128
case PCRE2_CONVERT_GLOB:
1129
rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
1130
use_buffer, use_length, bufflenptr, dummyrun, ccontext);
1131
break;
1132
1133
case PCRE2_CONVERT_POSIX_BASIC:
1134
case PCRE2_CONVERT_POSIX_EXTENDED:
1135
rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
1136
bufflenptr, dummyrun, ccontext);
1137
break;
1138
1139
default:
1140
goto EXIT;
1141
}
1142
1143
if (rc != 0 || /* Error */
1144
buffptr == NULL || /* Just the length is required */
1145
*buffptr != NULL) /* Buffer was provided or allocated */
1146
return rc;
1147
1148
/* Allocate memory for the buffer, with hidden space for an allocator at
1149
the start. The next time round the loop runs the conversion for real. */
1150
1151
allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
1152
(*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
1153
if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
1154
*buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
1155
1156
use_buffer = *buffptr;
1157
use_length = *bufflenptr + 1;
1158
}
1159
1160
/* Something went terribly wrong. Trigger an assert and return an error */
1161
PCRE2_DEBUG_UNREACHABLE();
1162
1163
EXIT:
1164
1165
*bufflenptr = 0; /* Error offset */
1166
return PCRE2_ERROR_INTERNAL;
1167
}
1168
1169
1170
/*************************************************
1171
* Free converted pattern *
1172
*************************************************/
1173
1174
/* This frees a converted pattern that was put in newly-allocated memory.
1175
1176
Argument: the converted pattern
1177
Returns: nothing
1178
*/
1179
1180
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
1181
pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
1182
{
1183
if (converted != NULL)
1184
{
1185
pcre2_memctl *memctl =
1186
(pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
1187
memctl->free(memctl, memctl->memory_data);
1188
}
1189
}
1190
1191
/* End of pcre2_convert.c */
1192
1193