Path: blob/master/thirdparty/pcre2/src/pcre2_convert.c
9903 views
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041#ifdef HAVE_CONFIG_H42#include "config.h"43#endif4445#include "pcre2_internal.h"4647#define TYPE_OPTIONS (PCRE2_CONVERT_GLOB| \48PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)4950#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \51PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \52PCRE2_CONVERT_GLOB_NO_STARSTAR| \53TYPE_OPTIONS)5455#define DUMMY_BUFFER_SIZE 1005657/* Generated pattern fragments */5859#define STR_BACKSLASH_A STR_BACKSLASH STR_A60#define STR_BACKSLASH_z STR_BACKSLASH STR_z61#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET62#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN63#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS64#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS65#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS6667/* States for POSIX processing */6869enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,70POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };7172/* Macro to add a character string to the output buffer, checking for overflow. */7374#define PUTCHARS(string) \75{ \76for (const char *s = string; *s != 0; s++) \77{ \78if (p >= endp) return PCRE2_ERROR_NOMEMORY; \79*p++ = *s; \80} \81}8283/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */8485static const char *pcre2_escaped_literals =86STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS87STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN88STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET89STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET90STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;9192/* Recognized escaped metacharacters in POSIX basic patterns. */9394static const char *posix_meta_escapes =95STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS96STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET97STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;9899100101/*************************************************102* Convert a POSIX pattern *103*************************************************/104105/* This function handles both basic and extended POSIX patterns.106107Arguments:108pattype the pattern type109pattern the pattern110plength length in code units111utf TRUE if UTF112use_buffer where to put the output113use_length length of use_buffer114bufflenptr where to put the used length115dummyrun TRUE if a dummy run116ccontext the convert context117118Returns: 0 => success119!0 => error code120*/121122static int123convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,124BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,125PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)126{127PCRE2_SPTR posix = pattern;128PCRE2_UCHAR *p = use_buffer;129PCRE2_UCHAR *pp = p;130PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */131PCRE2_SIZE convlength = 0;132133uint32_t bracount = 0;134uint32_t posix_state = POSIX_START_REGEX;135uint32_t lastspecial = 0;136BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;137BOOL nextisliteral = FALSE;138139(void)utf; /* Not used when Unicode not supported */140(void)ccontext; /* Not currently used */141142/* Initialize default for error offset as end of input. */143144*bufflenptr = plength;145PUTCHARS(STR_STAR_NUL);146147/* Now scan the input. */148149while (plength > 0)150{151uint32_t c, sc;152int clength = 1;153154/* Add in the length of the last item, then, if in the dummy run, pull the155pointer back to the start of the (temporary) buffer and then remember the156start of the next item. */157158convlength += p - pp;159if (dummyrun) p = use_buffer;160pp = p;161162/* Pick up the next character */163164#ifndef SUPPORT_UNICODE165c = *posix;166#else167GETCHARLENTEST(c, posix, clength);168#endif169posix += clength;170plength -= clength;171172sc = nextisliteral? 0 : c;173nextisliteral = FALSE;174175/* Handle a character within a class. */176177if (posix_state >= POSIX_CLASS_NOT_STARTED)178{179if (c == CHAR_RIGHT_SQUARE_BRACKET)180{181PUTCHARS(STR_RIGHT_SQUARE_BRACKET);182posix_state = POSIX_NOT_BRACKET;183}184185/* Not the end of the class */186187else188{189switch (posix_state)190{191case POSIX_CLASS_STARTED:192if (c <= 127 && islower(c)) break; /* Remain in started state */193posix_state = POSIX_CLASS_NOT_STARTED;194if (c == CHAR_COLON && plength > 0 &&195*posix == CHAR_RIGHT_SQUARE_BRACKET)196{197PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);198plength--;199posix++;200continue; /* With next character after :] */201}202/* Fall through */203204case POSIX_CLASS_NOT_STARTED:205if (c == CHAR_LEFT_SQUARE_BRACKET)206posix_state = POSIX_CLASS_STARTING;207break;208209case POSIX_CLASS_STARTING:210if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;211break;212}213214if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);215if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;216memcpy(p, posix - clength, CU2BYTES(clength));217p += clength;218}219}220221/* Handle a character not within a class. */222223else switch(sc)224{225case CHAR_LEFT_SQUARE_BRACKET:226PUTCHARS(STR_LEFT_SQUARE_BRACKET);227228#ifdef NEVER229/* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does230support) but they are not part of POSIX 1003.1. */231232if (plength >= 6)233{234if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&235posix[1] == CHAR_COLON &&236(posix[2] == CHAR_LESS_THAN_SIGN ||237posix[2] == CHAR_GREATER_THAN_SIGN) &&238posix[3] == CHAR_COLON &&239posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&240posix[5] == CHAR_RIGHT_SQUARE_BRACKET)241{242if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;243memcpy(p, posix, CU2BYTES(6));244p += 6;245posix += 6;246plength -= 6;247continue; /* With next character */248}249}250#endif251252/* Handle start of "normal" character classes */253254posix_state = POSIX_CLASS_NOT_STARTED;255256/* Handle ^ and ] as first characters */257258if (plength > 0)259{260if (*posix == CHAR_CIRCUMFLEX_ACCENT)261{262posix++;263plength--;264PUTCHARS(STR_CIRCUMFLEX_ACCENT);265}266if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)267{268posix++;269plength--;270PUTCHARS(STR_RIGHT_SQUARE_BRACKET);271}272}273break;274275case CHAR_BACKSLASH:276if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;277if (extended) nextisliteral = TRUE; else278{279if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)280{281if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);282if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;283lastspecial = *p++ = *posix++;284plength--;285}286else nextisliteral = TRUE;287}288break;289290case CHAR_RIGHT_PARENTHESIS:291if (!extended || bracount == 0) goto ESCAPE_LITERAL;292bracount--;293goto COPY_SPECIAL;294295case CHAR_LEFT_PARENTHESIS:296bracount++;297/* Fall through */298299case CHAR_QUESTION_MARK:300case CHAR_PLUS:301case CHAR_LEFT_CURLY_BRACKET:302case CHAR_RIGHT_CURLY_BRACKET:303case CHAR_VERTICAL_LINE:304if (!extended) goto ESCAPE_LITERAL;305/* Fall through */306307case CHAR_DOT:308case CHAR_DOLLAR_SIGN:309posix_state = POSIX_NOT_BRACKET;310COPY_SPECIAL:311lastspecial = c;312if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;313*p++ = c;314break;315316case CHAR_ASTERISK:317if (lastspecial != CHAR_ASTERISK)318{319if (!extended && (posix_state < POSIX_NOT_BRACKET ||320lastspecial == CHAR_LEFT_PARENTHESIS))321goto ESCAPE_LITERAL;322goto COPY_SPECIAL;323}324break; /* Ignore second and subsequent asterisks */325326case CHAR_CIRCUMFLEX_ACCENT:327if (extended) goto COPY_SPECIAL;328if (posix_state == POSIX_START_REGEX ||329lastspecial == CHAR_LEFT_PARENTHESIS)330{331posix_state = POSIX_ANCHORED;332goto COPY_SPECIAL;333}334/* Fall through */335336default:337if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)338{339ESCAPE_LITERAL:340PUTCHARS(STR_BACKSLASH);341}342lastspecial = 0xff; /* Indicates nothing special */343if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;344memcpy(p, posix - clength, CU2BYTES(clength));345p += clength;346posix_state = POSIX_NOT_BRACKET;347break;348}349}350351if (posix_state >= POSIX_CLASS_NOT_STARTED)352return PCRE2_ERROR_MISSING_SQUARE_BRACKET;353convlength += p - pp; /* Final segment */354*bufflenptr = convlength;355*p++ = 0;356return 0;357}358359360/*************************************************361* Convert a glob pattern *362*************************************************/363364/* Context for writing the output into a buffer. */365366typedef struct pcre2_output_context {367PCRE2_UCHAR *output; /* current output position */368PCRE2_SPTR output_end; /* output end */369PCRE2_SIZE output_size; /* size of the output */370uint8_t out_str[8]; /* string copied to the output */371} pcre2_output_context;372373374/* Write a character into the output.375376Arguments:377out output context378chr the next character379*/380381static void382convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)383{384out->output_size++;385386if (out->output < out->output_end)387*out->output++ = chr;388}389390391/* Write a string into the output.392393Arguments:394out output context395length length of out->out_str396*/397398static void399convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)400{401uint8_t *out_str = out->out_str;402PCRE2_UCHAR *output = out->output;403PCRE2_SPTR output_end = out->output_end;404PCRE2_SIZE output_size = out->output_size;405406do407{408output_size++;409410if (output < output_end)411*output++ = *out_str++;412}413while (--length != 0);414415out->output = output;416out->output_size = output_size;417}418419420/* Prints the separator into the output.421422Arguments:423out output context424separator glob separator425with_escape backslash is needed before separator426*/427428static void429convert_glob_print_separator(pcre2_output_context *out,430PCRE2_UCHAR separator, BOOL with_escape)431{432if (with_escape)433convert_glob_write(out, CHAR_BACKSLASH);434435convert_glob_write(out, separator);436}437438439/* Prints a wildcard into the output.440441Arguments:442out output context443separator glob separator444with_escape backslash is needed before separator445*/446447static void448convert_glob_print_wildcard(pcre2_output_context *out,449PCRE2_UCHAR separator, BOOL with_escape)450{451out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;452out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;453convert_glob_write_str(out, 2);454455convert_glob_print_separator(out, separator, with_escape);456457convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);458}459460461/* Parse a posix class.462463Arguments:464from starting point of scanning the range465pattern_end end of pattern466out output context467468Returns: >0 => class index4690 => malformed class470*/471472static int473convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,474pcre2_output_context *out)475{476static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"477"graph:lower:print:punct:space:upper:word:xdigit:";478PCRE2_SPTR start = *from + 1;479PCRE2_SPTR pattern = start;480const char *class_ptr;481PCRE2_UCHAR c;482int class_index;483484while (TRUE)485{486if (pattern >= pattern_end) return 0;487488c = *pattern++;489490if (c < CHAR_a || c > CHAR_z) break;491}492493if (c != CHAR_COLON || pattern >= pattern_end ||494*pattern != CHAR_RIGHT_SQUARE_BRACKET)495return 0;496497class_ptr = posix_classes;498class_index = 1;499500while (TRUE)501{502if (*class_ptr == CHAR_NUL) return 0;503504pattern = start;505506while (*pattern == (PCRE2_UCHAR) *class_ptr)507{508if (*pattern == CHAR_COLON)509{510pattern += 2;511start -= 2;512513do convert_glob_write(out, *start++); while (start < pattern);514515*from = pattern;516return class_index;517}518pattern++;519class_ptr++;520}521522while (*class_ptr != CHAR_COLON) class_ptr++;523class_ptr++;524class_index++;525}526}527528/* Checks whether the character is in the class.529530Arguments:531class_index class index532c character533534Returns: !0 => character is found in the class5350 => otherwise536*/537538static BOOL539convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)540{541#if PCRE2_CODE_UNIT_WIDTH != 8542if (c > 0xff)543{544/* ctype functions are not sane for c > 0xff */545return 0;546}547#endif548549switch (class_index)550{551case 1: return isalnum(c);552case 2: return isalpha(c);553case 3: return 1;554case 4: return c == CHAR_HT || c == CHAR_SPACE;555case 5: return iscntrl(c);556case 6: return isdigit(c);557case 7: return isgraph(c);558case 8: return islower(c);559case 9: return isprint(c);560case 10: return ispunct(c);561case 11: return isspace(c);562case 12: return isupper(c);563case 13: return isalnum(c) || c == CHAR_UNDERSCORE;564default: return isxdigit(c);565}566}567568/* Parse a range of characters.569570Arguments:571from starting point of scanning the range572pattern_end end of pattern573out output context574separator glob separator575with_escape backslash is needed before separator576577Returns: 0 => success578!0 => error code579*/580581static int582convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,583pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,584BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)585{586BOOL is_negative = FALSE;587BOOL separator_seen = FALSE;588BOOL has_prev_c;589PCRE2_SPTR pattern = *from;590PCRE2_SPTR char_start = NULL;591uint32_t c, prev_c;592int len, class_index;593594(void)utf; /* Avoid compiler warning. */595596if (pattern >= pattern_end)597{598*from = pattern;599return PCRE2_ERROR_MISSING_SQUARE_BRACKET;600}601602if (*pattern == CHAR_EXCLAMATION_MARK603|| *pattern == CHAR_CIRCUMFLEX_ACCENT)604{605pattern++;606607if (pattern >= pattern_end)608{609*from = pattern;610return PCRE2_ERROR_MISSING_SQUARE_BRACKET;611}612613is_negative = TRUE;614615out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;616out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;617len = 2;618619if (!no_wildsep)620{621if (with_escape)622{623out->out_str[len] = CHAR_BACKSLASH;624len++;625}626out->out_str[len] = (uint8_t) separator;627}628629convert_glob_write_str(out, len + 1);630}631else632convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);633634has_prev_c = FALSE;635prev_c = 0;636637if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)638{639out->out_str[0] = CHAR_BACKSLASH;640out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;641convert_glob_write_str(out, 2);642has_prev_c = TRUE;643prev_c = CHAR_RIGHT_SQUARE_BRACKET;644pattern++;645}646647while (pattern < pattern_end)648{649char_start = pattern;650GETCHARINCTEST(c, pattern);651652if (c == CHAR_RIGHT_SQUARE_BRACKET)653{654convert_glob_write(out, c);655656if (!is_negative && !no_wildsep && separator_seen)657{658out->out_str[0] = CHAR_LEFT_PARENTHESIS;659out->out_str[1] = CHAR_QUESTION_MARK;660out->out_str[2] = CHAR_LESS_THAN_SIGN;661out->out_str[3] = CHAR_EXCLAMATION_MARK;662convert_glob_write_str(out, 4);663664convert_glob_print_separator(out, separator, with_escape);665convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);666}667668*from = pattern;669return 0;670}671672if (pattern >= pattern_end) break;673674if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)675{676*from = pattern;677class_index = convert_glob_parse_class(from, pattern_end, out);678679if (class_index != 0)680{681pattern = *from;682683has_prev_c = FALSE;684prev_c = 0;685686if (!is_negative &&687convert_glob_char_in_class (class_index, separator))688separator_seen = TRUE;689continue;690}691}692else if (c == CHAR_MINUS && has_prev_c &&693*pattern != CHAR_RIGHT_SQUARE_BRACKET)694{695convert_glob_write(out, CHAR_MINUS);696697char_start = pattern;698GETCHARINCTEST(c, pattern);699700if (pattern >= pattern_end) break;701702if (escape != 0 && c == escape)703{704char_start = pattern;705GETCHARINCTEST(c, pattern);706}707else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)708{709*from = pattern;710return PCRE2_ERROR_CONVERT_SYNTAX;711}712713if (prev_c > c)714{715*from = pattern;716return PCRE2_ERROR_CONVERT_SYNTAX;717}718719if (prev_c < separator && separator < c) separator_seen = TRUE;720721has_prev_c = FALSE;722prev_c = 0;723}724else725{726if (escape != 0 && c == escape)727{728char_start = pattern;729GETCHARINCTEST(c, pattern);730731if (pattern >= pattern_end) break;732}733734has_prev_c = TRUE;735prev_c = c;736}737738if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||739c == CHAR_BACKSLASH || c == CHAR_MINUS)740convert_glob_write(out, CHAR_BACKSLASH);741742if (c == separator) separator_seen = TRUE;743744do convert_glob_write(out, *char_start++); while (char_start < pattern);745}746747*from = pattern;748return PCRE2_ERROR_MISSING_SQUARE_BRACKET;749}750751752/* Prints a (*COMMIT) into the output.753754Arguments:755out output context756*/757758static void759convert_glob_print_commit(pcre2_output_context *out)760{761out->out_str[0] = CHAR_LEFT_PARENTHESIS;762out->out_str[1] = CHAR_ASTERISK;763out->out_str[2] = CHAR_C;764out->out_str[3] = CHAR_O;765out->out_str[4] = CHAR_M;766out->out_str[5] = CHAR_M;767out->out_str[6] = CHAR_I;768out->out_str[7] = CHAR_T;769convert_glob_write_str(out, 8);770convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);771}772773774/* Bash glob converter.775776Arguments:777pattype the pattern type778pattern the pattern779plength length in code units780utf TRUE if UTF781use_buffer where to put the output782use_length length of use_buffer783bufflenptr where to put the used length784dummyrun TRUE if a dummy run785ccontext the convert context786787Returns: 0 => success788!0 => error code789*/790791static int792convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,793BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,794PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)795{796pcre2_output_context out;797PCRE2_SPTR pattern_start = pattern;798PCRE2_SPTR pattern_end = pattern + plength;799PCRE2_UCHAR separator = ccontext->glob_separator;800PCRE2_UCHAR escape = ccontext->glob_escape;801PCRE2_UCHAR c;802BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;803BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;804BOOL in_atomic = FALSE;805BOOL after_starstar = FALSE;806BOOL no_slash_z = FALSE;807BOOL with_escape, is_start, after_separator;808int result = 0;809810(void)utf; /* Avoid compiler warning. */811812#ifdef SUPPORT_UNICODE813if (utf && (separator >= 128 || escape >= 128))814{815/* Currently only ASCII characters are supported. */816*bufflenptr = 0;817return PCRE2_ERROR_CONVERT_SYNTAX;818}819#endif820821with_escape = strchr(pcre2_escaped_literals, separator) != NULL;822823/* Initialize default for error offset as end of input. */824out.output = use_buffer;825out.output_end = use_buffer + use_length;826out.output_size = 0;827828out.out_str[0] = CHAR_LEFT_PARENTHESIS;829out.out_str[1] = CHAR_QUESTION_MARK;830out.out_str[2] = CHAR_s;831out.out_str[3] = CHAR_RIGHT_PARENTHESIS;832convert_glob_write_str(&out, 4);833834is_start = TRUE;835836if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)837{838if (no_wildsep)839is_start = FALSE;840else if (!no_starstar && pattern + 1 < pattern_end &&841pattern[1] == CHAR_ASTERISK)842is_start = FALSE;843}844845if (is_start)846{847out.out_str[0] = CHAR_BACKSLASH;848out.out_str[1] = CHAR_A;849convert_glob_write_str(&out, 2);850}851852while (pattern < pattern_end)853{854c = *pattern++;855856if (c == CHAR_ASTERISK)857{858is_start = pattern == pattern_start + 1;859860if (in_atomic)861{862convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);863in_atomic = FALSE;864}865866if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)867{868after_separator = is_start || (pattern[-2] == separator);869870do pattern++; while (pattern < pattern_end &&871*pattern == CHAR_ASTERISK);872873if (pattern >= pattern_end)874{875no_slash_z = TRUE;876break;877}878879after_starstar = TRUE;880881if (after_separator && escape != 0 && *pattern == escape &&882pattern + 1 < pattern_end && pattern[1] == separator)883pattern++;884885if (is_start)886{887if (*pattern != separator) continue;888889out.out_str[0] = CHAR_LEFT_PARENTHESIS;890out.out_str[1] = CHAR_QUESTION_MARK;891out.out_str[2] = CHAR_COLON;892out.out_str[3] = CHAR_BACKSLASH;893out.out_str[4] = CHAR_A;894out.out_str[5] = CHAR_VERTICAL_LINE;895convert_glob_write_str(&out, 6);896897convert_glob_print_separator(&out, separator, with_escape);898convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);899900pattern++;901continue;902}903904convert_glob_print_commit(&out);905906if (!after_separator || *pattern != separator)907{908out.out_str[0] = CHAR_DOT;909out.out_str[1] = CHAR_ASTERISK;910out.out_str[2] = CHAR_QUESTION_MARK;911convert_glob_write_str(&out, 3);912continue;913}914915out.out_str[0] = CHAR_LEFT_PARENTHESIS;916out.out_str[1] = CHAR_QUESTION_MARK;917out.out_str[2] = CHAR_COLON;918out.out_str[3] = CHAR_DOT;919out.out_str[4] = CHAR_ASTERISK;920out.out_str[5] = CHAR_QUESTION_MARK;921922convert_glob_write_str(&out, 6);923924convert_glob_print_separator(&out, separator, with_escape);925926out.out_str[0] = CHAR_RIGHT_PARENTHESIS;927out.out_str[1] = CHAR_QUESTION_MARK;928out.out_str[2] = CHAR_QUESTION_MARK;929convert_glob_write_str(&out, 3);930931pattern++;932continue;933}934935if (pattern < pattern_end && *pattern == CHAR_ASTERISK)936{937do pattern++; while (pattern < pattern_end &&938*pattern == CHAR_ASTERISK);939}940941if (no_wildsep)942{943if (pattern >= pattern_end)944{945no_slash_z = TRUE;946break;947}948949/* Start check must be after the end check. */950if (is_start) continue;951}952953if (!is_start)954{955if (after_starstar)956{957out.out_str[0] = CHAR_LEFT_PARENTHESIS;958out.out_str[1] = CHAR_QUESTION_MARK;959out.out_str[2] = CHAR_GREATER_THAN_SIGN;960convert_glob_write_str(&out, 3);961in_atomic = TRUE;962}963else964convert_glob_print_commit(&out);965}966967if (no_wildsep)968convert_glob_write(&out, CHAR_DOT);969else970convert_glob_print_wildcard(&out, separator, with_escape);971972out.out_str[0] = CHAR_ASTERISK;973out.out_str[1] = CHAR_QUESTION_MARK;974if (pattern >= pattern_end)975out.out_str[1] = CHAR_PLUS;976convert_glob_write_str(&out, 2);977continue;978}979980if (c == CHAR_QUESTION_MARK)981{982if (no_wildsep)983convert_glob_write(&out, CHAR_DOT);984else985convert_glob_print_wildcard(&out, separator, with_escape);986continue;987}988989if (c == CHAR_LEFT_SQUARE_BRACKET)990{991result = convert_glob_parse_range(&pattern, pattern_end,992&out, utf, separator, with_escape, escape, no_wildsep);993if (result != 0) break;994continue;995}996997if (escape != 0 && c == escape)998{999if (pattern >= pattern_end)1000{1001result = PCRE2_ERROR_CONVERT_SYNTAX;1002break;1003}1004c = *pattern++;1005}10061007if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)1008convert_glob_write(&out, CHAR_BACKSLASH);10091010convert_glob_write(&out, c);1011}10121013if (result == 0)1014{1015if (!no_slash_z)1016{1017out.out_str[0] = CHAR_BACKSLASH;1018out.out_str[1] = CHAR_z;1019convert_glob_write_str(&out, 2);1020}10211022if (in_atomic)1023convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);10241025convert_glob_write(&out, CHAR_NUL);10261027if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))1028result = PCRE2_ERROR_NOMEMORY;1029}10301031if (result != 0)1032{1033*bufflenptr = pattern - pattern_start;1034return result;1035}10361037*bufflenptr = out.output_size - 1;1038return 0;1039}104010411042/*************************************************1043* Convert pattern *1044*************************************************/10451046/* This is the external-facing function for converting other forms of pattern1047into PCRE2 regular expression patterns. On error, the bufflenptr argument is1048used to return an offset in the original pattern.10491050Arguments:1051pattern the input pattern1052plength length of input, or PCRE2_ZERO_TERMINATED1053options options bits1054buffptr pointer to pointer to output buffer1055bufflenptr pointer to length of output buffer1056ccontext convert context or NULL10571058Returns: 0 for success, else an error code (+ve or -ve)1059*/10601061PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION1062pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,1063PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,1064pcre2_convert_context *ccontext)1065{1066int rc;1067PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];1068PCRE2_UCHAR *use_buffer = dummy_buffer;1069PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;1070BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;1071uint32_t pattype = options & TYPE_OPTIONS;10721073if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;10741075if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */1076(pattype & (~pattype+1)) != pattype || /* More than one type set */1077pattype == 0) /* No type set */1078{1079*bufflenptr = 0; /* Error offset */1080return PCRE2_ERROR_BADOPTION;1081}10821083if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);1084if (ccontext == NULL) ccontext =1085(pcre2_convert_context *)(&PRIV(default_convert_context));10861087/* Check UTF if required. */10881089#ifndef SUPPORT_UNICODE1090if (utf)1091{1092*bufflenptr = 0; /* Error offset */1093return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;1094}1095#else1096if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)1097{1098PCRE2_SIZE erroroffset;1099rc = PRIV(valid_utf)(pattern, plength, &erroroffset);1100if (rc != 0)1101{1102*bufflenptr = erroroffset;1103return rc;1104}1105}1106#endif11071108/* If buffptr is not NULL, and what it points to is not NULL, we are being1109provided with a buffer and a length, so set them as the buffer to use. */11101111if (buffptr != NULL && *buffptr != NULL)1112{1113use_buffer = *buffptr;1114use_length = *bufflenptr;1115}11161117/* Call an individual converter, either just once (if a buffer was provided or1118just the length is needed), or twice (if a memory allocation is required). */11191120for (int i = 0; i < 2; i++)1121{1122PCRE2_UCHAR *allocated;1123BOOL dummyrun = buffptr == NULL || *buffptr == NULL;11241125switch(pattype)1126{1127case PCRE2_CONVERT_GLOB:1128rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,1129use_buffer, use_length, bufflenptr, dummyrun, ccontext);1130break;11311132case PCRE2_CONVERT_POSIX_BASIC:1133case PCRE2_CONVERT_POSIX_EXTENDED:1134rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,1135bufflenptr, dummyrun, ccontext);1136break;11371138default:1139goto EXIT;1140}11411142if (rc != 0 || /* Error */1143buffptr == NULL || /* Just the length is required */1144*buffptr != NULL) /* Buffer was provided or allocated */1145return rc;11461147/* Allocate memory for the buffer, with hidden space for an allocator at1148the start. The next time round the loop runs the conversion for real. */11491150allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +1151(*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);1152if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;1153*buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));11541155use_buffer = *buffptr;1156use_length = *bufflenptr + 1;1157}11581159/* Something went terribly wrong. Trigger an assert and return an error */1160PCRE2_DEBUG_UNREACHABLE();11611162EXIT:11631164*bufflenptr = 0; /* Error offset */1165return PCRE2_ERROR_INTERNAL;1166}116711681169/*************************************************1170* Free converted pattern *1171*************************************************/11721173/* This frees a converted pattern that was put in newly-allocated memory.11741175Argument: the converted pattern1176Returns: nothing1177*/11781179PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION1180pcre2_converted_pattern_free(PCRE2_UCHAR *converted)1181{1182if (converted != NULL)1183{1184pcre2_memctl *memctl =1185(pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));1186memctl->free(memctl, memctl->memory_data);1187}1188}11891190/* End of pcre2_convert.c */119111921193