/* Path: blob/master/thirdparty/pcre2/src/pcre2_find_bracket.c
   21745 views */
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041/* This module contains a single function that scans through a compiled pattern42until it finds a capturing bracket with the given number, or, if the number is43negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The44function is called from pcre2_compile.c and also from pcre2_study.c when45finding the minimum matching length. */464748#include "pcre2_internal.h"49505152/*************************************************53* Scan compiled regex for specific bracket *54*************************************************/5556/*57Arguments:58code points to start of expression59utf TRUE in UTF mode60number the required bracket number or negative to find a lookbehind6162Returns: pointer to the opcode for the bracket, or NULL if not found63*/6465PCRE2_SPTR66PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)67{68for (;;)69{70PCRE2_UCHAR c = *code;7172if (c == OP_END) return NULL;7374/* XCLASS is used for classes that cannot be represented just by a bit map.75This includes negated single high-valued characters. ECLASS is used for76classes that use set operations internally. CALLOUT_STR is used for77callouts with string arguments. In each case the length in the table is78zero; the actual length is stored in the compiled code. 
*/7980if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);81else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);8283/* Handle lookbehind */8485else if (c == OP_REVERSE || c == OP_VREVERSE)86{87if (number < 0) return code;88code += PRIV(OP_lengths)[c];89}9091/* Handle capturing bracket */9293else if (c == OP_CBRA || c == OP_SCBRA ||94c == OP_CBRAPOS || c == OP_SCBRAPOS)95{96int n = (int)GET2(code, 1+LINK_SIZE);97if (n == number) return code;98code += PRIV(OP_lengths)[c];99}100101/* Otherwise, we can get the item's length from the table, except that for102repeated character types, we have to test for \p and \P, which have an extra103two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we104must add in its length. */105106else107{108switch(c)109{110case OP_TYPESTAR:111case OP_TYPEMINSTAR:112case OP_TYPEPLUS:113case OP_TYPEMINPLUS:114case OP_TYPEQUERY:115case OP_TYPEMINQUERY:116case OP_TYPEPOSSTAR:117case OP_TYPEPOSPLUS:118case OP_TYPEPOSQUERY:119if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;120break;121122case OP_TYPEUPTO:123case OP_TYPEMINUPTO:124case OP_TYPEEXACT:125case OP_TYPEPOSUPTO:126if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)127code += 2;128break;129130case OP_MARK:131case OP_COMMIT_ARG:132case OP_PRUNE_ARG:133case OP_SKIP_ARG:134case OP_THEN_ARG:135code += code[1];136break;137}138139/* Add in the fixed length from the table */140141code += PRIV(OP_lengths)[c];142143/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be144followed by a multi-byte character. The length in the table is a minimum, so145we have to arrange to skip the extra bytes. 
*/146147#ifdef MAYBE_UTF_MULTI148if (utf) switch(c)149{150case OP_CHAR:151case OP_CHARI:152case OP_NOT:153case OP_NOTI:154case OP_EXACT:155case OP_EXACTI:156case OP_NOTEXACT:157case OP_NOTEXACTI:158case OP_UPTO:159case OP_UPTOI:160case OP_NOTUPTO:161case OP_NOTUPTOI:162case OP_MINUPTO:163case OP_MINUPTOI:164case OP_NOTMINUPTO:165case OP_NOTMINUPTOI:166case OP_POSUPTO:167case OP_POSUPTOI:168case OP_NOTPOSUPTO:169case OP_NOTPOSUPTOI:170case OP_STAR:171case OP_STARI:172case OP_NOTSTAR:173case OP_NOTSTARI:174case OP_MINSTAR:175case OP_MINSTARI:176case OP_NOTMINSTAR:177case OP_NOTMINSTARI:178case OP_POSSTAR:179case OP_POSSTARI:180case OP_NOTPOSSTAR:181case OP_NOTPOSSTARI:182case OP_PLUS:183case OP_PLUSI:184case OP_NOTPLUS:185case OP_NOTPLUSI:186case OP_MINPLUS:187case OP_MINPLUSI:188case OP_NOTMINPLUS:189case OP_NOTMINPLUSI:190case OP_POSPLUS:191case OP_POSPLUSI:192case OP_NOTPOSPLUS:193case OP_NOTPOSPLUSI:194case OP_QUERY:195case OP_QUERYI:196case OP_NOTQUERY:197case OP_NOTQUERYI:198case OP_MINQUERY:199case OP_MINQUERYI:200case OP_NOTMINQUERY:201case OP_NOTMINQUERYI:202case OP_POSQUERY:203case OP_POSQUERYI:204case OP_NOTPOSQUERY:205case OP_NOTPOSQUERYI:206if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);207break;208}209#else210(void)(utf); /* Keep compiler happy by referencing function argument */211#endif /* MAYBE_UTF_MULTI */212}213}214}215216/* End of pcre2_find_bracket.c */217218219