/* Path: blob/master/thirdparty/pcre2/src/pcre2_find_bracket.c (9898 views) */
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041/* This module contains a single function that scans through a compiled pattern42until it finds a capturing bracket with the given number, or, if the number is43negative, an instance of OP_REVERSE or OP_VREVERSE for a lookbehind. The44function is called from pcre2_compile.c and also from pcre2_study.c when45finding the minimum matching length. */464748#ifdef HAVE_CONFIG_H49#include "config.h"50#endif5152#include "pcre2_internal.h"535455/*************************************************56* Scan compiled regex for specific bracket *57*************************************************/5859/*60Arguments:61code points to start of expression62utf TRUE in UTF mode63number the required bracket number or negative to find a lookbehind6465Returns: pointer to the opcode for the bracket, or NULL if not found66*/6768PCRE2_SPTR69PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)70{71for (;;)72{73PCRE2_UCHAR c = *code;7475if (c == OP_END) return NULL;7677/* XCLASS is used for classes that cannot be represented just by a bit map.78This includes negated single high-valued characters. ECLASS is used for79classes that use set operations internally. CALLOUT_STR is used for80callouts with string arguments. In each case the length in the table is81zero; the actual length is stored in the compiled code. 
*/8283if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);84else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);8586/* Handle lookbehind */8788else if (c == OP_REVERSE || c == OP_VREVERSE)89{90if (number < 0) return code;91code += PRIV(OP_lengths)[c];92}9394/* Handle capturing bracket */9596else if (c == OP_CBRA || c == OP_SCBRA ||97c == OP_CBRAPOS || c == OP_SCBRAPOS)98{99int n = (int)GET2(code, 1+LINK_SIZE);100if (n == number) return code;101code += PRIV(OP_lengths)[c];102}103104/* Otherwise, we can get the item's length from the table, except that for105repeated character types, we have to test for \p and \P, which have an extra106two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we107must add in its length. */108109else110{111switch(c)112{113case OP_TYPESTAR:114case OP_TYPEMINSTAR:115case OP_TYPEPLUS:116case OP_TYPEMINPLUS:117case OP_TYPEQUERY:118case OP_TYPEMINQUERY:119case OP_TYPEPOSSTAR:120case OP_TYPEPOSPLUS:121case OP_TYPEPOSQUERY:122if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;123break;124125case OP_TYPEUPTO:126case OP_TYPEMINUPTO:127case OP_TYPEEXACT:128case OP_TYPEPOSUPTO:129if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)130code += 2;131break;132133case OP_MARK:134case OP_COMMIT_ARG:135case OP_PRUNE_ARG:136case OP_SKIP_ARG:137case OP_THEN_ARG:138code += code[1];139break;140}141142/* Add in the fixed length from the table */143144code += PRIV(OP_lengths)[c];145146/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be147followed by a multi-byte character. The length in the table is a minimum, so148we have to arrange to skip the extra bytes. 
*/149150#ifdef MAYBE_UTF_MULTI151if (utf) switch(c)152{153case OP_CHAR:154case OP_CHARI:155case OP_NOT:156case OP_NOTI:157case OP_EXACT:158case OP_EXACTI:159case OP_NOTEXACT:160case OP_NOTEXACTI:161case OP_UPTO:162case OP_UPTOI:163case OP_NOTUPTO:164case OP_NOTUPTOI:165case OP_MINUPTO:166case OP_MINUPTOI:167case OP_NOTMINUPTO:168case OP_NOTMINUPTOI:169case OP_POSUPTO:170case OP_POSUPTOI:171case OP_NOTPOSUPTO:172case OP_NOTPOSUPTOI:173case OP_STAR:174case OP_STARI:175case OP_NOTSTAR:176case OP_NOTSTARI:177case OP_MINSTAR:178case OP_MINSTARI:179case OP_NOTMINSTAR:180case OP_NOTMINSTARI:181case OP_POSSTAR:182case OP_POSSTARI:183case OP_NOTPOSSTAR:184case OP_NOTPOSSTARI:185case OP_PLUS:186case OP_PLUSI:187case OP_NOTPLUS:188case OP_NOTPLUSI:189case OP_MINPLUS:190case OP_MINPLUSI:191case OP_NOTMINPLUS:192case OP_NOTMINPLUSI:193case OP_POSPLUS:194case OP_POSPLUSI:195case OP_NOTPOSPLUS:196case OP_NOTPOSPLUSI:197case OP_QUERY:198case OP_QUERYI:199case OP_NOTQUERY:200case OP_NOTQUERYI:201case OP_MINQUERY:202case OP_MINQUERYI:203case OP_NOTMINQUERY:204case OP_NOTMINQUERYI:205case OP_POSQUERY:206case OP_POSQUERYI:207case OP_NOTPOSQUERY:208case OP_NOTPOSQUERYI:209if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);210break;211}212#else213(void)(utf); /* Keep compiler happy by referencing function argument */214#endif /* MAYBE_UTF_MULTI */215}216}217}218219/* End of pcre2_find_bracket.c */220221222