/* Path: blob/master/thirdparty/pcre2/src/pcre2_pattern_info.c (21520 views) */
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041#include "pcre2_internal.h"42434445/*************************************************46* Return info about compiled pattern *47*************************************************/4849/*50Arguments:51code points to compiled code52what what information is required53where where to put the information; if NULL, return length5455Returns: 0 when data returned56> 0 when length requested57< 0 on error or unset value58*/5960PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION61pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)62{63const pcre2_real_code *re = (const pcre2_real_code *)code;6465if (where == NULL) /* Requests field length */66{67switch(what)68{69case PCRE2_INFO_ALLOPTIONS:70case PCRE2_INFO_ARGOPTIONS:71case PCRE2_INFO_BACKREFMAX:72case PCRE2_INFO_BSR:73case PCRE2_INFO_CAPTURECOUNT:74case PCRE2_INFO_DEPTHLIMIT:75case PCRE2_INFO_EXTRAOPTIONS:76case PCRE2_INFO_FIRSTCODETYPE:77case PCRE2_INFO_FIRSTCODEUNIT:78case PCRE2_INFO_HASBACKSLASHC:79case PCRE2_INFO_HASCRORLF:80case PCRE2_INFO_HEAPLIMIT:81case PCRE2_INFO_JCHANGED:82case PCRE2_INFO_LASTCODETYPE:83case PCRE2_INFO_LASTCODEUNIT:84case PCRE2_INFO_MATCHEMPTY:85case PCRE2_INFO_MATCHLIMIT:86case PCRE2_INFO_MAXLOOKBEHIND:87case PCRE2_INFO_MINLENGTH:88case PCRE2_INFO_NAMEENTRYSIZE:89case PCRE2_INFO_NAMECOUNT:90case PCRE2_INFO_NEWLINE:91return sizeof(uint32_t);9293case PCRE2_INFO_FIRSTBITMAP:94return sizeof(const 
uint8_t *);9596case PCRE2_INFO_JITSIZE:97case PCRE2_INFO_SIZE:98case PCRE2_INFO_FRAMESIZE:99return sizeof(size_t);100101case PCRE2_INFO_NAMETABLE:102return sizeof(PCRE2_SPTR);103}104}105106if (re == NULL) return PCRE2_ERROR_NULL;107108/* Check that the first field in the block is the magic number. If it is not,109return with PCRE2_ERROR_BADMAGIC. */110111if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;112113/* Check that this pattern was compiled in the correct bit mode */114115if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;116117switch(what)118{119case PCRE2_INFO_ALLOPTIONS:120*((uint32_t *)where) = re->overall_options;121break;122123case PCRE2_INFO_ARGOPTIONS:124*((uint32_t *)where) = re->compile_options;125break;126127case PCRE2_INFO_BACKREFMAX:128*((uint32_t *)where) = re->top_backref;129break;130131case PCRE2_INFO_BSR:132*((uint32_t *)where) = re->bsr_convention;133break;134135case PCRE2_INFO_CAPTURECOUNT:136*((uint32_t *)where) = re->top_bracket;137break;138139case PCRE2_INFO_DEPTHLIMIT:140*((uint32_t *)where) = re->limit_depth;141if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;142break;143144case PCRE2_INFO_EXTRAOPTIONS:145*((uint32_t *)where) = re->extra_options;146break;147148case PCRE2_INFO_FIRSTCODETYPE:149*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :150((re->flags & PCRE2_STARTLINE) != 0)? 
2 : 0;151break;152153case PCRE2_INFO_FIRSTCODEUNIT:154*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?155re->first_codeunit : 0;156break;157158case PCRE2_INFO_FIRSTBITMAP:159*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?160&(re->start_bitmap[0]) : NULL;161break;162163case PCRE2_INFO_FRAMESIZE:164*((size_t *)where) = offsetof(heapframe, ovector) +165re->top_bracket * 2 * sizeof(PCRE2_SIZE);166break;167168case PCRE2_INFO_HASBACKSLASHC:169*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;170break;171172case PCRE2_INFO_HASCRORLF:173*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;174break;175176case PCRE2_INFO_HEAPLIMIT:177*((uint32_t *)where) = re->limit_heap;178if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;179break;180181case PCRE2_INFO_JCHANGED:182*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;183break;184185case PCRE2_INFO_JITSIZE:186#ifdef SUPPORT_JIT187*((size_t *)where) = (re->executable_jit != NULL)?188PRIV(jit_get_size)(re->executable_jit) : 0;189#else190*((size_t *)where) = 0;191#endif192break;193194case PCRE2_INFO_LASTCODETYPE:195*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 
1 : 0;196break;197198case PCRE2_INFO_LASTCODEUNIT:199*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?200re->last_codeunit : 0;201break;202203case PCRE2_INFO_MATCHEMPTY:204*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;205break;206207case PCRE2_INFO_MATCHLIMIT:208*((uint32_t *)where) = re->limit_match;209if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;210break;211212case PCRE2_INFO_MAXLOOKBEHIND:213*((uint32_t *)where) = re->max_lookbehind;214break;215216case PCRE2_INFO_MINLENGTH:217*((uint32_t *)where) = re->minlength;218break;219220case PCRE2_INFO_NAMEENTRYSIZE:221*((uint32_t *)where) = re->name_entry_size;222break;223224case PCRE2_INFO_NAMECOUNT:225*((uint32_t *)where) = re->name_count;226break;227228case PCRE2_INFO_NAMETABLE:229*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re +230sizeof(pcre2_real_code));231break;232233case PCRE2_INFO_NEWLINE:234*((uint32_t *)where) = re->newline_convention;235break;236237case PCRE2_INFO_SIZE:238*((size_t *)where) = re->blocksize;239break;240241default: return PCRE2_ERROR_BADOPTION;242}243244return 0;245}246247248249/*************************************************250* Callout enumerator *251*************************************************/252253/*254Arguments:255code points to compiled code256callback function called for each callout block257callout_data user data passed to the callback258259Returns: 0 when successfully completed260< 0 on local error261!= 0 for callback error262*/263264PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION265pcre2_callout_enumerate(const pcre2_code *code,266int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)267{268const pcre2_real_code *re = (const pcre2_real_code *)code;269pcre2_callout_enumerate_block cb;270PCRE2_SPTR cc;271#ifdef SUPPORT_UNICODE272BOOL utf;273#endif274275if (re == NULL) return PCRE2_ERROR_NULL;276277#ifdef SUPPORT_UNICODE278utf = (re->overall_options & PCRE2_UTF) != 0;279#endif280281/* Check that the first field in the 
block is the magic number. If it is not,282return with PCRE2_ERROR_BADMAGIC. */283284if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;285286/* Check that this pattern was compiled in the correct bit mode */287288if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;289290cb.version = 0;291cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);292293while (TRUE)294{295int rc;296switch (*cc)297{298case OP_END:299return 0;300301case OP_CHAR:302case OP_CHARI:303case OP_NOT:304case OP_NOTI:305case OP_STAR:306case OP_MINSTAR:307case OP_PLUS:308case OP_MINPLUS:309case OP_QUERY:310case OP_MINQUERY:311case OP_UPTO:312case OP_MINUPTO:313case OP_EXACT:314case OP_POSSTAR:315case OP_POSPLUS:316case OP_POSQUERY:317case OP_POSUPTO:318case OP_STARI:319case OP_MINSTARI:320case OP_PLUSI:321case OP_MINPLUSI:322case OP_QUERYI:323case OP_MINQUERYI:324case OP_UPTOI:325case OP_MINUPTOI:326case OP_EXACTI:327case OP_POSSTARI:328case OP_POSPLUSI:329case OP_POSQUERYI:330case OP_POSUPTOI:331case OP_NOTSTAR:332case OP_NOTMINSTAR:333case OP_NOTPLUS:334case OP_NOTMINPLUS:335case OP_NOTQUERY:336case OP_NOTMINQUERY:337case OP_NOTUPTO:338case OP_NOTMINUPTO:339case OP_NOTEXACT:340case OP_NOTPOSSTAR:341case OP_NOTPOSPLUS:342case OP_NOTPOSQUERY:343case OP_NOTPOSUPTO:344case OP_NOTSTARI:345case OP_NOTMINSTARI:346case OP_NOTPLUSI:347case OP_NOTMINPLUSI:348case OP_NOTQUERYI:349case OP_NOTMINQUERYI:350case OP_NOTUPTOI:351case OP_NOTMINUPTOI:352case OP_NOTEXACTI:353case OP_NOTPOSSTARI:354case OP_NOTPOSPLUSI:355case OP_NOTPOSQUERYI:356case OP_NOTPOSUPTOI:357cc += PRIV(OP_lengths)[*cc];358#ifdef SUPPORT_UNICODE359if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);360#endif361break;362363case OP_TYPESTAR:364case OP_TYPEMINSTAR:365case OP_TYPEPLUS:366case OP_TYPEMINPLUS:367case OP_TYPEQUERY:368case OP_TYPEMINQUERY:369case OP_TYPEUPTO:370case OP_TYPEMINUPTO:371case OP_TYPEEXACT:372case OP_TYPEPOSSTAR:373case OP_TYPEPOSPLUS:374case OP_TYPEPOSQUERY:375case 
OP_TYPEPOSUPTO:376cc += PRIV(OP_lengths)[*cc];377#ifdef SUPPORT_UNICODE378if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;379#endif380break;381382#ifdef SUPPORT_WIDE_CHARS383case OP_XCLASS:384case OP_ECLASS:385cc += GET(cc, 1);386break;387#endif388389case OP_MARK:390case OP_COMMIT_ARG:391case OP_PRUNE_ARG:392case OP_SKIP_ARG:393case OP_THEN_ARG:394cc += PRIV(OP_lengths)[*cc] + cc[1];395break;396397case OP_CALLOUT:398cb.pattern_position = GET(cc, 1);399cb.next_item_length = GET(cc, 1 + LINK_SIZE);400cb.callout_number = cc[1 + 2*LINK_SIZE];401cb.callout_string_offset = 0;402cb.callout_string_length = 0;403cb.callout_string = NULL;404rc = callback(&cb, callout_data);405if (rc != 0) return rc;406cc += PRIV(OP_lengths)[*cc];407break;408409case OP_CALLOUT_STR:410cb.pattern_position = GET(cc, 1);411cb.next_item_length = GET(cc, 1 + LINK_SIZE);412cb.callout_number = 0;413cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);414cb.callout_string_length =415GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;416cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;417rc = callback(&cb, callout_data);418if (rc != 0) return rc;419cc += GET(cc, 1 + 2*LINK_SIZE);420break;421422default:423cc += PRIV(OP_lengths)[*cc];424break;425}426}427}428429/* End of pcre2_pattern_info.c */430431432