Path: blob/master/thirdparty/pcre2/src/pcre2_pattern_info.c
9898 views
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041#ifdef HAVE_CONFIG_H42#include "config.h"43#endif4445#include "pcre2_internal.h"464748/*************************************************49* Return info about compiled pattern *50*************************************************/5152/*53Arguments:54code points to compiled code55what what information is required56where where to put the information; if NULL, return length5758Returns: 0 when data returned59> 0 when length requested60< 0 on error or unset value61*/6263PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION64pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)65{66const pcre2_real_code *re = (const pcre2_real_code *)code;6768if (where == NULL) /* Requests field length */69{70switch(what)71{72case PCRE2_INFO_ALLOPTIONS:73case PCRE2_INFO_ARGOPTIONS:74case PCRE2_INFO_BACKREFMAX:75case PCRE2_INFO_BSR:76case PCRE2_INFO_CAPTURECOUNT:77case PCRE2_INFO_DEPTHLIMIT:78case PCRE2_INFO_EXTRAOPTIONS:79case PCRE2_INFO_FIRSTCODETYPE:80case PCRE2_INFO_FIRSTCODEUNIT:81case PCRE2_INFO_HASBACKSLASHC:82case PCRE2_INFO_HASCRORLF:83case PCRE2_INFO_HEAPLIMIT:84case PCRE2_INFO_JCHANGED:85case PCRE2_INFO_LASTCODETYPE:86case PCRE2_INFO_LASTCODEUNIT:87case PCRE2_INFO_MATCHEMPTY:88case PCRE2_INFO_MATCHLIMIT:89case PCRE2_INFO_MAXLOOKBEHIND:90case PCRE2_INFO_MINLENGTH:91case PCRE2_INFO_NAMEENTRYSIZE:92case PCRE2_INFO_NAMECOUNT:93case PCRE2_INFO_NEWLINE:94return sizeof(uint32_t);9596case PCRE2_INFO_FIRSTBITMAP:97return sizeof(const uint8_t *);9899case PCRE2_INFO_JITSIZE:100case PCRE2_INFO_SIZE:101case PCRE2_INFO_FRAMESIZE:102return sizeof(size_t);103104case PCRE2_INFO_NAMETABLE:105return sizeof(PCRE2_SPTR);106}107}108109if (re == NULL) return PCRE2_ERROR_NULL;110111/* Check that the first field in the block is the magic number. If it is not,112return with PCRE2_ERROR_BADMAGIC. */113114if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;115116/* Check that this pattern was compiled in the correct bit mode */117118if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;119120switch(what)121{122case PCRE2_INFO_ALLOPTIONS:123*((uint32_t *)where) = re->overall_options;124break;125126case PCRE2_INFO_ARGOPTIONS:127*((uint32_t *)where) = re->compile_options;128break;129130case PCRE2_INFO_BACKREFMAX:131*((uint32_t *)where) = re->top_backref;132break;133134case PCRE2_INFO_BSR:135*((uint32_t *)where) = re->bsr_convention;136break;137138case PCRE2_INFO_CAPTURECOUNT:139*((uint32_t *)where) = re->top_bracket;140break;141142case PCRE2_INFO_DEPTHLIMIT:143*((uint32_t *)where) = re->limit_depth;144if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;145break;146147case PCRE2_INFO_EXTRAOPTIONS:148*((uint32_t *)where) = re->extra_options;149break;150151case PCRE2_INFO_FIRSTCODETYPE:152*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :153((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;154break;155156case PCRE2_INFO_FIRSTCODEUNIT:157*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?158re->first_codeunit : 0;159break;160161case PCRE2_INFO_FIRSTBITMAP:162*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?163&(re->start_bitmap[0]) : NULL;164break;165166case PCRE2_INFO_FRAMESIZE:167*((size_t *)where) = offsetof(heapframe, ovector) +168re->top_bracket * 2 * sizeof(PCRE2_SIZE);169break;170171case PCRE2_INFO_HASBACKSLASHC:172*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;173break;174175case PCRE2_INFO_HASCRORLF:176*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;177break;178179case PCRE2_INFO_HEAPLIMIT:180*((uint32_t *)where) = re->limit_heap;181if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;182break;183184case PCRE2_INFO_JCHANGED:185*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;186break;187188case PCRE2_INFO_JITSIZE:189#ifdef SUPPORT_JIT190*((size_t *)where) = (re->executable_jit != NULL)?191PRIV(jit_get_size)(re->executable_jit) : 0;192#else193*((size_t *)where) = 0;194#endif195break;196197case PCRE2_INFO_LASTCODETYPE:198*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;199break;200201case PCRE2_INFO_LASTCODEUNIT:202*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?203re->last_codeunit : 0;204break;205206case PCRE2_INFO_MATCHEMPTY:207*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;208break;209210case PCRE2_INFO_MATCHLIMIT:211*((uint32_t *)where) = re->limit_match;212if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;213break;214215case PCRE2_INFO_MAXLOOKBEHIND:216*((uint32_t *)where) = re->max_lookbehind;217break;218219case PCRE2_INFO_MINLENGTH:220*((uint32_t *)where) = re->minlength;221break;222223case PCRE2_INFO_NAMEENTRYSIZE:224*((uint32_t *)where) = re->name_entry_size;225break;226227case PCRE2_INFO_NAMECOUNT:228*((uint32_t *)where) = re->name_count;229break;230231case PCRE2_INFO_NAMETABLE:232*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re +233sizeof(pcre2_real_code));234break;235236case PCRE2_INFO_NEWLINE:237*((uint32_t *)where) = re->newline_convention;238break;239240case PCRE2_INFO_SIZE:241*((size_t *)where) = re->blocksize;242break;243244default: return PCRE2_ERROR_BADOPTION;245}246247return 0;248}249250251252/*************************************************253* Callout enumerator *254*************************************************/255256/*257Arguments:258code points to compiled code259callback function called for each callout block260callout_data user data passed to the callback261262Returns: 0 when successfully completed263< 0 on local error264!= 0 for callback error265*/266267PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION268pcre2_callout_enumerate(const pcre2_code *code,269int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)270{271const pcre2_real_code *re = (const pcre2_real_code *)code;272pcre2_callout_enumerate_block cb;273PCRE2_SPTR cc;274#ifdef SUPPORT_UNICODE275BOOL utf;276#endif277278if (re == NULL) return PCRE2_ERROR_NULL;279280#ifdef SUPPORT_UNICODE281utf = (re->overall_options & PCRE2_UTF) != 0;282#endif283284/* Check that the first field in the block is the magic number. If it is not,285return with PCRE2_ERROR_BADMAGIC. */286287if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;288289/* Check that this pattern was compiled in the correct bit mode */290291if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;292293cb.version = 0;294cc = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code))295+ re->name_count * re->name_entry_size;296297while (TRUE)298{299int rc;300switch (*cc)301{302case OP_END:303return 0;304305case OP_CHAR:306case OP_CHARI:307case OP_NOT:308case OP_NOTI:309case OP_STAR:310case OP_MINSTAR:311case OP_PLUS:312case OP_MINPLUS:313case OP_QUERY:314case OP_MINQUERY:315case OP_UPTO:316case OP_MINUPTO:317case OP_EXACT:318case OP_POSSTAR:319case OP_POSPLUS:320case OP_POSQUERY:321case OP_POSUPTO:322case OP_STARI:323case OP_MINSTARI:324case OP_PLUSI:325case OP_MINPLUSI:326case OP_QUERYI:327case OP_MINQUERYI:328case OP_UPTOI:329case OP_MINUPTOI:330case OP_EXACTI:331case OP_POSSTARI:332case OP_POSPLUSI:333case OP_POSQUERYI:334case OP_POSUPTOI:335case OP_NOTSTAR:336case OP_NOTMINSTAR:337case OP_NOTPLUS:338case OP_NOTMINPLUS:339case OP_NOTQUERY:340case OP_NOTMINQUERY:341case OP_NOTUPTO:342case OP_NOTMINUPTO:343case OP_NOTEXACT:344case OP_NOTPOSSTAR:345case OP_NOTPOSPLUS:346case OP_NOTPOSQUERY:347case OP_NOTPOSUPTO:348case OP_NOTSTARI:349case OP_NOTMINSTARI:350case OP_NOTPLUSI:351case OP_NOTMINPLUSI:352case OP_NOTQUERYI:353case OP_NOTMINQUERYI:354case OP_NOTUPTOI:355case OP_NOTMINUPTOI:356case OP_NOTEXACTI:357case OP_NOTPOSSTARI:358case OP_NOTPOSPLUSI:359case OP_NOTPOSQUERYI:360case OP_NOTPOSUPTOI:361cc += PRIV(OP_lengths)[*cc];362#ifdef SUPPORT_UNICODE363if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);364#endif365break;366367case OP_TYPESTAR:368case OP_TYPEMINSTAR:369case OP_TYPEPLUS:370case OP_TYPEMINPLUS:371case OP_TYPEQUERY:372case OP_TYPEMINQUERY:373case OP_TYPEUPTO:374case OP_TYPEMINUPTO:375case OP_TYPEEXACT:376case OP_TYPEPOSSTAR:377case OP_TYPEPOSPLUS:378case OP_TYPEPOSQUERY:379case OP_TYPEPOSUPTO:380cc += PRIV(OP_lengths)[*cc];381#ifdef SUPPORT_UNICODE382if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;383#endif384break;385386#ifdef SUPPORT_WIDE_CHARS387case OP_XCLASS:388case OP_ECLASS:389cc += GET(cc, 1);390break;391#endif392393case OP_MARK:394case OP_COMMIT_ARG:395case OP_PRUNE_ARG:396case OP_SKIP_ARG:397case OP_THEN_ARG:398cc += PRIV(OP_lengths)[*cc] + cc[1];399break;400401case OP_CALLOUT:402cb.pattern_position = GET(cc, 1);403cb.next_item_length = GET(cc, 1 + LINK_SIZE);404cb.callout_number = cc[1 + 2*LINK_SIZE];405cb.callout_string_offset = 0;406cb.callout_string_length = 0;407cb.callout_string = NULL;408rc = callback(&cb, callout_data);409if (rc != 0) return rc;410cc += PRIV(OP_lengths)[*cc];411break;412413case OP_CALLOUT_STR:414cb.pattern_position = GET(cc, 1);415cb.next_item_length = GET(cc, 1 + LINK_SIZE);416cb.callout_number = 0;417cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);418cb.callout_string_length =419GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;420cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;421rc = callback(&cb, callout_data);422if (rc != 0) return rc;423cc += GET(cc, 1 + 2*LINK_SIZE);424break;425426default:427cc += PRIV(OP_lengths)[*cc];428break;429}430}431}432433/* End of pcre2_pattern_info.c */434435436