Path: blob/master/thirdparty/pcre2/src/pcre2_substring.c
9898 views
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041#ifdef HAVE_CONFIG_H42#include "config.h"43#endif4445#include "pcre2_internal.h"46474849/*************************************************50* Copy named captured string to given buffer *51*************************************************/5253/* This function copies a single captured substring into a given buffer,54identifying it by name. If the regex permits duplicate names, the first55substring that is set is chosen.5657Arguments:58match_data points to the match data59stringname the name of the required substring60buffer where to put the substring61sizeptr the size of the buffer, updated to the size of the substring6263Returns: if successful: zero64if not successful, a negative error code:65(1) an error from nametable_scan()66(2) an error from copy_bynumber()67(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector68(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset69*/7071PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION72pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,73PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)74{75PCRE2_SPTR first, last, entry;76int failrc, entrysize;77if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)78return PCRE2_ERROR_DFA_UFUNC;79entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,80&first, &last);81if (entrysize < 0) return entrysize;82failrc = PCRE2_ERROR_UNAVAILABLE;83for (entry = first; entry <= last; entry += entrysize)84{85uint32_t n = GET2(entry, 0);86if (n < match_data->oveccount)87{88if (match_data->ovector[n*2] != PCRE2_UNSET)89return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);90failrc = PCRE2_ERROR_UNSET;91}92}93return failrc;94}95969798/*************************************************99* Copy numbered captured string to given buffer *100*************************************************/101102/* This function copies a single captured substring into a given buffer,103identifying it by number.104105Arguments:106match_data points to the match data107stringnumber the number of the required substring108buffer where to put the substring109sizeptr the size of the buffer, updated to the size of the substring110111Returns: if successful: 0112if not successful, a negative error code:113PCRE2_ERROR_NOMEMORY: buffer too small114PCRE2_ERROR_NOSUBSTRING: no such substring115PCRE2_ERROR_UNAVAILABLE: ovector too small116PCRE2_ERROR_UNSET: substring is not set117*/118119PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION120pcre2_substring_copy_bynumber(pcre2_match_data *match_data,121uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)122{123int rc;124PCRE2_SIZE size;125rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);126if (rc < 0) return rc;127if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;128memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],129CU2BYTES(size));130buffer[size] = 0;131*sizeptr = size;132return 0;133}134135136137/*************************************************138* Extract named captured string *139*************************************************/140141/* This function copies a single captured substring, identified by name, into142new memory. If the regex permits duplicate names, the first substring that is143set is chosen.144145Arguments:146match_data pointer to match_data147stringname the name of the required substring148stringptr where to put the pointer to the new memory149sizeptr where to put the length of the substring150151Returns: if successful: zero152if not successful, a negative value:153(1) an error from nametable_scan()154(2) an error from get_bynumber()155(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector156(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset157*/158159PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION160pcre2_substring_get_byname(pcre2_match_data *match_data,161PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)162{163PCRE2_SPTR first, last, entry;164int failrc, entrysize;165if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)166return PCRE2_ERROR_DFA_UFUNC;167entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,168&first, &last);169if (entrysize < 0) return entrysize;170failrc = PCRE2_ERROR_UNAVAILABLE;171for (entry = first; entry <= last; entry += entrysize)172{173uint32_t n = GET2(entry, 0);174if (n < match_data->oveccount)175{176if (match_data->ovector[n*2] != PCRE2_UNSET)177return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);178failrc = PCRE2_ERROR_UNSET;179}180}181return failrc;182}183184185186/*************************************************187* Extract captured string to new memory *188*************************************************/189190/* This function copies a single captured substring into a piece of new191memory.192193Arguments:194match_data points to match data195stringnumber the number of the required substring196stringptr where to put a pointer to the new memory197sizeptr where to put the size of the substring198199Returns: if successful: 0200if not successful, a negative error code:201PCRE2_ERROR_NOMEMORY: failed to get memory202PCRE2_ERROR_NOSUBSTRING: no such substring203PCRE2_ERROR_UNAVAILABLE: ovector too small204PCRE2_ERROR_UNSET: substring is not set205*/206207PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION208pcre2_substring_get_bynumber(pcre2_match_data *match_data,209uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)210{211int rc;212PCRE2_SIZE size;213PCRE2_UCHAR *yield;214rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);215if (rc < 0) return rc;216yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +217(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);218if (yield == NULL) return PCRE2_ERROR_NOMEMORY;219yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));220memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],221CU2BYTES(size));222yield[size] = 0;223*stringptr = yield;224*sizeptr = size;225return 0;226}227228229230/*************************************************231* Free memory obtained by get_substring *232*************************************************/233234/*235Argument: the result of a previous pcre2_substring_get_byxxx()236Returns: nothing237*/238239PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION240pcre2_substring_free(PCRE2_UCHAR *string)241{242if (string != NULL)243{244pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));245memctl->free(memctl, memctl->memory_data);246}247}248249250251/*************************************************252* Get length of a named substring *253*************************************************/254255/* This function returns the length of a named captured substring. If the regex256permits duplicate names, the first substring that is set is chosen.257258Arguments:259match_data pointer to match data260stringname the name of the required substring261sizeptr where to put the length262263Returns: 0 if successful, else a negative error number264*/265266PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION267pcre2_substring_length_byname(pcre2_match_data *match_data,268PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)269{270PCRE2_SPTR first, last, entry;271int failrc, entrysize;272if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)273return PCRE2_ERROR_DFA_UFUNC;274entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,275&first, &last);276if (entrysize < 0) return entrysize;277failrc = PCRE2_ERROR_UNAVAILABLE;278for (entry = first; entry <= last; entry += entrysize)279{280uint32_t n = GET2(entry, 0);281if (n < match_data->oveccount)282{283if (match_data->ovector[n*2] != PCRE2_UNSET)284return pcre2_substring_length_bynumber(match_data, n, sizeptr);285failrc = PCRE2_ERROR_UNSET;286}287}288return failrc;289}290291292293/*************************************************294* Get length of a numbered substring *295*************************************************/296297/* This function returns the length of a captured substring. If the start is298beyond the end (which can happen when \K is used in an assertion), it sets the299length to zero.300301Arguments:302match_data pointer to match data303stringnumber the number of the required substring304sizeptr where to put the length, if not NULL305306Returns: if successful: 0307if not successful, a negative error code:308PCRE2_ERROR_NOSUBSTRING: no such substring309PCRE2_ERROR_UNAVAILABLE: ovector is too small310PCRE2_ERROR_UNSET: substring is not set311PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur312*/313314PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION315pcre2_substring_length_bynumber(pcre2_match_data *match_data,316uint32_t stringnumber, PCRE2_SIZE *sizeptr)317{318PCRE2_SIZE left, right;319int count = match_data->rc;320if (count == PCRE2_ERROR_PARTIAL)321{322if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;323count = 0;324}325else if (count < 0) return count; /* Match failed */326327if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)328{329if (stringnumber > match_data->code->top_bracket)330return PCRE2_ERROR_NOSUBSTRING;331if (stringnumber >= match_data->oveccount)332return PCRE2_ERROR_UNAVAILABLE;333if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)334return PCRE2_ERROR_UNSET;335}336else /* Matched using pcre2_dfa_match() */337{338if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;339if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;340}341342left = match_data->ovector[stringnumber*2];343right = match_data->ovector[stringnumber*2+1];344if (left > match_data->subject_length || right > match_data->subject_length)345return PCRE2_ERROR_INVALIDOFFSET;346if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;347return 0;348}349350351352/*************************************************353* Extract all captured strings to new memory *354*************************************************/355356/* This function gets one chunk of memory and builds a list of pointers and all357the captured substrings in it. A NULL pointer is put on the end of the list.358The substrings are zero-terminated, but also, if the final argument is359non-NULL, a list of lengths is also returned. This allows binary data to be360handled.361362Arguments:363match_data points to the match data364listptr set to point to the list of pointers365lengthsptr set to point to the list of lengths (may be NULL)366367Returns: if successful: 0368if not successful, a negative error code:369PCRE2_ERROR_NOMEMORY: failed to get memory,370or a match failure code371*/372373PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION374pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,375PCRE2_SIZE **lengthsptr)376{377int i, count, count2;378PCRE2_SIZE size;379PCRE2_SIZE *lensp;380pcre2_memctl *memp;381PCRE2_UCHAR **listp;382PCRE2_UCHAR *sp;383PCRE2_SIZE *ovector;384385if ((count = match_data->rc) < 0) return count; /* Match failed */386if (count == 0) count = match_data->oveccount; /* Ovector too small */387388count2 = 2*count;389ovector = match_data->ovector;390size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */391if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */392393for (i = 0; i < count2; i += 2)394{395size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);396if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);397}398399memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);400if (memp == NULL) return PCRE2_ERROR_NOMEMORY;401402*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));403lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));404405if (lengthsptr == NULL)406{407sp = (PCRE2_UCHAR *)lensp;408lensp = NULL;409}410else411{412*lengthsptr = lensp;413sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);414}415416for (i = 0; i < count2; i += 2)417{418size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;419420/* Size == 0 includes the case when the capture is unset. Avoid adding421PCRE2_UNSET to match_data->subject because it overflows, even though with422zero size calling memcpy() is harmless. */423424if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));425*listp++ = sp;426if (lensp != NULL) *lensp++ = size;427sp += size;428*sp++ = 0;429}430431*listp = NULL;432return 0;433}434435436437/*************************************************438* Free memory obtained by substring_list_get *439*************************************************/440441/*442Argument: the result of a previous pcre2_substring_list_get()443Returns: nothing444*/445446PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION447pcre2_substring_list_free(PCRE2_UCHAR **list)448{449if (list != NULL)450{451pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));452memctl->free(memctl, memctl->memory_data);453}454}455456457458/*************************************************459* Find (multiple) entries for named string *460*************************************************/461462/* This function scans the nametable for a given name, using binary chop. It463returns either two pointers to the entries in the table, or, if no pointers are464given, the number of a unique group with the given name. If duplicate names are465permitted, and the name is not unique, an error is generated.466467Arguments:468code the compiled regex469stringname the name whose entries required470firstptr where to put the pointer to the first entry471lastptr where to put the pointer to the last entry472473Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found474otherwise, if firstptr and lastptr are NULL:475a group number for a unique substring476else PCRE2_ERROR_NOUNIQUESUBSTRING477otherwise:478the length of each entry, having set firstptr and lastptr479*/480481PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION482pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,483PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)484{485uint16_t bot = 0;486uint16_t top = code->name_count;487uint16_t entrysize = code->name_entry_size;488PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));489490while (top > bot)491{492uint16_t mid = (top + bot) / 2;493PCRE2_SPTR entry = nametable + entrysize*mid;494int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);495if (c == 0)496{497PCRE2_SPTR first;498PCRE2_SPTR last;499PCRE2_SPTR lastentry;500lastentry = nametable + entrysize * (code->name_count - 1);501first = last = entry;502while (first > nametable)503{504if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;505first -= entrysize;506}507while (last < lastentry)508{509if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;510last += entrysize;511}512if (firstptr == NULL) return (first == last)?513(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;514*firstptr = first;515*lastptr = last;516return entrysize;517}518if (c > 0) bot = mid + 1; else top = mid;519}520521return PCRE2_ERROR_NOSUBSTRING;522}523524525/*************************************************526* Find number for named string *527*************************************************/528529/* This function is a convenience wrapper for pcre2_substring_nametable_scan()530when it is known that names are unique. If there are duplicate names, it is not531defined which number is returned.532533Arguments:534code the compiled regex535stringname the name whose number is required536537Returns: the number of the named parenthesis, or a negative number538PCRE2_ERROR_NOSUBSTRING if not found539PCRE2_ERROR_NOUNIQUESUBSTRING if not unique540*/541542PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION543pcre2_substring_number_from_name(const pcre2_code *code,544PCRE2_SPTR stringname)545{546return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);547}548549/* End of pcre2_substring.c */550551552