Path: blob/master/thirdparty/pcre2/src/pcre2_substring.c
21326 views
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041#include "pcre2_internal.h"42434445/*************************************************46* Copy named captured string to given buffer *47*************************************************/4849/* This function copies a single captured substring into a given buffer,50identifying it by name. If the regex permits duplicate names, the first51substring that is set is chosen.5253Arguments:54match_data points to the match data55stringname the name of the required substring56buffer where to put the substring57sizeptr the size of the buffer, updated to the size of the substring5859Returns: if successful: zero60if not successful, a negative error code:61(1) an error from nametable_scan()62(2) an error from copy_bynumber()63(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector64(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset65*/6667PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION68pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,69PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)70{71PCRE2_SPTR first, last, entry;72int failrc, entrysize;73if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)74return PCRE2_ERROR_DFA_UFUNC;75entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,76&first, &last);77if (entrysize < 0) return entrysize;78failrc = PCRE2_ERROR_UNAVAILABLE;79for (entry = first; entry <= last; entry += entrysize)80{81uint32_t n = GET2(entry, 0);82if (n < match_data->oveccount)83{84if (match_data->ovector[n*2] != PCRE2_UNSET)85return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);86failrc = PCRE2_ERROR_UNSET;87}88}89return failrc;90}91929394/*************************************************95* Copy numbered captured string to given buffer *96*************************************************/9798/* This function copies a single captured substring into a given buffer,99identifying it by number.100101Arguments:102match_data points to the match data103stringnumber the number of the required substring104buffer where to put the substring105sizeptr the size of the buffer, updated to the size of the substring106107Returns: if successful: 0108if not successful, a negative error code:109PCRE2_ERROR_NOMEMORY: buffer too small110PCRE2_ERROR_NOSUBSTRING: no such substring111PCRE2_ERROR_UNAVAILABLE: ovector too small112PCRE2_ERROR_UNSET: substring is not set113*/114115PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION116pcre2_substring_copy_bynumber(pcre2_match_data *match_data,117uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)118{119int rc;120PCRE2_SIZE size;121rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);122if (rc < 0) return rc;123if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;124if (size != 0) memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],125CU2BYTES(size));126buffer[size] = 0;127*sizeptr = size;128return 0;129}130131132133/*************************************************134* Extract named captured string *135*************************************************/136137/* This function copies a single captured substring, identified by name, into138new memory. If the regex permits duplicate names, the first substring that is139set is chosen.140141Arguments:142match_data pointer to match_data143stringname the name of the required substring144stringptr where to put the pointer to the new memory145sizeptr where to put the length of the substring146147Returns: if successful: zero148if not successful, a negative value:149(1) an error from nametable_scan()150(2) an error from get_bynumber()151(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector152(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset153*/154155PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION156pcre2_substring_get_byname(pcre2_match_data *match_data,157PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)158{159PCRE2_SPTR first, last, entry;160int failrc, entrysize;161if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)162return PCRE2_ERROR_DFA_UFUNC;163entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,164&first, &last);165if (entrysize < 0) return entrysize;166failrc = PCRE2_ERROR_UNAVAILABLE;167for (entry = first; entry <= last; entry += entrysize)168{169uint32_t n = GET2(entry, 0);170if (n < match_data->oveccount)171{172if (match_data->ovector[n*2] != PCRE2_UNSET)173return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);174failrc = PCRE2_ERROR_UNSET;175}176}177return failrc;178}179180181182/*************************************************183* Extract captured string to new memory *184*************************************************/185186/* This function copies a single captured substring into a piece of new187memory.188189Arguments:190match_data points to match data191stringnumber the number of the required substring192stringptr where to put a pointer to the new memory193sizeptr where to put the size of the substring194195Returns: if successful: 0196if not successful, a negative error code:197PCRE2_ERROR_NOMEMORY: failed to get memory198PCRE2_ERROR_NOSUBSTRING: no such substring199PCRE2_ERROR_UNAVAILABLE: ovector too small200PCRE2_ERROR_UNSET: substring is not set201*/202203PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION204pcre2_substring_get_bynumber(pcre2_match_data *match_data,205uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)206{207int rc;208PCRE2_SIZE size;209PCRE2_UCHAR *yield;210rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);211if (rc < 0) return rc;212yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +213(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);214if (yield == NULL) return PCRE2_ERROR_NOMEMORY;215yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));216if (size != 0) memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],217CU2BYTES(size));218yield[size] = 0;219*stringptr = yield;220*sizeptr = size;221return 0;222}223224225226/*************************************************227* Free memory obtained by get_substring *228*************************************************/229230/*231Argument: the result of a previous pcre2_substring_get_byxxx()232Returns: nothing233*/234235PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION236pcre2_substring_free(PCRE2_UCHAR *string)237{238if (string != NULL)239{240pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));241memctl->free(memctl, memctl->memory_data);242}243}244245246247/*************************************************248* Get length of a named substring *249*************************************************/250251/* This function returns the length of a named captured substring. If the regex252permits duplicate names, the first substring that is set is chosen.253254Arguments:255match_data pointer to match data256stringname the name of the required substring257sizeptr where to put the length, if not NULL258259Returns: 0 if successful, else a negative error number260*/261262PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION263pcre2_substring_length_byname(pcre2_match_data *match_data,264PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)265{266PCRE2_SPTR first, last, entry;267int failrc, entrysize;268if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)269return PCRE2_ERROR_DFA_UFUNC;270entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,271&first, &last);272if (entrysize < 0) return entrysize;273failrc = PCRE2_ERROR_UNAVAILABLE;274for (entry = first; entry <= last; entry += entrysize)275{276uint32_t n = GET2(entry, 0);277if (n < match_data->oveccount)278{279if (match_data->ovector[n*2] != PCRE2_UNSET)280return pcre2_substring_length_bynumber(match_data, n, sizeptr);281failrc = PCRE2_ERROR_UNSET;282}283}284return failrc;285}286287288289/*************************************************290* Get length of a numbered substring *291*************************************************/292293/* This function returns the length of a captured substring. If the start is294beyond the end (which can happen when \K is used in an assertion), it sets the295length to zero.296297Arguments:298match_data pointer to match data299stringnumber the number of the required substring300sizeptr where to put the length, if not NULL301302Returns: if successful: 0303if not successful, a negative error code:304PCRE2_ERROR_NOSUBSTRING: no such substring305PCRE2_ERROR_UNAVAILABLE: ovector is too small306PCRE2_ERROR_UNSET: substring is not set307PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur308*/309310PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION311pcre2_substring_length_bynumber(pcre2_match_data *match_data,312uint32_t stringnumber, PCRE2_SIZE *sizeptr)313{314PCRE2_SIZE left, right;315int count = match_data->rc;316if (count == PCRE2_ERROR_PARTIAL)317{318if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;319count = 0;320}321else if (count < 0) return count; /* Match failed */322323if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)324{325if (stringnumber > match_data->code->top_bracket)326return PCRE2_ERROR_NOSUBSTRING;327if (stringnumber >= match_data->oveccount)328return PCRE2_ERROR_UNAVAILABLE;329if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)330return PCRE2_ERROR_UNSET;331}332else /* Matched using pcre2_dfa_match() */333{334if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;335if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;336}337338left = match_data->ovector[stringnumber*2];339right = match_data->ovector[stringnumber*2+1];340/* LCOV_EXCL_START - this appears to be unreachable, as the ovector and341subject_length should always be set consistently, no matter what misbehaviour342the caller has committed. */343if (left > match_data->subject_length || right > match_data->subject_length)344{345PCRE2_DEBUG_UNREACHABLE();346return PCRE2_ERROR_INVALIDOFFSET;347}348/* LCOV_EXCL_STOP */349if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;350return 0;351}352353354355/*************************************************356* Extract all captured strings to new memory *357*************************************************/358359/* This function gets one chunk of memory and builds a list of pointers and all360the captured substrings in it. A NULL pointer is put on the end of the list.361The substrings are zero-terminated, but also, if the final argument is362non-NULL, a list of lengths is also returned. This allows binary data to be363handled.364365Arguments:366match_data points to the match data367listptr set to point to the list of pointers368lengthsptr set to point to the list of lengths (may be NULL)369370Returns: if successful: 0371if not successful, a negative error code:372PCRE2_ERROR_NOMEMORY: failed to get memory,373or a match failure code374*/375376PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION377pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,378PCRE2_SIZE **lengthsptr)379{380int i, count, count2;381PCRE2_SIZE size;382PCRE2_SIZE *lensp;383pcre2_memctl *memp;384PCRE2_UCHAR **listp;385PCRE2_UCHAR *sp;386PCRE2_SIZE *ovector;387388if ((count = match_data->rc) < 0) return count; /* Match failed */389if (count == 0) count = match_data->oveccount; /* Ovector too small */390391count2 = 2*count;392ovector = match_data->ovector;393size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */394if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */395396for (i = 0; i < count2; i += 2)397{398size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);399if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);400}401402memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);403if (memp == NULL) return PCRE2_ERROR_NOMEMORY;404405*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));406lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));407408if (lengthsptr == NULL)409{410sp = (PCRE2_UCHAR *)lensp;411lensp = NULL;412}413else414{415*lengthsptr = lensp;416sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);417}418419for (i = 0; i < count2; i += 2)420{421size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;422423/* Size == 0 includes the case when the capture is unset. Avoid adding424PCRE2_UNSET to match_data->subject because it overflows, even though with425zero size calling memcpy() is harmless. */426427if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));428*listp++ = sp;429if (lensp != NULL) *lensp++ = size;430sp += size;431*sp++ = 0;432}433434*listp = NULL;435return 0;436}437438439440/*************************************************441* Free memory obtained by substring_list_get *442*************************************************/443444/*445Argument: the result of a previous pcre2_substring_list_get()446Returns: nothing447*/448449PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION450pcre2_substring_list_free(PCRE2_UCHAR **list)451{452if (list != NULL)453{454pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));455memctl->free(memctl, memctl->memory_data);456}457}458459460461/*************************************************462* Find (multiple) entries for named string *463*************************************************/464465/* This function scans the nametable for a given name, using binary chop. It466returns either two pointers to the entries in the table, or, if no pointers are467given, the number of a unique group with the given name. If duplicate names are468permitted, and the name is not unique, an error is generated.469470Arguments:471code the compiled regex472stringname the name whose entries required473firstptr where to put the pointer to the first entry474lastptr where to put the pointer to the last entry475476Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found477otherwise, if firstptr and lastptr are NULL:478a group number for a unique substring479else PCRE2_ERROR_NOUNIQUESUBSTRING480otherwise:481the length of each entry, having set firstptr and lastptr482*/483484PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION485pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,486PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)487{488uint16_t bot = 0;489uint16_t top = code->name_count;490uint16_t entrysize = code->name_entry_size;491PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));492493while (top > bot)494{495uint16_t mid = (top + bot) / 2;496PCRE2_SPTR entry = nametable + entrysize*mid;497int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);498if (c == 0)499{500PCRE2_SPTR first;501PCRE2_SPTR last;502PCRE2_SPTR lastentry;503lastentry = nametable + entrysize * (code->name_count - 1);504first = last = entry;505while (first > nametable)506{507if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;508first -= entrysize;509}510while (last < lastentry)511{512if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;513last += entrysize;514}515if (firstptr == NULL) return (first == last)?516(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;517*firstptr = first;518*lastptr = last;519return entrysize;520}521if (c > 0) bot = mid + 1; else top = mid;522}523524return PCRE2_ERROR_NOSUBSTRING;525}526527528/*************************************************529* Find number for named string *530*************************************************/531532/* This function is a convenience wrapper for pcre2_substring_nametable_scan()533when it is known that names are unique. If there are duplicate names, it is not534defined which number is returned.535536Arguments:537code the compiled regex538stringname the name whose number is required539540Returns: the number of the named parenthesis, or a negative number541PCRE2_ERROR_NOSUBSTRING if not found542PCRE2_ERROR_NOUNIQUESUBSTRING if not unique543*/544545PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION546pcre2_substring_number_from_name(const pcre2_code *code,547PCRE2_SPTR stringname)548{549return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);550}551552/* End of pcre2_substring.c */553554555