/* Path: blob/master/thirdparty/pcre2/src/pcre2_newline.c */
/* 21928 views */
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041/* This module contains internal functions for testing newlines when more than42one kind of newline is to be recognized. When a newline is found, its length is43returned. In principle, we could implement several newline "types", each44referring to a different set of newline characters. At present, PCRE2 supports45only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,46and NLTYPE_ANY. The full list of Unicode newline characters is taken from47http://unicode.org/unicode/reports/tr18/. */484950#include "pcre2_internal.h"51525354/*************************************************55* Check for newline at given position *56*************************************************/5758/* This function is called only via the IS_NEWLINE macro, which does so only59when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed60newline (NLTYPE_FIXED) is handled inline. 
It is guaranteed that the code unit61pointed to by ptr is less than the end of the string.6263Arguments:64ptr pointer to possible newline65type the newline type66endptr pointer to the end of the string67lenptr where to return the length68utf TRUE if in utf mode6970Returns: TRUE or FALSE71*/7273BOOL74PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,75uint32_t *lenptr, BOOL utf)76{77uint32_t c;7879#ifdef SUPPORT_UNICODE80if (utf) { GETCHAR(c, ptr); } else c = *ptr;81#else82(void)utf;83c = *ptr;84#endif /* SUPPORT_UNICODE */8586if (type == NLTYPE_ANYCRLF) switch(c)87{88case CHAR_LF:89*lenptr = 1;90return TRUE;9192case CHAR_CR:93*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;94return TRUE;9596default:97return FALSE;98}99100/* NLTYPE_ANY */101102else switch(c)103{104#ifdef EBCDIC105case CHAR_NEL:106#endif107case CHAR_LF:108case CHAR_VT:109case CHAR_FF:110*lenptr = 1;111return TRUE;112113case CHAR_CR:114*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;115return TRUE;116117#ifndef EBCDIC118#if PCRE2_CODE_UNIT_WIDTH == 8119case CHAR_NEL:120*lenptr = utf? 2 : 1;121return TRUE;122123case 0x2028: /* LS */124case 0x2029: /* PS */125*lenptr = 3;126return TRUE;127128#else /* 16-bit or 32-bit code units */129case CHAR_NEL:130case 0x2028: /* LS */131case 0x2029: /* PS */132*lenptr = 1;133return TRUE;134#endif135#endif /* Not EBCDIC */136137default:138return FALSE;139}140}141142143144/*************************************************145* Check for newline at previous position *146*************************************************/147148/* This function is called only via the WAS_NEWLINE macro, which does so only149when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed150newline (NLTYPE_FIXED) is handled inline. 
It is guaranteed that the initial151value of ptr is greater than the start of the string that is being processed.152153Arguments:154ptr pointer to possible newline155type the newline type156startptr pointer to the start of the string157lenptr where to return the length158utf TRUE if in utf mode159160Returns: TRUE or FALSE161*/162163BOOL164PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,165uint32_t *lenptr, BOOL utf)166{167uint32_t c;168ptr--;169170#ifdef SUPPORT_UNICODE171if (utf)172{173BACKCHAR(ptr);174GETCHAR(c, ptr);175}176else c = *ptr;177#else178(void)utf;179c = *ptr;180#endif /* SUPPORT_UNICODE */181182if (type == NLTYPE_ANYCRLF) switch(c)183{184case CHAR_LF:185*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;186return TRUE;187188case CHAR_CR:189*lenptr = 1;190return TRUE;191192default:193return FALSE;194}195196/* NLTYPE_ANY */197198else switch(c)199{200case CHAR_LF:201*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;202return TRUE;203204#ifdef EBCDIC205case CHAR_NEL:206#endif207case CHAR_VT:208case CHAR_FF:209case CHAR_CR:210*lenptr = 1;211return TRUE;212213#ifndef EBCDIC214#if PCRE2_CODE_UNIT_WIDTH == 8215case CHAR_NEL:216*lenptr = utf? 2 : 1;217return TRUE;218219case 0x2028: /* LS */220case 0x2029: /* PS */221*lenptr = 3;222return TRUE;223224#else /* 16-bit or 32-bit code units */225case CHAR_NEL:226case 0x2028: /* LS */227case 0x2029: /* PS */228*lenptr = 1;229return TRUE;230#endif231#endif /* Not EBCDIC */232233default:234return FALSE;235}236}237238/* End of pcre2_newline.c */239240241