Path: blob/master/thirdparty/pcre2/src/pcre2_newline.c
9898 views
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE2 supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */


#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "pcre2_internal.h"



/*************************************************
*       Check for newline at given position      *
*************************************************/

/* This function is called only via the IS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. 
It is guaranteed that the code unit65pointed to by ptr is less than the end of the string.6667Arguments:68ptr pointer to possible newline69type the newline type70endptr pointer to the end of the string71lenptr where to return the length72utf TRUE if in utf mode7374Returns: TRUE or FALSE75*/7677BOOL78PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,79uint32_t *lenptr, BOOL utf)80{81uint32_t c;8283#ifdef SUPPORT_UNICODE84if (utf) { GETCHAR(c, ptr); } else c = *ptr;85#else86(void)utf;87c = *ptr;88#endif /* SUPPORT_UNICODE */8990if (type == NLTYPE_ANYCRLF) switch(c)91{92case CHAR_LF:93*lenptr = 1;94return TRUE;9596case CHAR_CR:97*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;98return TRUE;99100default:101return FALSE;102}103104/* NLTYPE_ANY */105106else switch(c)107{108#ifdef EBCDIC109case CHAR_NEL:110#endif111case CHAR_LF:112case CHAR_VT:113case CHAR_FF:114*lenptr = 1;115return TRUE;116117case CHAR_CR:118*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;119return TRUE;120121#ifndef EBCDIC122#if PCRE2_CODE_UNIT_WIDTH == 8123case CHAR_NEL:124*lenptr = utf? 2 : 1;125return TRUE;126127case 0x2028: /* LS */128case 0x2029: /* PS */129*lenptr = 3;130return TRUE;131132#else /* 16-bit or 32-bit code units */133case CHAR_NEL:134case 0x2028: /* LS */135case 0x2029: /* PS */136*lenptr = 1;137return TRUE;138#endif139#endif /* Not EBCDIC */140141default:142return FALSE;143}144}145146147148/*************************************************149* Check for newline at previous position *150*************************************************/151152/* This function is called only via the WAS_NEWLINE macro, which does so only153when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed154newline (NLTYPE_FIXED) is handled inline. 
It is guaranteed that the initial
value of ptr is greater than the start of the string that is being processed.

Arguments:
  ptr          pointer to possible newline
  type         the newline type
  startptr     pointer to the start of the string
  lenptr       where to return the length
  utf          TRUE if in utf mode

Returns:       TRUE or FALSE
*/

BOOL
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
  uint32_t *lenptr, BOOL utf)
{
uint32_t ch;
PCRE2_SPTR prev = ptr - 1;   /* Code unit just before the given position */

/* Pick up the character that ends just before ptr. In UTF mode BACKCHAR is
applied first so that GETCHAR reads from the start of that character. */

#ifdef SUPPORT_UNICODE
if (utf)
  {
  BACKCHAR(prev);
  GETCHAR(ch, prev);
  }
else
  {
  ch = *prev;
  }
#else
(void)utf;
ch = *prev;
#endif  /* SUPPORT_UNICODE */

/* LF and CR are newlines for both NLTYPE_ANYCRLF and NLTYPE_ANY, and are
measured in the same way for each, so they are dealt with first. */

if (ch == CHAR_LF)
  {
  /* An LF that is directly preceded by CR is one newline of length 2. */
  if (prev > startptr && prev[-1] == CHAR_CR)
    *lenptr = 2;
  else
    *lenptr = 1;
  return TRUE;
  }

if (ch == CHAR_CR)
  {
  *lenptr = 1;
  return TRUE;
  }

/* No other character is a newline for NLTYPE_ANYCRLF. */

if (type == NLTYPE_ANYCRLF) return FALSE;

/* NLTYPE_ANY: test for the remaining newline characters. *lenptr is left
untouched when the answer is FALSE. */

switch(ch)
  {
#ifdef EBCDIC
  case CHAR_NEL:
#endif
  case CHAR_VT:
  case CHAR_FF:
  *lenptr = 1;
  return TRUE;

#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
  case CHAR_NEL:
  *lenptr = utf? 2 : 1;
  return TRUE;

  case 0x2028:   /* LS */
  case 0x2029:   /* PS */
  *lenptr = 3;
  return TRUE;

#else  /* 16-bit or 32-bit code units */
  case CHAR_NEL:
  case 0x2028:   /* LS */
  case 0x2029:   /* PS */
  *lenptr = 1;
  return TRUE;
#endif
#endif  /* Not EBCDIC */

  default:
  return FALSE;
  }
}

/* End of pcre2_newline.c */