// Path: blob/main/contrib/llvm-project/libcxx/src/regex.cpp
// 35147 views
//===----------------------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include <algorithm>9#include <iterator>10#include <regex>1112_LIBCPP_BEGIN_NAMESPACE_STD1314static const char* make_error_type_string(regex_constants::error_type ecode) {15switch (ecode) {16case regex_constants::error_collate:17return "The expression contained an invalid collating element name.";18case regex_constants::error_ctype:19return "The expression contained an invalid character class name.";20case regex_constants::error_escape:21return "The expression contained an invalid escaped character, or a "22"trailing escape.";23case regex_constants::error_backref:24return "The expression contained an invalid back reference.";25case regex_constants::error_brack:26return "The expression contained mismatched [ and ].";27case regex_constants::error_paren:28return "The expression contained mismatched ( and ).";29case regex_constants::error_brace:30return "The expression contained mismatched { and }.";31case regex_constants::error_badbrace:32return "The expression contained an invalid range in a {} expression.";33case regex_constants::error_range:34return "The expression contained an invalid character range, "35"such as [b-a] in most encodings.";36case regex_constants::error_space:37return "There was insufficient memory to convert the expression into "38"a finite state machine.";39case regex_constants::error_badrepeat:40return "One of *?+{ was not preceded by a valid regular expression.";41case regex_constants::error_complexity:42return "The complexity of an attempted match against a regular "43"expression exceeded a pre-set level.";44case regex_constants::error_stack:45return "There was insufficient memory 
to determine whether the regular "46"expression could match the specified character sequence.";47case regex_constants::__re_err_grammar:48return "An invalid regex grammar has been requested.";49case regex_constants::__re_err_empty:50return "An empty regex is not allowed in the POSIX grammar.";51case regex_constants::__re_err_parse:52return "The parser did not consume the entire regular expression.";53default:54break;55}56return "Unknown error type";57}5859regex_error::regex_error(regex_constants::error_type ecode)60: runtime_error(make_error_type_string(ecode)), __code_(ecode) {}6162regex_error::~regex_error() throw() {}6364namespace {6566struct collationnames {67const char* elem_;68char char_;69};7071#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)72// EBCDIC IBM-104773// Sorted via the EBCDIC collating sequence74const collationnames collatenames[] = {75{"a", 0x81},76{"alert", 0x2f},77{"ampersand", 0x50},78{"apostrophe", 0x7d},79{"asterisk", 0x5c},80{"b", 0x82},81{"backslash", 0xe0},82{"backspace", 0x16},83{"c", 0x83},84{"carriage-return", 0xd},85{"circumflex", 0x5f},86{"circumflex-accent", 0x5f},87{"colon", 0x7a},88{"comma", 0x6b},89{"commercial-at", 0x7c},90{"d", 0x84},91{"dollar-sign", 0x5b},92{"e", 0x85},93{"eight", 0xf8},94{"equals-sign", 0x7e},95{"exclamation-mark", 0x5a},96{"f", 0x86},97{"five", 0xf5},98{"form-feed", 0xc},99{"four", 0xf4},100{"full-stop", 0x4b},101{"g", 0x87},102{"grave-accent", 0x79},103{"greater-than-sign", 0x6e},104{"h", 0x88},105{"hyphen", 0x60},106{"hyphen-minus", 0x60},107{"i", 0x89},108{"j", 0x91},109{"k", 0x92},110{"l", 0x93},111{"left-brace", 0xc0},112{"left-curly-bracket", 0xc0},113{"left-parenthesis", 0x4d},114{"left-square-bracket", 0xad},115{"less-than-sign", 0x4c},116{"low-line", 0x6d},117{"m", 0x94},118{"n", 0x95},119{"newline", 0x15},120{"nine", 0xf9},121{"number-sign", 0x7b},122{"o", 0x96},123{"one", 0xf1},124{"p", 0x97},125{"percent-sign", 0x6c},126{"period", 0x4b},127{"plus-sign", 0x4e},128{"q", 
0x98},129{"question-mark", 0x6f},130{"quotation-mark", 0x7f},131{"r", 0x99},132{"reverse-solidus", 0xe0},133{"right-brace", 0xd0},134{"right-curly-bracket", 0xd0},135{"right-parenthesis", 0x5d},136{"right-square-bracket", 0xbd},137{"s", 0xa2},138{"semicolon", 0x5e},139{"seven", 0xf7},140{"six", 0xf6},141{"slash", 0x61},142{"solidus", 0x61},143{"space", 0x40},144{"t", 0xa3},145{"tab", 0x5},146{"three", 0xf3},147{"tilde", 0xa1},148{"two", 0xf2},149{"u", 0xa4},150{"underscore", 0x6d},151{"v", 0xa5},152{"vertical-line", 0x4f},153{"vertical-tab", 0xb},154{"w", 0xa6},155{"x", 0xa7},156{"y", 0xa8},157{"z", 0xa9},158{"zero", 0xf0},159{"A", 0xc1},160{"B", 0xc2},161{"C", 0xc3},162{"D", 0xc4},163{"E", 0xc5},164{"F", 0xc6},165{"G", 0xc7},166{"H", 0xc8},167{"I", 0xc9},168{"J", 0xd1},169{"K", 0xd2},170{"L", 0xd3},171{"M", 0xd4},172{"N", 0xd5},173{"NUL", 0},174{"O", 0xd6},175{"P", 0xd7},176{"Q", 0xd8},177{"R", 0xd9},178{"S", 0xe2},179{"T", 0xe3},180{"U", 0xe4},181{"V", 0xe5},182{"W", 0xe6},183{"X", 0xe7},184{"Y", 0xe8},185{"Z", 0xe9}};186#else187// ASCII188const collationnames collatenames[] = {189{"A", 0x41},190{"B", 0x42},191{"C", 0x43},192{"D", 0x44},193{"E", 0x45},194{"F", 0x46},195{"G", 0x47},196{"H", 0x48},197{"I", 0x49},198{"J", 0x4a},199{"K", 0x4b},200{"L", 0x4c},201{"M", 0x4d},202{"N", 0x4e},203{"NUL", 0x00},204{"O", 0x4f},205{"P", 0x50},206{"Q", 0x51},207{"R", 0x52},208{"S", 0x53},209{"T", 0x54},210{"U", 0x55},211{"V", 0x56},212{"W", 0x57},213{"X", 0x58},214{"Y", 0x59},215{"Z", 0x5a},216{"a", 0x61},217{"alert", 0x07},218{"ampersand", 0x26},219{"apostrophe", 0x27},220{"asterisk", 0x2a},221{"b", 0x62},222{"backslash", 0x5c},223{"backspace", 0x08},224{"c", 0x63},225{"carriage-return", 0x0d},226{"circumflex", 0x5e},227{"circumflex-accent", 0x5e},228{"colon", 0x3a},229{"comma", 0x2c},230{"commercial-at", 0x40},231{"d", 0x64},232{"dollar-sign", 0x24},233{"e", 0x65},234{"eight", 0x38},235{"equals-sign", 0x3d},236{"exclamation-mark", 0x21},237{"f", 0x66},238{"five", 
0x35},239{"form-feed", 0x0c},240{"four", 0x34},241{"full-stop", 0x2e},242{"g", 0x67},243{"grave-accent", 0x60},244{"greater-than-sign", 0x3e},245{"h", 0x68},246{"hyphen", 0x2d},247{"hyphen-minus", 0x2d},248{"i", 0x69},249{"j", 0x6a},250{"k", 0x6b},251{"l", 0x6c},252{"left-brace", 0x7b},253{"left-curly-bracket", 0x7b},254{"left-parenthesis", 0x28},255{"left-square-bracket", 0x5b},256{"less-than-sign", 0x3c},257{"low-line", 0x5f},258{"m", 0x6d},259{"n", 0x6e},260{"newline", 0x0a},261{"nine", 0x39},262{"number-sign", 0x23},263{"o", 0x6f},264{"one", 0x31},265{"p", 0x70},266{"percent-sign", 0x25},267{"period", 0x2e},268{"plus-sign", 0x2b},269{"q", 0x71},270{"question-mark", 0x3f},271{"quotation-mark", 0x22},272{"r", 0x72},273{"reverse-solidus", 0x5c},274{"right-brace", 0x7d},275{"right-curly-bracket", 0x7d},276{"right-parenthesis", 0x29},277{"right-square-bracket", 0x5d},278{"s", 0x73},279{"semicolon", 0x3b},280{"seven", 0x37},281{"six", 0x36},282{"slash", 0x2f},283{"solidus", 0x2f},284{"space", 0x20},285{"t", 0x74},286{"tab", 0x09},287{"three", 0x33},288{"tilde", 0x7e},289{"two", 0x32},290{"u", 0x75},291{"underscore", 0x5f},292{"v", 0x76},293{"vertical-line", 0x7c},294{"vertical-tab", 0x0b},295{"w", 0x77},296{"x", 0x78},297{"y", 0x79},298{"z", 0x7a},299{"zero", 0x30}};300#endif301302struct classnames {303const char* elem_;304regex_traits<char>::char_class_type mask_;305};306307const classnames ClassNames[] = {308{"alnum", ctype_base::alnum},309{"alpha", ctype_base::alpha},310{"blank", ctype_base::blank},311{"cntrl", ctype_base::cntrl},312{"d", ctype_base::digit},313{"digit", ctype_base::digit},314{"graph", ctype_base::graph},315{"lower", ctype_base::lower},316{"print", ctype_base::print},317{"punct", ctype_base::punct},318{"s", ctype_base::space},319{"space", ctype_base::space},320{"upper", ctype_base::upper},321{"w", regex_traits<char>::__regex_word},322{"xdigit", ctype_base::xdigit}};323324struct use_strcmp {325bool operator()(const collationnames& x, const char* y) 
{ return strcmp(x.elem_, y) < 0; }326bool operator()(const classnames& x, const char* y) { return strcmp(x.elem_, y) < 0; }327};328329} // namespace330331string __get_collation_name(const char* s) {332const collationnames* i = std::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp());333string r;334if (i != end(collatenames) && strcmp(s, i->elem_) == 0)335r = char(i->char_);336return r;337}338339regex_traits<char>::char_class_type __get_classname(const char* s, bool __icase) {340const classnames* i = std::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp());341regex_traits<char>::char_class_type r = 0;342if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) {343r = i->mask_;344if (r == regex_traits<char>::__regex_word)345r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower;346else if (__icase) {347if (r & (ctype_base::lower | ctype_base::upper))348r |= ctype_base::alpha;349}350}351return r;352}353354template <>355void __match_any_but_newline<char>::__exec(__state& __s) const {356if (__s.__current_ != __s.__last_) {357switch (*__s.__current_) {358case '\r':359case '\n':360__s.__do_ = __state::__reject;361__s.__node_ = nullptr;362break;363default:364__s.__do_ = __state::__accept_and_consume;365++__s.__current_;366__s.__node_ = this->first();367break;368}369} else {370__s.__do_ = __state::__reject;371__s.__node_ = nullptr;372}373}374375template <>376void __match_any_but_newline<wchar_t>::__exec(__state& __s) const {377if (__s.__current_ != __s.__last_) {378switch (*__s.__current_) {379case '\r':380case '\n':381case 0x2028:382case 0x2029:383__s.__do_ = __state::__reject;384__s.__node_ = nullptr;385break;386default:387__s.__do_ = __state::__accept_and_consume;388++__s.__current_;389__s.__node_ = this->first();390break;391}392} else {393__s.__do_ = __state::__reject;394__s.__node_ = nullptr;395}396}397398_LIBCPP_END_NAMESPACE_STD399400401