: translated from the Regex++ tests.txt by cvtregex++.c 2001-05-16 # manual edits were done to insert the standard error codes # # inserted {...} features tests may have changed line numbers and/or ordering # # comments beyond this point are verbatim from the original input # # this file contains a script of tests to run through regress.exe # # comments start with a semicolon and proceed to the end of the line # # changes to regular expression compile flags start with a "-" as the first # non-whitespace character and consist of a list of the printable names # of the flags, for example "match_default" # # Other lines contain a test to perform using the current flag status # the first token contains the expression to compile, the second the string # to match it against. If the second string is "!" then the expression should # not compile, that is the first string is an invalid regular expression. # This is then followed by a list of integers that specify what should match, # each pair represents the starting and ending positions of a subexpression # starting with the zeroth subexpression (the whole match). # A value of -1 indicates that the subexpression should not take part in the # match at all, if the first value is -1 then no part of the expression should # match the string. # # - match_default normal REG_EXTENDED # # try some really simple literals: E a a (0,1) E Z Z (0,1) E Z aaa NOMATCH E Z xxxxZZxxx (4,5) # and some simple brackets: E (a) zzzaazz (3,4)(3,4) Exz () zzz (0,0)(0,0) Exz () NULL (0,0)(0,0) E ( ! EPAREN E ) ! NOMATCH E (aa ! EPAREN E aa) ! NOMATCH E a b NOMATCH E \(\) () (0,2) E \(a\) (a) (0,3) E \() ! NOMATCH E (\) ! EPAREN E p(a)rameter ABCparameterXYZ (3,12)(4,5) E [pq](a)rameter ABCparameterXYZ (3,12)(4,5) # now try escaped brackets: # - match_default bk_parens REG_BASIC B \(a\) zzzaazz (3,4)(3,4) B \(\) zzz (0,0)(0,0) B \(\) NULL (0,0)(0,0) B \( ! EPAREN B \) ! EPAREN B \(aa ! EPAREN B aa\) ! EPAREN B () () (0,2) B (a) (a) (0,3) B (\) ! EPAREN B \() ! EPAREN # now move on to "." wildcards # - match_default normal REG_EXTENDED REG_STARTEND E . a (0,1) E$ . \n (0,1) E$ . \r (0,1) E . NULL NOMATCH # - match_default normal match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE En . a (0,1) En$ . \n NOMATCH En$ . \r (0,1) En . NULL NOMATCH # - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE En$ . \n NOMATCH En$ . \r (0,1) # this *WILL* produce an error from the POSIX API functions: # - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE REG_NO_POSIX_TEST Enz . NULL NOMATCH # # now move on to the repetion ops, # starting with operator * # - match_default normal REG_EXTENDED E a* b (0,0) E ab* a (0,1) E ab* ab (0,2) E ab* sssabbbbbbsss (3,10) E ab*c* a (0,1) E ab*c* abbb (0,4) E ab*c* accc (0,4) E ab*c* abbcc (0,5) E *a ! BADRPT E$ \n* \n\n (0,2) E \** ** (0,2) E \* * (0,1) # now try operator + E ab+ a NOMATCH E ab+ ab (0,2) E ab+ sssabbbbbbsss (3,10) E ab+c+ a NOMATCH E ab+c+ abbb NOMATCH E ab+c+ accc NOMATCH E ab+c+ abbcc (0,5) E +a ! BADRPT E$ \n+ \n\n (0,2) E \+ + (0,1) E \+ ++ (0,1) E \++ ++ (0,2) # - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST Exz + + BADRPT Exz \+ ! NOMATCH Exz a\+ aa NOMATCH # now try operator ? # - match_default normal REG_EXTENDED E a? b (0,0) E ab? a (0,1) E ab? ab (0,2) E ab? sssabbbbbbsss (3,5) E ab?c? a (0,1) E ab?c? abbb (0,2) E ab?c? accc (0,2) E ab?c? abcc (0,3) E ?a ! BADRPT E$ \n? \n\n (0,1) E \? ? (0,1) E \? ?? (0,1) E \?? ?? (0,1) # - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST Exz ? ? BADRPT Exz \? ! NOMATCH Exz a\? aa NOMATCH Exz a\? b NOMATCH # - match_default normal limited_ops B a? a? (0,2) B a+ a+ (0,2) B a\? a? (0,2) B a\+ a+ (0,2) # now try operator {} # - match_default normal REG_EXTENDED E a{2} a NOMATCH E a{2} aa (0,2) E a{2} aaa (0,2) E a{2,} a NOMATCH E a{2,} aa (0,2) E a{2,} aaaaa (0,5) E a{2,4} a NOMATCH E a{2,4} aa (0,2) E a{2,4} aaa (0,3) E a{2,4} aaaa (0,4) E a{2,4} aaaaa (0,4) # spaces are now allowed inside {} E a{ 2 , 4 } aaaaa BADBR E a{} ! BADBR E a{ } ! BADBR E a{2 ! EBRACE E a} ! NOMATCH E \{\} {} (0,2) # - match_default normal bk_braces B a\{2\} a NOMATCH B a\{2\} aa (0,2) B a\{2\} aaa (0,2) B a\{2,\} a NOMATCH B a\{2,\} aa (0,2) B a\{2,\} aaaaa (0,5) B a\{2,4\} a NOMATCH B a\{2,4\} aa (0,2) B a\{2,4\} aaa (0,3) B a\{2,4\} aaaa (0,4) B a\{2,4\} aaaaa (0,4) B a\{ 2 , 4 \} aaaaa BADBR B {} {} (0,2) # now test the alternation operator | # - match_default normal REG_EXTENDED E a|b a (0,1) E a|b b (0,1) E a(b|c) ab (0,2)(1,2) E a(b|c) ac (0,2)(1,2) E a(b|c) ad NOMATCH E |c ! ENULL E c| ! ENULL E (|) ! ENULL E (a|) ! ENULL E (|a) ! ENULL E a\| a| (0,2) # - match_default normal limited_ops B a| a| (0,2) B a\| a| (0,2) B | | (0,1) # - match_default normal bk_vbar REG_NO_POSIX_TEST Bxz a| a| (0,2) Bxz a\|b a (0,1) Bxz a\|b b (0,1) # now test the set operator [] # - match_default normal REG_EXTENDED # try some literals first E [abc] a (0,1) E [abc] b (0,1) E [abc] c (0,1) E [abc] d NOMATCH E [^bcd] a (0,1) E [^bcd] b NOMATCH E [^bcd] d NOMATCH E [^bcd] e (0,1) E a[b]c abc (0,3) E a[ab]c abc (0,3) E a[^ab]c adc (0,3) E a[]b]c a]c (0,3) E a[[b]c a[c (0,3) E a[-b]c a-c (0,3) E a[^]b]c adc (0,3) E a[^-b]c adc (0,3) E a[b-]c a-c (0,3) E a[b ! EBRACK E a[] ! EBRACK # then some ranges E [b-e] a NOMATCH E [b-e] b (0,1) E [b-e] e (0,1) E [b-e] f NOMATCH E [^b-e] a (0,1) E [^b-e] b NOMATCH E [^b-e] e NOMATCH E [^b-e] f (0,1) E a[1-3]c a2c (0,3) E a[3-1]c ! ERANGE E a[1-3-5]c ! ERANGE E a[1- ! EBRACK # and some classes E a[[:alpha:]]c abc (0,3) E a[[:unknown:]]c ! ECTYPE E a[[: ! ECTYPE E a[[:alpha ! ECTYPE E a[[:alpha:] ! EBRACK E a[[:alpha,:] ! ECTYPE E a[[:]:]]b ! ECTYPE E a[[:-:]]b ! ECTYPE E a[[:alph:]] ! ECTYPE E a[[:alphabet:]] ! ECTYPE E [[:alnum:]]+ -%@a0X- (3,6) E [[:alpha:]]+ -%@aX0- (3,5) E$ [[:blank:]]+ a \tb (1,4) E$ [[:cntrl:]]+ a\n\tb (1,3) E [[:digit:]]+ a019b (1,4) E [[:graph:]]+ a%b (0,3) E [[:lower:]]+ AabC (1,3) # This test fails with STLPort, disable for now as this is a corner case anyway... #[[:print:]]+ "\na b\n" 1 4 E$ [[:punct:]]+ %-&\t (0,3) E$ [[:space:]]+ a \n\t\rb (1,5) E [[:upper:]]+ aBCd (1,3) E [[:xdigit:]]+ p0f3Cx (1,5) # now test flag settings: # - escape_in_lists REG_NO_POSIX_TEST Exz$ [\n] \n (0,1) # - REG_NO_POSIX_TEST Bxz$ [\n] \n (0,1) Bxz$ [\n] \\ NOMATCH Bxz [[:class:] : ECTYPE Bxz [[:class:] [ ECTYPE Bxz [[:class:] c ECTYPE # line anchors # - match_default normal REG_EXTENDED En ^ab ab (0,2) En ^ab xxabxx NOMATCH En$ ^ab xx\nabzz (3,5) En ab$ ab (0,2) En ab$ abxx NOMATCH En$ ab$ ab\nzz (0,2) # - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL Eben ^ab ab NOMATCH Eben ^ab xxabxx NOMATCH Eben$ ^ab xx\nabzz (3,5) Eben ab$ ab NOMATCH Eben ab$ abxx NOMATCH Eben$ ab$ ab\nzz (0,2) # back references # - match_default normal REG_EXTENDED E a(b)\2c ! ESUBREG E a(b\1)c ! ESUBREG E a(b*)c\1d abbcbbd (0,7)(1,3) E a(b*)c\1d abbcbd NOMATCH E a(b*)c\1d abbcbbbd NOMATCH E ^(.)\1 abc NOMATCH E a([bc])\1d abcdabbd (4,8)(5,6) # strictly speaking this is at best ambiguous, at worst wrong, this is what most # re implimentations will match though. E a(([bc])\2)*d abbccd (0,6)(3,5)(3,4) E a(([bc])\2)*d abbcbd NOMATCH E a((b)*\2)*d abbbd (0,5)(1,4)(2,3) E (ab*)[ab]*\1 ababaaa (0,7)(0,1) E (a)\1bcd aabcd (0,5)(0,1) E (a)\1bc*d aabcd (0,5)(0,1) E (a)\1bc*d aabd (0,4)(0,1) E (a)\1bc*d aabcccd (0,7)(0,1) E (a)\1bc*[ce]d aabcccd (0,7)(0,1) E ^(a)\1b(c)*cd$ aabcccd (0,7)(0,1)(4,5) # # characters by code: # - match_default normal REG_EXTENDED REG_STARTEND {E \101 A (0,1) not an ascii implementation E \172 z (0,1) E \0172 z NOMATCH } E NULL NULL ENULL E NULL NULL ENULL # # word operators: {E \w a (0,1) perl \w not supported E \w z (0,1) E \w A (0,1) E \w Z (0,1) E \w _ (0,1) E \w } NOMATCH E \w ` NOMATCH E \w [ NOMATCH E \w @ NOMATCH } # non-word: {E \W W NOMATCH perl \W not supported E \W z NOMATCH E \W A NOMATCH E \W Z NOMATCH E \W _ NOMATCH E \W } (0,1) E \W ` (0,1) E \W [ (0,1) E \W @ (0,1) E \<\w+\W+ aa aa a (1,5) } # word boundaries {E \<a\> ,a, (1,2) word boundaries not supported E \<* ! BADRPT E \>* ! BADRPT E \<+ ! BADRPT E \>+ ! BADRPT E \<? ! BADRPT E \>? ! BADRPT # word start: E \<abcd abcd (2,6) E \<ab cab NOMATCH E$ \\<ab \nab (1,3) E \<tag ::tag (2,5) # word end: E abc\> abc (0,3) E abc\> abcd NOMATCH E$ abc\\> abc\n (0,3) E abc\> abc:: (0,3) E \<abc abcabc abc\n\nabc (0,3) E \< ab a aaa (2,2) } # word boundary: {E \babcd abcd (0,4) perl \b not supported E \babcd :abcd: (1,5) perl \b not supported E \bab cab NOMATCH E$ \\bab \nab (1,3) E \btag ::tag (2,5) E abc\b abc (0,3) E abc\b abcd NOMATCH E$ abc\\b abc\n (0,3) E abc\b abc:: (0,3) E \b abb a abbb (0,0) } # within word: {E \B ab (1,1) perl \B not supported E a\Bb ab (0,2) E a\B ab (0,1) E a\B a NOMATCH E a\B a NOMATCH } # # buffer operators: {E \`abc abc (0,3) regex++ \' not supported E$ \\`abc \nabc NOMATCH E \`abc abc NOMATCH E abc\' abc (0,3) E$ abc\\' abc\n NOMATCH E abc\' abc NOMATCH } # # extra escape sequences: E$ \a \a (0,1) E$ \f \f (0,1) E$ \n \n (0,1) E$ \r \r (0,1) E$ \t \t (0,1) E$ \v \v (0,1) E$ \\a \a (0,1) E$ \\f \f (0,1) E$ \\n \n (0,1) E$ \\r \r (0,1) E$ \\t \t (0,1) E$ \\v \v (0,1) E \\a \a (0,2) E \\f \f (0,2) E \\n \n (0,2) E \\r \r (0,2) E \\t \t (0,2) E \\v \v (0,2) # # now follows various complex expressions designed to try and bust the matcher: E a(((b)))c abc (0,3)(1,2)(1,2)(1,2) E a(b|(c))d abd (0,3)(1,2) E a(b|(c))d acd (0,3)(1,2)(1,2) E a(b*|c)d abbd (0,4)(1,3) # just gotta have one DFA-buster, of course E a[ab]{20} aaaaabaaaabaaaabaaaab (0,21) # and an inline expansion in case somebody gets tricky E a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab (0,21) # and in case somebody just slips in an NFA... E a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights (0,31)(21,24)(24,31) # one really big one E 1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b (1,71) # fish for problems as brackets go past 8 E [ab][cd][ef][gh][ij][kl][mn] xacegikmoq (1,8) E [ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq (1,9) E [ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy (1,10) E [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy (1,10) # and as parenthesis go past 9: E (a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi (1,9)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9) E (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij (1,10)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10) E (a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk (1,11)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11) E (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl (1,12)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(11,12) E (a)d|(b)c abc (1,3)(-1,-1)(1,2) E _+((www)|(ftp)|(mailto)):_* _wwwnocolon _mailto: (12,20)(13,19)(-1,-1)(-1,-1)(13,19) # subtleties of matching E a(b)?cd acd (0,3) E a(b)?c\1d acd NOMATCH E a(b?c)+d accd (0,4)(2,3) E (wee|week)(knights|night) weeknights (0,10)(0,3)(3,10) E .* abc (0,3) E a(b|(c))d abd (0,3)(1,2) E a(b|(c))d acd (0,3)(1,2)(1,2) E a(b*|c|e)d abbd (0,4)(1,3) E a(b*|c|e)d acd (0,3)(1,2) E a(b*|c|e)d ad (0,2)(1,1) E a(b?)c abc (0,3)(1,2) E a(b?)c ac (0,2)(1,1) E a(b+)c abc (0,3)(1,2) E a(b+)c abbbc (0,5)(1,4) E a(b*)c ac (0,2)(1,1) E (a|ab)(bc([de]+)f|cde) abcdef (0,6)(0,1)(1,6)(3,5) E a([bc]?)c abc (0,3)(1,2) E a([bc]?)c ac (0,2)(1,1) E a([bc]+)c abc (0,3)(1,2) E a([bc]+)c abcc (0,4)(1,3) E a([bc]+)bc abcbc (0,5)(1,3) E a(bb+|b)b abb (0,3)(1,2) E a(bbb+|bb+|b)b abb (0,3)(1,2) E a(bbb+|bb+|b)b abbb (0,4)(1,3) E a(bbb+|bb+|b)bb abbb (0,4)(1,2) E (.*).* abcdef (0,6)(0,6) E (a*)* bc (0,0)(0,0) # do we get the right subexpression when it is used more than once? E a(b|c)*d ad (0,2) E a(b|c)*d abcd (0,4)(2,3) E a(b|c)+d abd (0,3)(1,2) E a(b|c)+d abcd (0,4)(2,3) E a(b|c?)+d ad (0,2)(1,1) E a(b|c?)+d abcd (0,4)(2,3) E a(b|c){0,0}d ad (0,2) E a(b|c){0,1}d ad (0,2) E a(b|c){0,1}d abd (0,3)(1,2) E a(b|c){0,2}d ad (0,2) E a(b|c){0,2}d abcd (0,4)(2,3) E a(b|c){0,}d ad (0,2) E a(b|c){0,}d abcd (0,4)(2,3) E a(b|c){1,1}d abd (0,3)(1,2) E a(b|c){1,2}d abd (0,3)(1,2) E a(b|c){1,2}d abcd (0,4)(2,3) E a(b|c){1,}d abd (0,3)(1,2) E a(b|c){1,}d abcd (0,4)(2,3) E a(b|c){2,2}d acbd (0,4)(2,3) E a(b|c){2,2}d abcd (0,4)(2,3) E a(b|c){2,4}d abcd (0,4)(2,3) E a(b|c){2,4}d abcbd (0,5)(3,4) E a(b|c){2,4}d abcbcd (0,6)(4,5) E a(b|c){2,}d abcd (0,4)(2,3) E a(b|c){2,}d abcbd (0,5)(3,4) E a(b+|((c)*))+d abd (0,3)(1,2) E a(b+|((c)*))+d abcd (0,4)(2,3)(2,3)(2,3) # - match_default normal REG_EXTENDED REG_STARTEND REG_NOSPEC literal L \**?/{} \**?/{} (0,7) # - match_default normal REG_EXTENDED REG_NO_POSIX_TEST # try to match C++ syntax elements: # line comment: Exz$ //[^\n]* ++i //here is a line comment\n (4,28) # block comment: Exz /\*([^*]|\*+[^*/])*\*+/ /* here is a block comment */ (0,29)(26,27) Exz /\*([^*]|\*+[^*/])*\*+/ /**/ (0,4) Exz /\*([^*]|\*+[^*/])*\*+/ /***/ (0,5) Exz /\*([^*]|\*+[^*/])*\*+/ /****/ (0,6) Exz /\*([^*]|\*+[^*/])*\*+/ /*****/ (0,7) Exz /\*([^*]|\*+[^*/])*\*+/ /*****/*/ (0,7) # preprossor directives: E$ ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol (0,19) E$ ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol(x) #x (0,25) E$ ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x); (0,27) # literals: E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF (0,4)(0,4)(0,4) E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 (0,2)(0,2)(-1,-1)(0,2) E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu (0,5)(0,4)(0,4) E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL (0,5)(0,4)(0,4)(-1,-1)(4,5) E ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 (0,24)(0,18)(0,18)(-1,-1)(19,24)(19,24)(22,24) # strings: #E '([^\\']|\\.)*' '\\x3A' (0,6)(4,5) E '([^\\']|\\.)*' '\\'' (0,4)(1,3) E$ '([^']|\\.)*' '\n' (0,3)(1,2) # now try and test some unicode specific characters: # - match_default normal REG_EXTENDED REG_UNICODE_ONLY E [[:unicode:]]+ a\0300\0400z ECTYPE # finally try some case insensitive matches: # - match_default normal REG_EXTENDED REG_ICASE # upper and lower have no meaning here so they fail, however these # may compile with other libraries... Ei [[:lower:]]+ Ab (0,2) Ei [[:lower:]]+ aB (0,2) Ei [[:upper:]]+ Ab (0,2) Ei [[:upper:]]+ aB (0,2) Ei 0123456789@abcdefghijklmnopqrstuvwxyz_`ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz (0,65) Ei 0123456789@abcdefghijklmnopqrstuvwxyz\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz (0,66) Ei 0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|} (0,72) # known and suspected bugs: # - match_default normal REG_EXTENDED E \( ( (0,1) E \) ) (0,1) E \$ $ (0,1) E \^ ^ (0,1) E \. . (0,1) E \* * (0,1) E \+ + (0,1) E \? ? (0,1) E \[ [ (0,1) E \] ] (0,1) E \| | (0,1) E \\ \\ (0,1) E # # (0,1) E \# # BADESC Ex \# # (0,1) E a- a- (0,2) E \- - BADESC Ex \- - (0,1) E \{ { (0,1) E \} } (0,1) E 0 0 (0,1) E 1 1 (0,1) E 9 9 (0,1) E b b (0,1) E B B (0,1) E < < (0,1) E > > (0,1) E w w (0,1) E W W (0,1) E ` ` (0,1) E ' ' (0,1) E$ \n \n (0,1) E , , (0,1) E a a (0,1) E f f (0,1) E n n (0,1) E r r (0,1) E t t (0,1) E v v (0,1) E c c (0,1) E x x (0,1) E : : (0,1) E (\.[[:alnum:]]+){2} w.a.b (1,5)(3,5) # - match_default normal REG_EXTENDED REG_ICASE Ei a A (0,1) Ei A a (0,1) Ei [abc]+ abcABC (0,6) Ei [ABC]+ abcABC (0,6) Ei [a-z]+ abcABC (0,6) Ei [A-Z]+ abzANZ (0,6) Ei [a-Z]+ abzABZ ERANGE Eix [a-Z]+ abzABZ NOMATCH Ei [A-z]+ abzABZ (0,6) Ei [[:lower:]]+ abyzABYZ (0,8) Ei [[:upper:]]+ abzABZ (0,6) Ei [[:word:]]+ abcZZZ (0,6) Ei [[:alpha:]]+ abyzABYZ (0,8) Ei [[:alnum:]]+ 09abyzABYZ (0,10) # updated tests for version 2: # - match_default normal REG_EXTENDED E$ \x41 A (0,1) E$ \xff \xFF (0,1) E$ \xFF \xff (0,1) # - match_default normal REG_EXTENDED REG_NO_POSIX_TEST {Exz$ \\c[ \e (0,1) perl \c not supported # - match_default normal REG_EXTENDED E$ \\cA \001 (0,1) E$ \\cz \032 (0,1) E$ \\c= ! NOMATCH E$ \\c? ! NOMATCH } E =: =: (0,2) # word start: E [[:<:]]abcd abcd (2,6) E [[:<:]]ab cab NOMATCH E$ [[:<:]]ab \nab (1,3) E [[:<:]]tag ::tag (2,5) #word end: E abc[[:>:]] abc (0,3) E abc[[:>:]] abcd NOMATCH E$ abc[[:>:]] abc\n (0,3) E abc[[:>:]] abc:: (0,3) # collating elements and rewritten set code: # - match_default normal REG_EXTENDED REG_STARTEND {E [[.zero.]] 0 (0,1) [[.element-name.]] not supported E [[.one.]] 1 (0,1) E [[.two.]] 2 (0,1) E [[.three.]] 3 (0,1) E [[.a.]] baa (1,2) #E [[.NUL.]] NULL (0,1) E [[.right-curly-bracket.]] } (0,1) E [[=right-curly-bracket=]] } (0,1) } E [[:<:]z] ! ECTYPE E [a[:>:]] ! ECTYPE E [[=a=]] a (0,1) # - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE Ei [[.A.]] A (0,1) Ei [[.A.]] a (0,1) Ei [[.A.]-b]+ AaBb (0,4) Ei [A-[.b.]]+ AaBb (0,4) Ei [[.a.]-B]+ AaBb ERANGE Eix [[.a.]-B]+ AaBb NOMATCH Ei [a-[.B.]]+ AaBb ERANGE Eix [a-[.B.]]+ AaBb NOMATCH # - match_default normal REG_EXTENDED REG_NO_POSIX_TEST Exz$ [\x61] a (0,1) Exz$ [\x61-c]+ abcd (0,3) Exz$ [a-\x63]+ abcd (0,3) # - match_default normal REG_EXTENDED REG_STARTEND E [[.a.]-c]+ abcd (0,3) E [a-[.c.]]+ abcd (0,3) E [[:alpha:]-a] ! ERANGE E [a-[:alpha:]] ! NOMATCH # try mutli-character ligatures: {E [[.ae.]] ae (0,2) [[.ligature.]] not supported E [[.ae.]] aE NOMATCH E [[.AE.]] AE (0,2) E [[.Ae.]] Ae (0,2) E [[.ae.]-b] a NOMATCH E [[.ae.]-b] b (0,1) E [[.ae.]-b] ae (0,2) E [a-[.ae.]] a (0,1) E [a-[.ae.]] b NOMATCH E [a-[.ae.]] ae (0,2) # - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE Ei [[.ae.]] AE (0,2) Ei [[.ae.]] Ae (0,2) Ei [[.AE.]] Ae (0,2) Ei [[.Ae.]] aE (0,2) Ei [[.AE.]-B] a NOMATCH Ei [[.Ae.]-b] b (0,1) Ei [[.Ae.]-b] B (0,1) Ei [[.ae.]-b] AE (0,2) } # - match_default normal REG_EXTENDED REG_STARTEND #extended perl style escape sequences: {E$ \\e \033 (0,1) perl \e not supported } {E$ \\x1b \033 (0,1) perl \x not supported E$ \\x{1b} \033 (0,1) E \x{} ! NOMATCH E \x{ ! NOMATCH E \x} ! NOMATCH E \x ! NOMATCH E \x{yy ! NOMATCH E \x{1b ! NOMATCH } # - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST {Exz \l+ ABabcAB (2,5) regex++ \l not supported Exz [\l]+ ABabcAB (2,5) Exz [a-\l] ! NOMATCH E [a-\l] ! ERANGE E [\l-a] ! ERANGE Exz [\L] ! (0,1) Exz \L+ abABCab (2,5) Exz \L+ ab,.-ab (2,5) Exz \u+ abABCab (2,5) Exz [\u]+ abABCab (2,5) Exz [\U] ! (0,1) Exz \U+ ABabcAB (2,5) } {Exz \d+ ab012ab (2,5) perl \d not supported Exz [a-\d] ! NOMATCH E [a-\d] ! ERANGE E [\d-a] ! ERANGE Exz [\d]+ ab012ad (6,7) Evxz [\d]+ ab012ad (2,5) Exz [\D] !D (1,2) Evxz [\D] !D (0,1) Exz \D+ 01abc01 (2,5) Exz \s+ ab ab (2,5) Exz [\s]+ as as (1,2) Evxz [\s]+ as as (2,5) Exz [\S] !S (1,2) Evxz [\S] !S (0,1) Exz \S+ abc (2,5) } # - match_default normal REG_EXTENDED REG_STARTEND {E \Qabc abc (0,3) regex++ \Q not supported E \Qabc\E abcd (0,3) E \Qabc\Ed abcde (0,4) E \Q+*?\\E +*?\\ (0,4) } {E \C+ abcde (0,5) regex++ \C not supported } {E \X+ abcde (0,5) regex++ \X not supported # - match_default normal REG_EXTENDED REG_STARTEND REG_UNICODE_ONLY E \X+ a\768\769 (0,3) E \X+ \2309\2307 (0,2) E \X+ \2489\2494 (0,2) } # - match_default normal REG_EXTENDED REG_STARTEND {E \Aabc abc (0,3) regex++ \A not supported E \Aabc aabc NOMATCH E a\Aab abc NOMATCH E abc\z abc (0,3) E abc\z abcd NOMATCH E$ abc\\z abc\n\n NOMATCH E$ abc\\Z abc\n (0,3) E$ abc\\Z abc\n\n (0,3) E abc\Z abc (0,3) E \Aabc abc abc (0,3) } {E \Gabc abc (0,3) perl \G not supported E \G\w+\W+ abc abc a cbbb (0,5) E \Ga+b+ aaababb abb (0,4) E \Gabc dabcd NOMATCH E a\Gbc abc NOMATCH } # # now test grep, # basically check all our restart types - line, word, etc # checking each one for null and non-null matches. # # - match_default normal REG_EXTENDED REG_STARTEND REG_GREP E a a a a aa (1,2) E a+b+ aabaabbb ab (0,3) E a(b*|c|e)d adabbdacd (0,2)(1,1) E$ a \na\na\na\naa (1,2) E$ ^ \n\n \n\n\n (0,0) E$ ^ab ab \nab ab\n (0,2) E$ ^[^\n]*\n \n \n\n \n (0,4) E abc abc (0,3) E abc abc abcabc (1,4) E$ \n\n \n\n\n \n \n\n\n\n (1,3) E$ $ \n\n \n\n\n (10,10) En$ $ \n\n \n\n\n (3,3) # - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_ICASE Ei A a a a aa (1,2) Ei A+B+ aabaabbb ab (0,3) Ei A(B*|c|e)D adabbdacd (0,2)(1,1) Ei$ A \na\na\na\naa (1,2) Ei$ ^aB Ab \nab Ab\n (0,2) Ei$ \\<abc Abcabc aBc\n\nabc (0,3) Ei ABC abc (0,3) Ei abc ABC ABCABC (1,4) # # now test merge, # # - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_no_copy # start by testing subs: # a+ ...aaa,,, ($`,...) # a+ ...aaa,,, ($',,,,) # a+ ...aaa,,, ($&,aaa) # a+ ...aaa,,, ($0,aaa) # a+ ...aaa,,, ($1,NULL) # a+ ...aaa,,, ($15,NULL) # (a+)b+ ...aaabbb,,, ($1,aaa) # [[:digit:]]* 123ab (<$0>,<123><><><>) # [[:digit:]]* 123ab1 (<$0>,<123><><><1>) # and now escapes: # a+ ...aaa,,, ($x,$x) # a+ ...aaa,,, (\a,\a) # a+ ...aaa,,, (\f,\f) # a+ ...aaa,,, (\n,\n) # a+ ...aaa,,, (\r,\r) # a+ ...aaa,,, (\t,\t) # a+ ...aaa,,, (\v,\v) # a+ ...aaa,,, (\x21,!) # a+ ...aaa,,, (\x{21},!) # a+ ...aaa,,, (\c@,\0) # a+ ...aaa,,, (\e,\27) # a+ ...aaa,,, (\0101,A) # a+ ...aaa,,, ((\0101),A) # - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_sed format_no_copy # (a+)(b+) ...aabb,, (\0,aabb) # (a+)(b+) ...aabb,, (\1,aa) # (a+)(b+) ...aabb,, (\2,bb) # (a+)(b+) ...aabb,, (&,aabb) # (a+)(b+) ...aabb,, ($,$) # (a+)(b+) ...aabb,, ($1,$1) # (a+)(b+) ...aabb,, (()?:,()?:) # (a+)(b+) ...aabb,, (\\,\\) # (a+)(b+) ...aabb,, (\&,&) # - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_perl format_no_copy # (a+)(b+) ...aabb,, ($0,aabb) # (a+)(b+) ...aabb,, ($1,aa) # (a+)(b+) ...aabb,, ($2,bb) # (a+)(b+) ...aabb,, ($&,aabb) # (a+)(b+) ...aabb,, (&,&) # (a+)(b+) ...aabb,, (\0,\0) # (a+)(b+) ...aabb,, (()?:,()?:) # - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE # move to copying unmatched data: # a+ ...aaa,,, (bbb,...bbb,,,) # a+(b+) ...aaabb,,, ($1,...bb,,,) # a+(b+) ...aaabb,,,ab*abbb? ($1,...bb,,,b*bbb?) # (a+)|(b+) ...aaabb,,,ab*abbb? ((?1A)(?2B),...AB,,,AB*AB?) # (a+)|(b+) ...aaabb,,,ab*abbb? (?1A:B,...AB,,,AB*AB?) # (a+)|(b+) ...aaabb,,,ab*abbb? ((?1A:B)C,...ACBC,,,ACBC*ACBC?) # (a+)|(b+) ...aaabb,,,ab*abbb? (?1:B,...B,,,B*B?) # # changes to newline handling with 2.11: # # - match_default normal REG_EXTENDED REG_STARTEND REG_GREP E$ ^. \n \r\n (0,1) E$ .$ \n \r\n (8,9) En$ .$ \n \r\n (1,2) # - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_UNICODE_ONLY #E ^. \8232 \8233 (0,1) #E .$ \8232 \8233 (1,2) # # non-greedy repeats added 21/04/00 # - match_default normal REG_EXTENDED E a{1,3}{1} ! BADRPT {E a*? aa (0,0) non-greedy repeats not supported E a** aaa (0,3) E a?? aa (0,0) E a++ ! BADRPT E a+? aa (0,1) E a{1,3}? aaa (0,1) E \w+?w ...ccccccwcccccw (3,10) E \W+\w+?w ...ccccccwcccccw (0,10) E abc|\w+? abd (0,1) E abc|\w+? abcd (0,3) E <\s*tag[^>]*>(.*?)<\s*/tag\s*> <tag>here is some text</tag> <tag></tag> (1,29)(6,23) E <\s*tag[^>]*>(.*?)<\s*/tag\s*> < tag attr=\"something\">here is some text< /tag > <tag></tag> (1,51)(26,43) } # # non-marking parenthesis added 25/04/00 # - match_default normal REG_EXTENDED {E (?:abc)+ xxabcabcxx (2,8) non-marking parens not supported E (?:a+)(b+) xaaabbbx (1,7)(4,7) E (a+)(?:b+) xaaabbba (1,7)(1,4) E (?:(a+)b+) xaaabbba (1,7)(1,4) E (?:a+(b+)) xaaabbba (1,7)(4,7) E a+(?#b+)b+ xaaabbba (1,7) } # # try some partial matches: # - match_partial match_default normal REG_EXTENDED REG_NO_POSIX_TEST Exz (xyz)(.*)abc xyzaaab NOMATCH Exz (xyz)(.*)abc xyz NOMATCH Exz (xyz)(.*)abc xy NOMATCH