CoCalc -- regex++.dat

GitHub Repository: att/ast
Path: blob/master/src/cmd/re/regex++.dat
¹⁸⁰⁸ views
: translated from the Regex++ tests.txt by cvtregex++.c 2001-05-16
# manual edits were done to insert the standard error codes
#
# inserted {...} features tests may have changed line numbers and/or ordering
#
# comments beyond this point are verbatim from the original input
# 
# this file contains a script of tests to run through regress.exe
#
# comments start with a semicolon and proceed to the end of the line
#
# changes to regular expression compile flags start with a "-" as the first
# non-whitespace character and consist of a list of the printable names
# of the flags, for example "match_default"
#
# Other lines contain a test to perform using the current flag status
# the first token contains the expression to compile, the second the string
# to match it against. If the second string is "!" then the expression should
# not compile, that is the first string is an invalid regular expression.
# This is then followed by a list of integers that specify what should match,
# each pair represents the starting and ending positions of a subexpression
# starting with the zeroth subexpression (the whole match).
# A value of -1 indicates that the subexpression should not take part in the
# match at all, if the first value is -1 then no part of the expression should
# match the string.
#

# - match_default normal REG_EXTENDED

#
# try some really simple literals:
E	a	a	(0,1)
E	Z	Z	(0,1)
E	Z	aaa	NOMATCH
E	Z	xxxxZZxxx	(4,5)

# and some simple brackets:
E	(a)	zzzaazz	(3,4)(3,4)
Exz	()	zzz	(0,0)(0,0)
Exz	()	NULL	(0,0)(0,0)
E	(	!	EPAREN
E	)	!	NOMATCH
E	(aa	!	EPAREN
E	aa)	!	NOMATCH
E	a	b	NOMATCH
E	\(\)	()	(0,2)
E	\(a\)	(a)	(0,3)
E	\()	!	NOMATCH
E	(\)	!	EPAREN
E	p(a)rameter	ABCparameterXYZ	(3,12)(4,5)
E	[pq](a)rameter	ABCparameterXYZ	(3,12)(4,5)

# now try escaped brackets:
# - match_default bk_parens REG_BASIC
B	\(a\)	zzzaazz	(3,4)(3,4)
B	\(\)	zzz	(0,0)(0,0)
B	\(\)	NULL	(0,0)(0,0)
B	\(	!	EPAREN
B	\)	!	EPAREN
B	\(aa	!	EPAREN
B	aa\)	!	EPAREN
B	()	()	(0,2)
B	(a)	(a)	(0,3)
B	(\)	!	EPAREN
B	\()	!	EPAREN

# now move on to "." wildcards
# - match_default normal REG_EXTENDED REG_STARTEND
E	.	a	(0,1)
E$	.	\n	(0,1)
E$	.	\r	(0,1)
E	.	NULL	NOMATCH
# - match_default normal match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE
En	.	a	(0,1)
En$	.	\n	NOMATCH
En$	.	\r	(0,1)
En	.	NULL	NOMATCH
# - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE
En$	.	\n	NOMATCH
En$	.	\r	(0,1)
# this *WILL* produce an error from the POSIX API functions:
# - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE REG_NO_POSIX_TEST
Enz	.	NULL	NOMATCH


#
# now move on to the repetition ops,
# starting with operator *
# - match_default normal REG_EXTENDED
E	a*	b	(0,0)
E	ab*	a	(0,1)
E	ab*	ab	(0,2)
E	ab*	sssabbbbbbsss	(3,10)
E	ab*c*	a	(0,1)
E	ab*c*	abbb	(0,4)
E	ab*c*	accc	(0,4)
E	ab*c*	abbcc	(0,5)
E	*a	!	BADRPT
E$	\n*	\n\n	(0,2)
E	\**	**	(0,2)
E	\*	*	(0,1)

# now try operator +
E	ab+	a	NOMATCH
E	ab+	ab	(0,2)
E	ab+	sssabbbbbbsss	(3,10)
E	ab+c+	a	NOMATCH
E	ab+c+	abbb	NOMATCH
E	ab+c+	accc	NOMATCH
E	ab+c+	abbcc	(0,5)
E	+a	!	BADRPT
E$	\n+	\n\n	(0,2)
E	\+	+	(0,1)
E	\+	++	(0,1)
E	\++	++	(0,2)
# - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST
Exz	+	+	BADRPT
Exz	\+	!	NOMATCH
Exz	a\+	aa	NOMATCH

# now try operator ?
# - match_default normal REG_EXTENDED
E	a?	b	(0,0)
E	ab?	a	(0,1)
E	ab?	ab	(0,2)
E	ab?	sssabbbbbbsss	(3,5)
E	ab?c?	a	(0,1)
E	ab?c?	abbb	(0,2)
E	ab?c?	accc	(0,2)
E	ab?c?	abcc	(0,3)
E	?a	!	BADRPT
E$	\n?	\n\n	(0,1)
E	\?	?	(0,1)
E	\?	??	(0,1)
E	\??	??	(0,1)
# - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST
Exz	?	?	BADRPT
Exz	\?	!	NOMATCH
Exz	a\?	aa	NOMATCH
Exz	a\?	b	NOMATCH

# - match_default normal limited_ops
B	a?	a?	(0,2)
B	a+	a+	(0,2)
B	a\?	a?	(0,2)
B	a\+	a+	(0,2)

# now try operator {}
# - match_default normal REG_EXTENDED
E	a{2}	a	NOMATCH
E	a{2}	aa	(0,2)
E	a{2}	aaa	(0,2)
E	a{2,}	a	NOMATCH
E	a{2,}	aa	(0,2)
E	a{2,}	aaaaa	(0,5)
E	a{2,4}	a	NOMATCH
E	a{2,4}	aa	(0,2)
E	a{2,4}	aaa	(0,3)
E	a{2,4}	aaaa	(0,4)
E	a{2,4}	aaaaa	(0,4)
# spaces are now allowed inside {} (original Regex++ comment; the expected result below is BADBR)
E	a{ 2 , 4 }	aaaaa	BADBR
E	a{}	!	BADBR
E	a{ }	!	BADBR
E	a{2	!	EBRACE
E	a}	!	NOMATCH
E	\{\}	{}	(0,2)

# - match_default normal bk_braces
B	a\{2\}	a	NOMATCH
B	a\{2\}	aa	(0,2)
B	a\{2\}	aaa	(0,2)
B	a\{2,\}	a	NOMATCH
B	a\{2,\}	aa	(0,2)
B	a\{2,\}	aaaaa	(0,5)
B	a\{2,4\}	a	NOMATCH
B	a\{2,4\}	aa	(0,2)
B	a\{2,4\}	aaa	(0,3)
B	a\{2,4\}	aaaa	(0,4)
B	a\{2,4\}	aaaaa	(0,4)
B	a\{ 2 , 4 \}	aaaaa	BADBR
B	{}	{}	(0,2)

# now test the alternation operator |
# - match_default normal REG_EXTENDED
E	a|b	a	(0,1)
E	a|b	b	(0,1)
E	a(b|c)	ab	(0,2)(1,2)
E	a(b|c)	ac	(0,2)(1,2)
E	a(b|c)	ad	NOMATCH
E	|c	!	ENULL
E	c|	!	ENULL
E	(|)	!	ENULL
E	(a|)	!	ENULL
E	(|a)	!	ENULL
E	a\|	a|	(0,2)
# - match_default normal limited_ops
B	a|	a|	(0,2)
B	a\|	a|	(0,2)
B	|	|	(0,1)
# - match_default normal bk_vbar REG_NO_POSIX_TEST
Bxz	a|	a|	(0,2)
Bxz	a\|b	a	(0,1)
Bxz	a\|b	b	(0,1)

# now test the set operator []
# - match_default normal REG_EXTENDED
# try some literals first
E	[abc]	a	(0,1)
E	[abc]	b	(0,1)
E	[abc]	c	(0,1)
E	[abc]	d	NOMATCH
E	[^bcd]	a	(0,1)
E	[^bcd]	b	NOMATCH
E	[^bcd]	d	NOMATCH
E	[^bcd]	e	(0,1)
E	a[b]c	abc	(0,3)
E	a[ab]c	abc	(0,3)
E	a[^ab]c	adc	(0,3)
E	a[]b]c	a]c	(0,3)
E	a[[b]c	a[c	(0,3)
E	a[-b]c	a-c	(0,3)
E	a[^]b]c	adc	(0,3)
E	a[^-b]c	adc	(0,3)
E	a[b-]c	a-c	(0,3)
E	a[b	!	EBRACK
E	a[]	!	EBRACK

# then some ranges
E	[b-e]	a	NOMATCH
E	[b-e]	b	(0,1)
E	[b-e]	e	(0,1)
E	[b-e]	f	NOMATCH
E	[^b-e]	a	(0,1)
E	[^b-e]	b	NOMATCH
E	[^b-e]	e	NOMATCH
E	[^b-e]	f	(0,1)
E	a[1-3]c	a2c	(0,3)
E	a[3-1]c	!	ERANGE
E	a[1-3-5]c	!	ERANGE
E	a[1-	!	EBRACK

# and some classes
E	a[[:alpha:]]c	abc	(0,3)
E	a[[:unknown:]]c	!	ECTYPE
E	a[[:	!	ECTYPE
E	a[[:alpha	!	ECTYPE
E	a[[:alpha:]	!	EBRACK
E	a[[:alpha,:]	!	ECTYPE
E	a[[:]:]]b	!	ECTYPE
E	a[[:-:]]b	!	ECTYPE
E	a[[:alph:]]	!	ECTYPE
E	a[[:alphabet:]]	!	ECTYPE
E	[[:alnum:]]+	-%@a0X-	(3,6)
E	[[:alpha:]]+	-%@aX0-	(3,5)
E$	[[:blank:]]+	a  \tb	(1,4)
E$	[[:cntrl:]]+	a\n\tb	(1,3)
E	[[:digit:]]+	a019b	(1,4)
E	[[:graph:]]+	a%b 	(0,3)
E	[[:lower:]]+	AabC	(1,3)
# This test fails with STLPort, disable for now as this is a corner case anyway...
#[[:print:]]+ "\na b\n" 1 4
E$	[[:punct:]]+	%-&\t	(0,3)
E$	[[:space:]]+	a \n\t\rb	(1,5)
E	[[:upper:]]+	aBCd	(1,3)
E	[[:xdigit:]]+	p0f3Cx	(1,5)

# now test flag settings:
# - escape_in_lists REG_NO_POSIX_TEST
Exz$	[\n]	\n	(0,1)
# - REG_NO_POSIX_TEST
Bxz$	[\n]	\n	(0,1)
Bxz$	[\n]	\\	NOMATCH
Bxz	[[:class:]	:	ECTYPE
Bxz	[[:class:]	[	ECTYPE
Bxz	[[:class:]	c	ECTYPE

# line anchors
# - match_default normal REG_EXTENDED
En	^ab	ab	(0,2)
En	^ab	xxabxx	NOMATCH
En$	^ab	xx\nabzz	(3,5)
En	ab$	ab	(0,2)
En	ab$	abxx	NOMATCH
En$	ab$	ab\nzz	(0,2)
# - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
Eben	^ab	ab	NOMATCH
Eben	^ab	xxabxx	NOMATCH
Eben$	^ab	xx\nabzz	(3,5)
Eben	ab$	ab	NOMATCH
Eben	ab$	abxx	NOMATCH
Eben$	ab$	ab\nzz	(0,2)

# back references
# - match_default normal REG_EXTENDED
E	a(b)\2c	!	ESUBREG
E	a(b\1)c	!	ESUBREG
E	a(b*)c\1d	abbcbbd	(0,7)(1,3)
E	a(b*)c\1d	abbcbd	NOMATCH
E	a(b*)c\1d	abbcbbbd	NOMATCH
E	^(.)\1	abc	NOMATCH
E	a([bc])\1d	abcdabbd	(4,8)(5,6)
# strictly speaking this is at best ambiguous and at worst wrong; it is what most
# re implementations will match, though.
E	a(([bc])\2)*d	abbccd	(0,6)(3,5)(3,4)

E	a(([bc])\2)*d	abbcbd	NOMATCH
E	a((b)*\2)*d	abbbd	(0,5)(1,4)(2,3)
E	(ab*)[ab]*\1	ababaaa	(0,7)(0,1)
E	(a)\1bcd	aabcd	(0,5)(0,1)
E	(a)\1bc*d	aabcd	(0,5)(0,1)
E	(a)\1bc*d	aabd	(0,4)(0,1)
E	(a)\1bc*d	aabcccd	(0,7)(0,1)
E	(a)\1bc*[ce]d	aabcccd	(0,7)(0,1)
E	^(a)\1b(c)*cd$	aabcccd	(0,7)(0,1)(4,5)

#
# characters by code:
# - match_default normal REG_EXTENDED REG_STARTEND
{E	\101	A	(0,1)		not an ascii implementation
E	\172	z	(0,1)
E	\0172	z	NOMATCH
}
E	NULL	NULL	ENULL
E	NULL	NULL	ENULL

#
# word operators:
{E	\w	a	(0,1)		perl \w not supported
E	\w	z	(0,1)
E	\w	A	(0,1)
E	\w	Z	(0,1)
E	\w	_	(0,1)
E	\w	}	NOMATCH
E	\w	`	NOMATCH
E	\w	[	NOMATCH
E	\w	@	NOMATCH
}
# non-word:
{E	\W	W	NOMATCH		perl \W not supported
E	\W	z	NOMATCH
E	\W	A	NOMATCH
E	\W	Z	NOMATCH
E	\W	_	NOMATCH
E	\W	}	(0,1)
E	\W	`	(0,1)
E	\W	[	(0,1)
E	\W	@	(0,1)

E	\<\w+\W+	 aa  aa  a 	(1,5)
}
# word boundaries
{E	\<a\>	,a,	(1,2)		word boundaries not supported
E	\<*	!	BADRPT
E	\>*	!	BADRPT
E	\<+	!	BADRPT
E	\>+	!	BADRPT
E	\<?	!	BADRPT
E	\>?	!	BADRPT
# word start:
E	\<abcd	  abcd	(2,6)
E	\<ab	cab	NOMATCH
E$	\\<ab	\nab	(1,3)
E	\<tag	::tag	(2,5)
# word end:
E	abc\>	abc	(0,3)
E	abc\>	abcd	NOMATCH
E$	abc\\>	abc\n	(0,3)
E	abc\>	abc::	(0,3)

E	\<abc	abcabc abc\n\nabc	(0,3)
E	\<	  ab a aaa  		(2,2)
}
# word boundary:
{E	\babcd	abcd	(0,4)		perl \b not supported
E	\babcd	:abcd:	(1,5)		perl \b not supported
E	\bab	cab	NOMATCH
E$	\\bab	\nab	(1,3)
E	\btag	::tag	(2,5)
E	abc\b	abc	(0,3)
E	abc\b	abcd	NOMATCH
E$	abc\\b	abc\n	(0,3)
E	abc\b	abc::	(0,3)

E	\b	  abb a abbb 	(0,0)
}
# within word:
{E	\B	ab	(1,1)		perl \B not supported
E	a\Bb	ab	(0,2)
E	a\B	ab	(0,1)
E	a\B	a	NOMATCH
E	a\B	a 	NOMATCH
}

#
# buffer operators:
{E	\`abc	abc	(0,3)		regex++ \' not supported
E$	\\`abc	\nabc	NOMATCH
E	\`abc	 abc	NOMATCH
E	abc\'	abc	(0,3)
E$	abc\\'	abc\n	NOMATCH
E	abc\'	abc 	NOMATCH
}

#
# extra escape sequences:
E$	\a	\a	(0,1)
E$	\f	\f	(0,1)
E$	\n	\n	(0,1)
E$	\r	\r	(0,1)
E$	\t	\t	(0,1)
E$	\v	\v	(0,1)

E$	\\a	\a	(0,1)
E$	\\f	\f	(0,1)
E$	\\n	\n	(0,1)
E$	\\r	\r	(0,1)
E$	\\t	\t	(0,1)
E$	\\v	\v	(0,1)

E	\\a	\a	(0,2)
E	\\f	\f	(0,2)
E	\\n	\n	(0,2)
E	\\r	\r	(0,2)
E	\\t	\t	(0,2)
E	\\v	\v	(0,2)

#
# now follows various complex expressions designed to try and bust the matcher:
E	a(((b)))c	abc	(0,3)(1,2)(1,2)(1,2)
E	a(b|(c))d	abd	(0,3)(1,2)
E	a(b|(c))d	acd	(0,3)(1,2)(1,2)
E	a(b*|c)d	abbd	(0,4)(1,3)
# just gotta have one DFA-buster, of course
E	a[ab]{20}	aaaaabaaaabaaaabaaaab	(0,21)
# and an inline expansion in case somebody gets tricky
E	a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]	aaaaabaaaabaaaabaaaab	(0,21)
# and in case somebody just slips in an NFA...
E	a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)	aaaaabaaaabaaaabaaaabweeknights	(0,31)(21,24)(24,31)
# one really big one
E	1234567890123456789012345678901234567890123456789012345678901234567890	a1234567890123456789012345678901234567890123456789012345678901234567890b	(1,71)
# fish for problems as brackets go past 8
E	[ab][cd][ef][gh][ij][kl][mn]	xacegikmoq	(1,8)
E	[ab][cd][ef][gh][ij][kl][mn][op]	xacegikmoq	(1,9)
E	[ab][cd][ef][gh][ij][kl][mn][op][qr]	xacegikmoqy	(1,10)
E	[ab][cd][ef][gh][ij][kl][mn][op][q]	xacegikmoqy	(1,10)
# and as parenthesis go past 9:
E	(a)(b)(c)(d)(e)(f)(g)(h)	zabcdefghi	(1,9)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)
E	(a)(b)(c)(d)(e)(f)(g)(h)(i)	zabcdefghij	(1,10)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)
E	(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)	zabcdefghijk	(1,11)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)
E	(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)	zabcdefghijkl	(1,12)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(11,12)
E	(a)d|(b)c	abc	(1,3)(-1,-1)(1,2)
E	_+((www)|(ftp)|(mailto)):_*	_wwwnocolon _mailto:	(12,20)(13,19)(-1,-1)(-1,-1)(13,19)

# subtleties of matching
E	a(b)?cd		acd	(0,3)
E	a(b)?c\1d	acd	NOMATCH
E	a(b?c)+d	accd	(0,4)(2,3)
E	(wee|week)(knights|night)	weeknights	(0,10)(0,3)(3,10)
E	.*	abc	(0,3)
E	a(b|(c))d	abd	(0,3)(1,2)
E	a(b|(c))d	acd	(0,3)(1,2)(1,2)
E	a(b*|c|e)d	abbd	(0,4)(1,3)
E	a(b*|c|e)d	acd	(0,3)(1,2)
E	a(b*|c|e)d	ad	(0,2)(1,1)
E	a(b?)c	abc	(0,3)(1,2)
E	a(b?)c	ac	(0,2)(1,1)
E	a(b+)c	abc	(0,3)(1,2)
E	a(b+)c	abbbc	(0,5)(1,4)
E	a(b*)c	ac	(0,2)(1,1)
E	(a|ab)(bc([de]+)f|cde)	abcdef	(0,6)(0,1)(1,6)(3,5)
E	a([bc]?)c	abc	(0,3)(1,2)
E	a([bc]?)c	ac	(0,2)(1,1)
E	a([bc]+)c	abc	(0,3)(1,2)
E	a([bc]+)c	abcc	(0,4)(1,3)
E	a([bc]+)bc	abcbc	(0,5)(1,3)
E	a(bb+|b)b	abb	(0,3)(1,2)
E	a(bbb+|bb+|b)b	abb	(0,3)(1,2)
E	a(bbb+|bb+|b)b	abbb	(0,4)(1,3)
E	a(bbb+|bb+|b)bb	abbb	(0,4)(1,2)
E	(.*).*	abcdef	(0,6)(0,6)
E	(a*)*	bc	(0,0)(0,0)

# do we get the right subexpression when it is used more than once?
E	a(b|c)*d	ad	(0,2)
E	a(b|c)*d	abcd	(0,4)(2,3)
E	a(b|c)+d	abd	(0,3)(1,2)
E	a(b|c)+d	abcd	(0,4)(2,3)
E	a(b|c?)+d	ad	(0,2)(1,1)
E	a(b|c?)+d	abcd	(0,4)(2,3)
E	a(b|c){0,0}d	ad	(0,2)
E	a(b|c){0,1}d	ad	(0,2)
E	a(b|c){0,1}d	abd	(0,3)(1,2)
E	a(b|c){0,2}d	ad	(0,2)
E	a(b|c){0,2}d	abcd	(0,4)(2,3)
E	a(b|c){0,}d	ad	(0,2)
E	a(b|c){0,}d	abcd	(0,4)(2,3)
E	a(b|c){1,1}d	abd	(0,3)(1,2)
E	a(b|c){1,2}d	abd	(0,3)(1,2)
E	a(b|c){1,2}d	abcd	(0,4)(2,3)
E	a(b|c){1,}d	abd	(0,3)(1,2)
E	a(b|c){1,}d	abcd	(0,4)(2,3)
E	a(b|c){2,2}d	acbd	(0,4)(2,3)
E	a(b|c){2,2}d	abcd	(0,4)(2,3)
E	a(b|c){2,4}d	abcd	(0,4)(2,3)
E	a(b|c){2,4}d	abcbd	(0,5)(3,4)
E	a(b|c){2,4}d	abcbcd	(0,6)(4,5)
E	a(b|c){2,}d	abcd	(0,4)(2,3)
E	a(b|c){2,}d	abcbd	(0,5)(3,4)
E	a(b+|((c)*))+d	abd	(0,3)(1,2)
E	a(b+|((c)*))+d	abcd	(0,4)(2,3)(2,3)(2,3)

# - match_default normal REG_EXTENDED REG_STARTEND REG_NOSPEC literal
L	\**?/{}	\**?/{}	(0,7)

# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST
# try to match C++ syntax elements:
# line comment:
Exz$	//[^\n]*	++i //here is a line comment\n	(4,28)
# block comment:
Exz	/\*([^*]|\*+[^*/])*\*+/	/* here is a block comment */	(0,29)(26,27)
Exz	/\*([^*]|\*+[^*/])*\*+/	/**/	(0,4)
Exz	/\*([^*]|\*+[^*/])*\*+/	/***/	(0,5)
Exz	/\*([^*]|\*+[^*/])*\*+/	/****/	(0,6)
Exz	/\*([^*]|\*+[^*/])*\*+/	/*****/	(0,7)
Exz	/\*([^*]|\*+[^*/])*\*+/	/*****/*/	(0,7)
# preprocessor directives:
E$	^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]*	#define some_symbol	(0,19)
E$	^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]*	#define some_symbol(x) #x	(0,25)
E$	^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]*	#define some_symbol(x) \\  \r\n  foo();\\\r\n   printf(#x);	(0,27)
# literals:
E	((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?	0xFF	(0,4)(0,4)(0,4)
E	((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?	35	(0,2)(0,2)(-1,-1)(0,2)
E	((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?	0xFFu	(0,5)(0,4)(0,4)
E	((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?	0xFFL	(0,5)(0,4)(0,4)(-1,-1)(4,5)
E	((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?	0xFFFFFFFFFFFFFFFFuint64	(0,24)(0,18)(0,18)(-1,-1)(19,24)(19,24)(22,24)
# strings:
#E	'([^\\']|\\.)*'	'\\x3A'	(0,6)(4,5)
E	'([^\\']|\\.)*'	'\\''	(0,4)(1,3)
E$	'([^']|\\.)*'	'\n'	(0,3)(1,2)

# now try and test some unicode specific characters:
# - match_default normal REG_EXTENDED REG_UNICODE_ONLY
E	[[:unicode:]]+	a\0300\0400z	ECTYPE

# finally try some case insensitive matches:
# - match_default normal REG_EXTENDED REG_ICASE
# upper and lower have no meaning here so they fail, however these
# may compile with other libraries...
Ei	[[:lower:]]+	Ab	(0,2)
Ei	[[:lower:]]+	aB	(0,2)
Ei	[[:upper:]]+	Ab	(0,2)
Ei	[[:upper:]]+	aB	(0,2)
Ei	0123456789@abcdefghijklmnopqrstuvwxyz_`ABCDEFGHIJKLMNOPQRSTUVWXYZ	0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz	(0,65)
Ei	0123456789@abcdefghijklmnopqrstuvwxyz\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ	0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz	(0,66)
Ei	0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\}	0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}	(0,72)

# known and suspected bugs:
# - match_default normal REG_EXTENDED
E	\(	(	(0,1)
E	\)	)	(0,1)
E	\$	$	(0,1)
E	\^	^	(0,1)
E	\.	.	(0,1)
E	\*	*	(0,1)
E	\+	+	(0,1)
E	\?	?	(0,1)
E	\[	[	(0,1)
E	\]	]	(0,1)
E	\|	|	(0,1)
E	\\	\\	(0,1)
E	#	#	(0,1)
E	\#	#	BADESC
Ex	\#	#	(0,1)
E	a-	a-	(0,2)
E	\-	-	BADESC
Ex	\-	-	(0,1)
E	\{	{	(0,1)
E	\}	}	(0,1)
E	0	0	(0,1)
E	1	1	(0,1)
E	9	9	(0,1)
E	b	b	(0,1)
E	B	B	(0,1)
E	<	<	(0,1)
E	>	>	(0,1)
E	w	w	(0,1)
E	W	W	(0,1)
E	`	`	(0,1)
E	'	'	(0,1)
E$	\n	\n	(0,1)
E	,	,	(0,1)
E	a	a	(0,1)
E	f	f	(0,1)
E	n	n	(0,1)
E	r	r	(0,1)
E	t	t	(0,1)
E	v	v	(0,1)
E	c	c	(0,1)
E	x	x	(0,1)
E	:	:	(0,1)
E	(\.[[:alnum:]]+){2}	w.a.b 	(1,5)(3,5)

# - match_default normal REG_EXTENDED REG_ICASE
Ei	a	A	(0,1)
Ei	A	a	(0,1)
Ei	[abc]+	abcABC	(0,6)
Ei	[ABC]+	abcABC	(0,6)
Ei	[a-z]+	abcABC	(0,6)
Ei	[A-Z]+	abzANZ	(0,6)
Ei	[a-Z]+	abzABZ	ERANGE
Eix	[a-Z]+	abzABZ	NOMATCH
Ei	[A-z]+	abzABZ	(0,6)
Ei	[[:lower:]]+	abyzABYZ	(0,8)
Ei	[[:upper:]]+	abzABZ	(0,6)
Ei	[[:word:]]+	abcZZZ	(0,6)
Ei	[[:alpha:]]+	abyzABYZ	(0,8)
Ei	[[:alnum:]]+	09abyzABYZ	(0,10)

# updated tests for version 2:
# - match_default normal REG_EXTENDED
E$	\x41	A	(0,1)
E$	\xff	\xFF	(0,1)
E$	\xFF	\xff	(0,1)
# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST
{Exz$	\\c[	\e	(0,1)			perl \c not supported
# - match_default normal REG_EXTENDED
E$	\\cA	\001	(0,1)
E$	\\cz	\032	(0,1)
E$	\\c=	!	NOMATCH
E$	\\c?	!	NOMATCH
}
E	=:	=:	(0,2)

# word start:
E	[[:<:]]abcd	  abcd	(2,6)
E	[[:<:]]ab	cab	NOMATCH
E$	[[:<:]]ab	\nab	(1,3)
E	[[:<:]]tag	::tag	(2,5)
#word end:
E	abc[[:>:]]	abc	(0,3)
E	abc[[:>:]]	abcd	NOMATCH
E$	abc[[:>:]]	abc\n	(0,3)
E	abc[[:>:]]	abc::	(0,3)

# collating elements and rewritten set code:
# - match_default normal REG_EXTENDED REG_STARTEND
{E	[[.zero.]]	0	(0,1)		[[.element-name.]] not supported
E	[[.one.]]	1	(0,1)
E	[[.two.]]	2	(0,1)
E	[[.three.]]	3	(0,1)
E	[[.a.]]	baa	(1,2)
#E	[[.NUL.]]	NULL	(0,1)
E	[[.right-curly-bracket.]]	}	(0,1)
E	[[=right-curly-bracket=]]	}	(0,1)
}
E	[[:<:]z]	!	ECTYPE
E	[a[:>:]]	!	ECTYPE
E	[[=a=]]	a	(0,1)
# - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
Ei	[[.A.]]	A	(0,1)
Ei	[[.A.]]	a	(0,1)
Ei	[[.A.]-b]+	AaBb	(0,4)
Ei	[A-[.b.]]+	AaBb	(0,4)
Ei	[[.a.]-B]+	AaBb	ERANGE
Eix	[[.a.]-B]+	AaBb	NOMATCH
Ei	[a-[.B.]]+	AaBb	ERANGE
Eix	[a-[.B.]]+	AaBb	NOMATCH
# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST
Exz$	[\x61]	a	(0,1)
Exz$	[\x61-c]+	abcd	(0,3)
Exz$	[a-\x63]+	abcd	(0,3)
# - match_default normal REG_EXTENDED REG_STARTEND
E	[[.a.]-c]+	abcd	(0,3)
E	[a-[.c.]]+	abcd	(0,3)
E	[[:alpha:]-a]	!	ERANGE
E	[a-[:alpha:]]	!	NOMATCH

# try multi-character ligatures:
{E	[[.ae.]]	ae	(0,2)		[[.ligature.]] not supported
E	[[.ae.]]	aE	NOMATCH
E	[[.AE.]]	AE	(0,2)
E	[[.Ae.]]	Ae	(0,2)
E	[[.ae.]-b]	a	NOMATCH
E	[[.ae.]-b]	b	(0,1)
E	[[.ae.]-b]	ae	(0,2)
E	[a-[.ae.]]	a	(0,1)
E	[a-[.ae.]]	b	NOMATCH
E	[a-[.ae.]]	ae	(0,2)
# - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
Ei	[[.ae.]]	AE	(0,2)
Ei	[[.ae.]]	Ae	(0,2)
Ei	[[.AE.]]	Ae	(0,2)
Ei	[[.Ae.]]	aE	(0,2)
Ei	[[.AE.]-B]	a	NOMATCH
Ei	[[.Ae.]-b]	b	(0,1)
Ei	[[.Ae.]-b]	B	(0,1)
Ei	[[.ae.]-b]	AE	(0,2)
}

# - match_default normal REG_EXTENDED REG_STARTEND
#extended perl style escape sequences:
{E$	\\e	\033	(0,1)		perl \e not supported
}
{E$	\\x1b	\033	(0,1)		perl \x not supported
E$	\\x{1b}	\033	(0,1)
E	\x{}	!	NOMATCH
E	\x{	!	NOMATCH
E	\x}	!	NOMATCH
E	\x	!	NOMATCH
E	\x{yy	!	NOMATCH
E	\x{1b	!	NOMATCH
}

# - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST
{Exz	\l+	ABabcAB	(2,5)		regex++ \l not supported
Exz	[\l]+	ABabcAB	(2,5)
Exz	[a-\l]	!	NOMATCH
E	[a-\l]	!	ERANGE
E	[\l-a]	!	ERANGE
Exz	[\L]	!	(0,1)
Exz	\L+	abABCab	(2,5)
Exz	\L+	ab,.-ab	(2,5)
Exz	\u+	abABCab	(2,5)
Exz	[\u]+	abABCab	(2,5)
Exz	[\U]	!	(0,1)
Exz	\U+	ABabcAB	(2,5)
}
{Exz	\d+	ab012ab	(2,5)		perl \d not supported
Exz	[a-\d]	!	NOMATCH
E	[a-\d]	!	ERANGE
E	[\d-a]	!	ERANGE
Exz	[\d]+	ab012ad	(6,7)
Evxz	[\d]+	ab012ad	(2,5)
Exz	[\D]	!D	(1,2)
Evxz	[\D]	!D	(0,1)
Exz	\D+	01abc01	(2,5)
Exz	\s+	ab   ab	(2,5)
Exz	[\s]+	as   as	(1,2)
Evxz	[\s]+	as   as	(2,5)
Exz	[\S]	!S	(1,2)
Evxz	[\S]	!S	(0,1)
Exz	\S+	  abc  	(2,5)
}
# - match_default normal REG_EXTENDED REG_STARTEND
{E	\Qabc	abc	(0,3)		regex++ \Q not supported
E	\Qabc\E	abcd	(0,3)
E	\Qabc\Ed	abcde	(0,4)
E	\Q+*?\\E	+*?\\	(0,4)
}

{E	\C+	abcde	(0,5)		regex++ \C not supported
}
{E	\X+	abcde	(0,5)		regex++ \X not supported

# - match_default normal REG_EXTENDED REG_STARTEND REG_UNICODE_ONLY
E	\X+	a\768\769	(0,3)
E	\X+	\2309\2307	(0,2)
E	\X+	\2489\2494	(0,2)
}

# - match_default normal REG_EXTENDED REG_STARTEND
{E	\Aabc	abc	(0,3)		regex++ \A not supported
E	\Aabc	aabc	NOMATCH
E	a\Aab	abc	NOMATCH
E	abc\z	abc	(0,3)
E	abc\z	abcd	NOMATCH
E$	abc\\z	abc\n\n	NOMATCH
E$	abc\\Z	abc\n	(0,3)
E$	abc\\Z	abc\n\n	(0,3)
E	abc\Z	abc	(0,3)
E	\Aabc	abc   abc	(0,3)
}

{E	\Gabc	abc	(0,3)		perl \G not supported
E	\G\w+\W+	abc  abc a cbbb   	(0,5)
E	\Ga+b+	aaababb  abb	(0,4)
E	\Gabc	dabcd	NOMATCH
E	a\Gbc	abc	NOMATCH
}

#
# now test grep,
# basically check all our restart types - line, word, etc
# checking each one for null and non-null matches.
#
# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP
E	a	 a a a aa	(1,2)
E	a+b+	aabaabbb ab	(0,3)
E	a(b*|c|e)d	adabbdacd	(0,2)(1,1)
E$	a	\na\na\na\naa	(1,2)

E$	^	   \n\n  \n\n\n	(0,0)
E$	^ab	ab  \nab  ab\n	(0,2)
E$	^[^\n]*\n	   \n  \n\n  \n	(0,4)

E	abc	abc	(0,3)
E	abc	 abc abcabc 	(1,4)
E$	\n\n	 \n\n\n       \n      \n\n\n\n  	(1,3)

E$	$	   \n\n  \n\n\n	(10,10)
En$	$	   \n\n  \n\n\n	(3,3)

# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_ICASE
Ei	A	 a a a aa	(1,2)
Ei	A+B+	aabaabbb ab	(0,3)
Ei	A(B*|c|e)D	adabbdacd	(0,2)(1,1)
Ei$	A	\na\na\na\naa	(1,2)

Ei$	^aB	Ab  \nab  Ab\n	(0,2)
Ei$	\\<abc	Abcabc aBc\n\nabc	(0,3)

Ei	ABC	abc	(0,3)
Ei	abc	 ABC ABCABC 	(1,4)


#
# now test merge,
#
# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_no_copy
# start by testing subs:
#	a+	...aaa,,,	($`,...)
#	a+	...aaa,,,	($',,,,)
#	a+	...aaa,,,	($&,aaa)
#	a+	...aaa,,,	($0,aaa)
#	a+	...aaa,,,	($1,NULL)
#	a+	...aaa,,,	($15,NULL)
#	(a+)b+	...aaabbb,,,	($1,aaa)
#	[[:digit:]]*	123ab	(<$0>,<123><><><>)
#	[[:digit:]]*	123ab1	(<$0>,<123><><><1>)

# and now escapes:
#	a+	...aaa,,,	($x,$x)
#	a+	...aaa,,,	(\a,\a)
#	a+	...aaa,,,	(\f,\f)
#	a+	...aaa,,,	(\n,\n)
#	a+	...aaa,,,	(\r,\r)
#	a+	...aaa,,,	(\t,\t)
#	a+	...aaa,,,	(\v,\v)

#	a+	...aaa,,,	(\x21,!)
#	a+	...aaa,,,	(\x{21},!)
#	a+	...aaa,,,	(\c@,\0)
#	a+	...aaa,,,	(\e,\27)
#	a+	...aaa,,,	(\0101,A)
#	a+	...aaa,,,	((\0101),A)

# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_sed format_no_copy
#	(a+)(b+)	...aabb,,	(\0,aabb)
#	(a+)(b+)	...aabb,,	(\1,aa)
#	(a+)(b+)	...aabb,,	(\2,bb)
#	(a+)(b+)	...aabb,,	(&,aabb)
#	(a+)(b+)	...aabb,,	($,$)
#	(a+)(b+)	...aabb,,	($1,$1)
#	(a+)(b+)	...aabb,,	(()?:,()?:)
#	(a+)(b+)	...aabb,,	(\\,\\)
#	(a+)(b+)	...aabb,,	(\&,&)


# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_perl format_no_copy
#	(a+)(b+)	...aabb,,	($0,aabb)
#	(a+)(b+)	...aabb,,	($1,aa)
#	(a+)(b+)	...aabb,,	($2,bb)
#	(a+)(b+)	...aabb,,	($&,aabb)
#	(a+)(b+)	...aabb,,	(&,&)
#	(a+)(b+)	...aabb,,	(\0,\0)
#	(a+)(b+)	...aabb,,	(()?:,()?:)

# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE
# move to copying unmatched data:
#	a+	...aaa,,,	(bbb,...bbb,,,)
#	a+(b+)	...aaabb,,,	($1,...bb,,,)
#	a+(b+)	...aaabb,,,ab*abbb?	($1,...bb,,,b*bbb?)

#	(a+)|(b+)	...aaabb,,,ab*abbb?	((?1A)(?2B),...AB,,,AB*AB?)
#	(a+)|(b+)	...aaabb,,,ab*abbb?	(?1A:B,...AB,,,AB*AB?)
#	(a+)|(b+)	...aaabb,,,ab*abbb?	((?1A:B)C,...ACBC,,,ACBC*ACBC?)
#	(a+)|(b+)	...aaabb,,,ab*abbb?	(?1:B,...B,,,B*B?)

#
# changes to newline handling with 2.11:
#

# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP

E$	^.	  \n  \r\n  	(0,1)
E$	.$	  \n  \r\n  	(8,9)
En$	.$	  \n  \r\n  	(1,2)

# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_UNICODE_ONLY
#E	^.	\8232 \8233  	(0,1)
#E	.$	\8232 \8233  	(1,2)

#
# non-greedy repeats added 21/04/00
# - match_default normal REG_EXTENDED
E	a{1,3}{1}	!	BADRPT
{E	a*?	aa	(0,0)		non-greedy repeats not supported
E	a**	aaa	(0,3)
E	a??	aa	(0,0)
E	a++	!	BADRPT
E	a+?	aa	(0,1)
E	a{1,3}?	aaa	(0,1)
E	\w+?w	...ccccccwcccccw	(3,10)
E	\W+\w+?w	...ccccccwcccccw	(0,10)
E	abc|\w+?	abd	(0,1)
E	abc|\w+?	abcd	(0,3)
E	<\s*tag[^>]*>(.*?)<\s*/tag\s*>	 <tag>here is some text</tag> <tag></tag>	(1,29)(6,23)
E	<\s*tag[^>]*>(.*?)<\s*/tag\s*>	 < tag attr=\"something\">here is some text< /tag > <tag></tag>	(1,51)(26,43)
}

#
# non-marking parenthesis added 25/04/00
# - match_default normal REG_EXTENDED
{E	(?:abc)+	xxabcabcxx	(2,8)		non-marking parens not supported
E	(?:a+)(b+)	xaaabbbx	(1,7)(4,7)
E	(a+)(?:b+)	xaaabbba	(1,7)(1,4)
E	(?:(a+)b+)	xaaabbba	(1,7)(1,4)
E	(?:a+(b+))	xaaabbba	(1,7)(4,7)
E	a+(?#b+)b+	xaaabbba	(1,7)
}

#
# try some partial matches:
# - match_partial match_default normal REG_EXTENDED REG_NO_POSIX_TEST
Exz	(xyz)(.*)abc	xyzaaab	NOMATCH
Exz	(xyz)(.*)abc	xyz	NOMATCH
Exz	(xyz)(.*)abc	xy	NOMATCH
Product

Resources

Company