# Source: GitHub repository freebsd/freebsd-src
# Path: lib/libc/tests/regex/multibyte.sh (branch: main)

# Test case "bmpat": regular-expression matching of multibyte characters.
# The head only records the human-readable description of the test.
atf_test_case bmpat
bmpat_head()
{
	atf_set "descr" "Check matching multibyte characters (PR153502)"
}
# Body of "bmpat": pipe short strings containing the non-ASCII character
# 'é' through sed(1) under a UTF-8 locale and require that every pattern
# prints its input unchanged.  The inputs are written without a trailing
# newline, so the expected "inline:" output is exactly the input string.
bmpat_body()
{
	export LC_CTYPE="C.UTF-8"

	# In each pattern '.' has to match the whole multibyte character,
	# not just one of its bytes, for the line to be printed.
	printf 'é' | atf_check -o "inline:é" \
	    sed -ne '/^.$/p'
	printf 'éé' | atf_check -o "inline:éé" \
	    sed -ne '/^..$/p'
	printf 'aéa' | atf_check -o "inline:aéa" \
	    sed -ne '/a.a/p'
	printf 'aéa' | atf_check -o "inline:aéa" \
	    sed -ne '/a.*a/p'
	printf 'aaéaa' | atf_check -o "inline:aaéaa" \
	    sed -ne '/aa.aa/p'
	printf 'aéaéa' | atf_check -o "inline:aéaéa" \
	    sed -ne '/a.a.a/p'
	printf 'éa' | atf_check -o "inline:éa" \
	    sed -ne '/.a/p'
	printf 'aéaa' | atf_check -o "inline:aéaa" \
	    sed -ne '/a.aa/p'
	printf 'éaé' | atf_check -o "inline:éaé" \
	    sed -ne '/.a./p'
}

# Test case "icase": case-insensitive regular-expression matching.
# The head only records the human-readable description of the test.
atf_test_case icase
icase_head()
{
	atf_set "descr" "Check case-insensitive matching for characters 128-255"
}
# Body of "icase": use U+00B5 as the pattern and require that, with sed's
# case-insensitive 'I' flag, it matches a line holding U+039C and a line
# holding U+03BC.
icase_body()
{
	export LC_CTYPE="C.UTF-8"

	# Each character is spelled as its UTF-8 bytes in octal.  Command
	# substitution strips the trailing newline, so the variables hold
	# only the character itself.
	a=$(printf '\302\265\n')	# U+00B5
	b=$(printf '\316\234\n')	# U+039C
	c=$(printf '\316\274\n')	# U+03BC

	# Expansions are quoted so the bytes reach echo as a single word.
	# echo appends the newline that the expected output spells as "\n".
	echo "$b" | atf_check -o "inline:$b\n" sed -ne "/$a/Ip"
	echo "$c" | atf_check -o "inline:$c\n" sed -ne "/$a/Ip"
}

# Test case "mbset": bracket expressions mixing ASCII and multibyte members.
# Declared with "cleanup" so that mbset_cleanup() is registered for it.
atf_test_case mbset cleanup
mbset_head()
{
	atf_set "descr" "Check multibyte sets matching"
}
# Body of "mbset": for every member of several bracket expressions, write
# that single character to the scratch file "mbset" and require that sed
# prints something when matching the file against the set.  The same
# checks run first with basic regular expressions and then with
# extended ones (-E).
mbset_body()
{
	export LC_CTYPE="C.UTF-8"

	# This involved an erroneously implemented optimization which reduces
	# single-element sets to an exact match with a single codepoint.
	# Match sets record small-codepoint characters in a bitmap and
	# large-codepoint characters in an array; the optimization would falsely
	# trigger if either the bitmap or the array was a singleton, ignoring
	# the members of the other side of the set.
	#
	# To exercise this, we construct sets which have one member of one side
	# and one or more of the other, and verify that all members can be
	# found.
	printf "a" > mbset; atf_check -o not-empty sed -ne '/[aà]/p' mbset
	printf "à" > mbset; atf_check -o not-empty sed -ne '/[aà]/p' mbset
	printf "a" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset
	printf "à" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset
	printf "á" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset
	printf "à" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset
	printf "a" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset
	printf "b" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset
	printf "a" > mbset; atf_check -o not-empty sed -Ene '/[aà]/p' mbset
	printf "à" > mbset; atf_check -o not-empty sed -Ene '/[aà]/p' mbset
	printf "a" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset
	printf "à" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset
	printf "á" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset
	printf "à" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset
	printf "a" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset
	printf "b" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset
}
# Cleanup for "mbset": remove the scratch file written by mbset_body().
# -f keeps this quiet when the file was never created.
mbset_cleanup()
{
	rm -f mbset
}

# Register every test case defined in this file with the ATF runner.
atf_init_test_cases()
{
	atf_add_test_case bmpat
	atf_add_test_case icase
	atf_add_test_case mbset
}