# Path: lib/libc/tests/regex/multibyte.sh
# ATF (atf-sh) test cases exercising sed's regular-expression matching on
# multibyte (UTF-8) input.  Every test body switches LC_CTYPE to C.UTF-8
# before running its checks.

# bmpat: patterns mixing `.`/`.*` with ASCII literals, applied to input that
# contains the multibyte character 'é'.  Each check pipes the input into sed
# and expects the very same text to be printed back.
atf_test_case bmpat
bmpat_head()
{
	atf_set "descr" "Check matching multibyte characters (PR153502)"
}
bmpat_body()
{
	export LC_CTYPE="C.UTF-8"

	printf 'é' | atf_check -o "inline:é" \
	    sed -ne '/^.$/p'
	printf 'éé' | atf_check -o "inline:éé" \
	    sed -ne '/^..$/p'
	printf 'aéa' | atf_check -o "inline:aéa" \
	    sed -ne '/a.a/p'
	printf 'aéa' | atf_check -o "inline:aéa" \
	    sed -ne '/a.*a/p'
	printf 'aaéaa' | atf_check -o "inline:aaéaa" \
	    sed -ne '/aa.aa/p'
	printf 'aéaéa' | atf_check -o "inline:aéaéa" \
	    sed -ne '/a.a.a/p'
	printf 'éa' | atf_check -o "inline:éa" \
	    sed -ne '/.a/p'
	printf 'aéaa' | atf_check -o "inline:aéaa" \
	    sed -ne '/a.aa/p'
	printf 'éaé' | atf_check -o "inline:éaé" \
	    sed -ne '/.a./p'
}

# icase: the pattern is U+00B5 and sed's `I` (ignore case) flag is used; the
# test expects it to match both U+039C and U+03BC.
atf_test_case icase
icase_head()
{
	atf_set "descr" "Check case-insensitive matching for characters 128-255"
}
icase_body()
{
	export LC_CTYPE="C.UTF-8"

	# The characters are spelled as UTF-8 octal escapes; the trailing
	# newline emitted by printf is stripped by the command substitution.
	a=$(printf '\302\265\n')	# U+00B5
	b=$(printf '\316\234\n')	# U+039C
	c=$(printf '\316\274\n')	# U+03BC

	# printf with a quoted argument is used instead of an unquoted echo so
	# the bytes reach sed untouched by word splitting or globbing.
	printf '%s\n' "$b" | atf_check -o "inline:$b\n" sed -ne "/$a/Ip"
	printf '%s\n' "$c" | atf_check -o "inline:$c\n" sed -ne "/$a/Ip"
}

# mbset: bracket expressions mixing ASCII and multibyte members.  The
# "cleanup" property makes ATF run mbset_cleanup after the body.
atf_test_case mbset cleanup
mbset_head()
{
	atf_set "descr" "Check multibyte sets matching"
}
mbset_body()
{
	export LC_CTYPE="C.UTF-8"

	# This involved an erroneously implemented optimization which reduces
	# single-element sets to an exact match with a single codepoint.
	# Match sets record small-codepoint characters in a bitmap and
	# large-codepoint characters in an array; the optimization would falsely
	# trigger if either the bitmap or the array was a singleton, ignoring
	# the members of the other side of the set.
	#
	# To exercise this, we construct sets which have one member of one side
	# and one or more of the other, and verify that all members can be
	# found.
	#
	# The same eight checks are run twice: first with basic regular
	# expressions (-ne), then with extended ones (-Ene).
	for flags in -ne -Ene; do
		printf "a" > mbset; atf_check -o not-empty sed "$flags" '/[aà]/p' mbset
		printf "à" > mbset; atf_check -o not-empty sed "$flags" '/[aà]/p' mbset
		printf "a" > mbset; atf_check -o not-empty sed "$flags" '/[aàá]/p' mbset
		printf "à" > mbset; atf_check -o not-empty sed "$flags" '/[aàá]/p' mbset
		printf "á" > mbset; atf_check -o not-empty sed "$flags" '/[aàá]/p' mbset
		printf "à" > mbset; atf_check -o not-empty sed "$flags" '/[abà]/p' mbset
		printf "a" > mbset; atf_check -o not-empty sed "$flags" '/[abà]/p' mbset
		printf "b" > mbset; atf_check -o not-empty sed "$flags" '/[abà]/p' mbset
	done
}
mbset_cleanup()
{
	# Remove the scratch input file written by mbset_body.
	rm -f mbset
}

# Register every test case defined above with ATF.
atf_init_test_cases()
{
	atf_add_test_case bmpat
	atf_add_test_case icase
	atf_add_test_case mbset
}