Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/cmd/ksh93/tests/locale.sh
1810 views
1
########################################################################
2
# #
3
# This software is part of the ast package #
4
# Copyright (c) 1982-2012 AT&T Intellectual Property #
5
# and is licensed under the #
6
# Eclipse Public License, Version 1.0 #
7
# by AT&T Intellectual Property #
8
# #
9
# A copy of the License is available at #
10
# http://www.eclipse.org/org/documents/epl-v10.html #
11
# (with md5 checksum b35adb5213ca9657e911e9befb180842) #
12
# #
13
# Information and Software Systems Research #
14
# AT&T Research #
15
# Florham Park NJ #
16
# #
17
# David Korn <[email protected]> #
18
# #
19
########################################################################
20
# err_exit LINE MESSAGE...
# Report one failed check on standard error and count it.
#   $1     line number of the failing check (the err_exit alias passes $LINENO)
#   $2...  words of the failure message
# Output:  a tab followed by "<Command>[<line>]: <message>" on file descriptor 2.
# Globals: reads Command, adds 1 to Errors.
function err_exit
{
    print -u2 -n "\t"
    # The prefix is quoted: left bare, "[$1]" is a bracket glob and the whole
    # word could be replaced by a matching file name in the current directory.
    print -u2 -r "${Command}[$1]:" "${@:2}"
    let Errors+=1
}
26
# Every err_exit call passes the caller's line number as the first argument.
alias err_exit='err_exit $LINENO'

Command=${0##*/}    # script name without its directory part, used in reports
integer Errors=0    # number of failed checks so far

# Start with LANG and every LC_* variable unset.
unset LANG ${!LC_*}

# Work inside a private scratch directory that is removed when the script exits.
tmp=$(mktemp -dt) || { err_exit mktemp -dt failed; exit 1; }
# Single quotes defer the expansion to exit time so the path can be quoted
# there; the trap leaves the directory before deleting it.
trap 'cd /; rm -rf "$tmp"' EXIT
cd "$tmp" || exit
36
37
# A locale set inside a subshell must not leak into the parent shell.
# The probe is the diagnostic the shell prints when asked to run '/'; sed drops
# everything up to the last ": " and any trailing " [...]" part.
# a = reference message with no locale change.
a=$($SHELL -c '/' 2>&1 | sed -e "s,.*: *,," -e "s, *\[.*,,")
# LC_ALL=debug as a per-command assignment inside the subshell.
b=$($SHELL -c '(LC_ALL=debug / 2>/dev/null); /' 2>&1 | sed -e "s,.*: *,," -e "s, *\[.*,,")
[[ "$b" == "$a" ]] || err_exit "locale not restored after subshell -- expected '$a', got '$b'"
# LC_ALL=debug as a plain assignment inside the subshell.
b=$($SHELL -c '(LC_ALL=debug; / 2>/dev/null); /' 2>&1 | sed -e "s,.*: *,," -e "s, *\[.*,,")
[[ "$b" == "$a" ]] || err_exit "locale not restored after subshell -- expected '$a', got '$b'"
42
43
# test shift-jis \x81\x40 ... \x81\x7E encodings
# (shift char followed by 7 bit ascii)
#
# For every installed locale whose name contains "jis", and for every byte
# 0x40..0x7E both with and without the \x81 lead byte:
#   - printf "$s" and $'$s' must produce the same string
#   - print must give the same output for the unquoted and the quoted value

typeset -i16 chr
for locale in $(PATH=/bin:/usr/bin locale -a 2>/dev/null | grep -i jis)
do
    export LC_ALL=$locale
    for ((chr=0x40; chr<=0x7E; chr++))
    do
        # chr prints as 16#NN; strip the base prefix to get the hex digits
        c=${chr#16#}
        for s in \\x81\\x$c \\x$c
        do
            b="$(printf "$s")"
            eval n=\$\'$s\'
            [[ $b == "$n" ]] || err_exit "LC_ALL=$locale printf difference for \"$s\" -- expected '$n', got '$b'"
            # $b is deliberately unquoted here: the check compares it with the quoted form
            u=$(print -- $b)
            q=$(print -- "$b")
            [[ $u == "$q" ]] || err_exit "LC_ALL=$locale quoted print difference for \"$s\" -- $b => '$u' vs \"$b\" => '$q'"
        done
    done
done
61
62
# this locale is supported by ast on all platforms
# EU for { decimal_point="," thousands_sep="." }

locale=C_EU.UTF-8

export LC_ALL=C

# test multibyte value/trace format -- $'\303\274' is UTF-8 u-umlaut

# The checks below run only when %2s pads the two-byte character differently
# under C and under $locale.
c=$(LC_ALL=C $SHELL -c "printf $':%2s:\n' $'\303\274'")
u=$(LC_ALL=$locale $SHELL -c "printf $':%2s:\n' $'\303\274'" 2>/dev/null)
if [[ "$c" != "$u" ]]
then
    LC_ALL=$locale
    # Expected combined output and xtrace of the script below. PS4 makes every
    # trace line start with "+<line number>+", so the string must not be
    # re-indented and the script must keep its exact line layout.
    x=$'+2+ typeset item.text\
+3+ item.text=\303\274\
+4+ print -- \303\274\
\303\274\
+5+ eval $\'arr[0]=(\\n\\ttext=\\303\\274\\n)\'
+2+ arr[0].text=ü\
+6+ print -- \303\274\
ü\
+7+ eval txt=$\'(\\n\\ttext=\\303\\274\\n)\'
+2+ txt.text=\303\274\
+8+ print -- \'(\' text=$\'\\303\\274\' \')\'\
( text=\303\274 )'
    u=$(LC_ALL=$locale PS4='+$LINENO+ ' $SHELL -x -c "
item=(typeset text)
item.text=$'\303\274'
print -- \"\${item.text}\"
eval \"arr[0]=\$item\"
print -- \"\${arr[0].text}\"
eval \"txt=\${arr[0]}\"
print -- \$txt
" 2>&1)
    [[ "$u" == "$x" ]] || err_exit LC_ALL=$locale multibyte value/trace format failed

    # printf %04x of a quoted multibyte character must print its code point.
    x=$'00fc\n20ac'
    u=$(LC_ALL=$locale $SHELL -c $'printf "%04x\n" \$\'\"\303\274\"\' \$\'\"\xE2\x82\xAC\"\'')
    [[ $u == "$x" ]] || err_exit LC_ALL=$locale multibyte %04x printf format failed
fi
102
103
# ${var:offset:len} must count characters, not bytes.
# The check runs only when wc -m under $locale reports 10 for the 9-character
# line (four euro signs, "w", four euro signs) plus its newline.
if (( $($SHELL -c $'export LC_ALL='$locale$'; print -r "\342\202\254\342\202\254\342\202\254\342\202\254w\342\202\254\342\202\254\342\202\254\342\202\254" | wc -m' 2>/dev/null) == 10 ))
then
    LC_ALL=$locale $SHELL -c b1=$'"\342\202\254\342\202\254\342\202\254\342\202\254w\342\202\254\342\202\254\342\202\254\342\202\254"; [[ ${b1:4:1} == w ]]' || err_exit 'multibyte ${var:offset:len} not working correctly'
fi
106
107
#$SHELL -c 'export LANG='$locale'; printf "\u[20ac]\u[20ac]" > $tmp/two_euro_chars.txt'
# The file holds two euro signs, three bytes each.
printf $'\342\202\254\342\202\254' > "$tmp/two_euro_chars.txt"
# Expected wc -C results with LC_ALL=C, with LC_ALL unset (LANG=$locale then
# applies), and with LC_ALL=C again.
exp="6 2 6"
# First with wc run through "command".
set -- $($SHELL -c "
    unset LC_CTYPE
    export LANG=$locale
    export LC_ALL=C
    command wc -C < $tmp/two_euro_chars.txt
    unset LC_ALL
    command wc -C < $tmp/two_euro_chars.txt
    export LC_ALL=C
    command wc -C < $tmp/two_euro_chars.txt
")
got=$*
[[ $got == $exp ]] || err_exit "command wc LC_ALL default failed -- expected '$exp', got '$got'"
# Then with wc loaded as a builtin; the inner script prints nothing when
# neither "builtin" form succeeds.
set -- $($SHELL -c "
    if builtin wc 2>/dev/null || builtin -f cmd wc 2>/dev/null
    then
        unset LC_CTYPE
        export LANG=$locale
        export LC_ALL=C
        wc -C < $tmp/two_euro_chars.txt
        unset LC_ALL
        wc -C < $tmp/two_euro_chars.txt
        export LC_ALL=C
        wc -C < $tmp/two_euro_chars.txt
    fi
")
got=$*
[[ $got == $exp ]] || err_exit "builtin wc LC_ALL default failed -- expected '$exp', got '$got'"
136
137
# multibyte char straddling buffer boundary

# Generate ko.dat: 163 comment lines of 50 bytes each, one more line made by
# printf, then two ": ... :" commands whose argument is a long multibyte string.
{
    unset i
    integer i
    for ((i = 0; i < 163; i++))
    do
        print "#234567890123456789012345678901234567890123456789"
    done
    printf $'%-.*c\n' 15 '#'
    for ((i = 0; i < 2; i++))
    do
        print $': "\xe5\xae\x9f\xe8\xa1\x8c\xe6\xa9\x9f\xe8\x83\xbd\xe3\x82\x92\xe8\xa1\xa8\xe7\xa4\xba\xe3\x81\x97\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82" :'
    done
} > ko.dat

# The generated script must run without error when read from standard input.
LC_ALL=$locale $SHELL < ko.dat 2> /dev/null || err_exit "script with multibyte char straddling buffer boundary fails"
152
153
# Each row: expected output, then the values for LC_ALL, LC_NUMERIC and LANG
# ('' means the variable is not exported).
# exp LC_ALL LC_NUMERIC LANG
set -- \
    2,5 $locale C '' \
    2.5 C $locale '' \
    2,5 $locale '' C \
    2,5 '' $locale C \
    2.5 C '' $locale \
    2.5 '' C $locale

unset a b c
unset LC_ALL LC_NUMERIC LANG
integer a b c
while (( $# >= 4 ))
do
    exp=$1
    unset H V
    typeset -A H    # command strings already tried for this row
    typeset -a V    # V[0..2]: the export statement for each variable, if any
    [[ $2 ]] && V[0]="export LC_ALL=$2;"
    [[ $3 ]] && V[1]="export LC_NUMERIC=$3;"
    [[ $4 ]] && V[2]="export LANG=$4;"
    # Try every ordering (a, b, c) of the three statements; the arithmetic
    # result must print as $exp whatever the order of the exports.
    for ((a = 0; a < 3; a++))
    do
        for ((b = 0; b < 3; b++))
        do
            if (( b != a ))
            then
                for ((c = 0; c < 3; c++))
                do
                    if (( c != a && c != b ))
                    then
                        T=${V[$a]}${V[$b]}${V[$c]}
                        # an empty slot makes some orderings identical; run each once
                        if [[ ! ${H[$T]} ]]
                        then
                            H[$T]=1
                            got=$($SHELL -c "${T}print \$(( $exp ))" 2>&1)
                            [[ $got == $exp ]] || err_exit "${T} sequence failed -- expected '$exp', got '$got'"
                        fi
                    fi
                done
            fi
        done
    done
    shift 4
done
191
192
# setlocale(LC_ALL,"") after setlocale() initialization

# input1, input2: two fields separated by the three-byte sequence
# \357\274\240; delim holds that sequence alone.
printf 'f1\357\274\240f2\n' > input1
printf 't2\357\274\240f1\n' > input2
printf '\357\274\240\n' > delim
# The generated script exports LC_ALL itself and is started from a shell with
# no locale variables set.
print "export LC_ALL=$locale
join -j1 1 -j2 2 -o 1.1 -t \$(cat delim) input1 input2 > out" > script
$SHELL -c 'unset LANG ${!LC_*}; $SHELL ./script' ||
    err_exit "join test script failed -- exit code $?"
exp="f1"
got="$(<out)"
[[ $got == "$exp" ]] || err_exit "LC_ALL test script failed -- expected '$exp', got '$got'"
204
205
# multibyte identifiers

# \u[5929] is used as a variable name, as a ksh-style function name and as a
# POSIX-style function name; each inner script must print OK.
exp=OK
got=$(LC_ALL=C.UTF-8 $SHELL -c $'\u[5929]=OK; print ${\u[5929]}' 2>&1)
[[ $got == "$exp" ]] || err_exit "multibyte variable definition/expansion failed -- expected '$exp', got '$got'"
got=$(LC_ALL=C.UTF-8 $SHELL -c $'function \u[5929]\n{\nprint OK;\n}; \u[5929]' 2>&1)
[[ $got == "$exp" ]] || err_exit "multibyte ksh function definition/execution failed -- expected '$exp', got '$got'"
got=$(LC_ALL=C.UTF-8 $SHELL -c $'\u[5929]()\n{\nprint OK;\n}; \u[5929]' 2>&1)
[[ $got == "$exp" ]] || err_exit "multibyte posix function definition/execution failed -- expected '$exp', got '$got'"
214
215
# this locale is supported by ast on all platforms
# mainly used to debug multibyte and message translation code
# however wctype is not supported but that's ok for these tests

locale=debug

# ${#x} must report 5 for a<1z>b<2yx>c under the debug locale.
# The here-document body and its delimiter stay at the left margin: <<- strips
# leading tabs only, so indenting them with spaces would change the input.
if [[ "$(LC_ALL=$locale $SHELL <<- \+EOF+
x=a<1z>b<2yx>c
print ${#x}
+EOF+)" != 5
    ]]
then
    err_exit '${#x} not working with multibyte locales'
fi
228
229
# "cd" into a directory name that is expected not to exist yields a diagnostic;
# the checks below look at how that diagnostic changes with the locale.
dir=_not_found_
# Two failing cd commands, the second under LC_ALL=debug, must give two
# distinct lines of output.
exp=2
for cmd in \
    "cd $dir; export LC_ALL=debug; cd $dir" \
    "cd $dir; LC_ALL=debug cd $dir"
do
    got=$($SHELL -c "$cmd" 2>&1 | sort -u | wc -l)
    (( ${got:-0} == $exp )) || err_exit "'$cmd' sequence failed -- error message not localized"
done
# Three failing cd commands: before, during and after a temporary switch of
# $lc to $locale (in a subshell, via set/unset, or via a function-local
# typeset). Each output line is cut after its last ':' and replaced by the
# ordinal of its first appearance, so 121 means the first and third messages
# are equal and the second differs.
exp=121
for lc in LANG LC_MESSAGES LC_ALL
do
    for cmd in "($lc=$locale;cd $dir)" "$lc=$locale;cd $dir;unset $lc" "function tst { typeset $lc=$locale;cd $dir; }; tst"
    do
        tst="$lc=C;cd $dir;$cmd;cd $dir;:"
        $SHELL -c "unset LANG \${!LC_*}; $SHELL -c '$tst'" > out 2>&1 ||
            err_exit "'$tst' failed -- exit status $?"
        integer id=0
        unset msg
        typeset -A msg
        got=
        while read -r line
        do
            line=${line##*:}
            if [[ ! ${msg[$line]} ]]
            then
                msg[$line]=$((++id))
            fi
            got+=${msg[$line]}
        done < out
        [[ $got == $exp ]] || err_exit "'$tst' failed -- expected '$exp', got '$got'"
    done
done
258
259
# Under the debug locale, a<2A@>z must work as a variable name for both
# assignment and expansion.
exp=123
got=$(LC_ALL=debug $SHELL -c "a<2A@>z=$exp; print \$a<2A@>z")
[[ $got == $exp ]] || err_exit "multibyte debug locale \$a<2A@>z failed -- expected '$exp', got '$got'"
262
263
# A LANG change made for a single function call must be seen by $"..." inside
# the function.
unset LC_ALL LC_MESSAGES
export LANG=debug
# Print a fixed $"..." message string.
function message
{
    print -r $"An error occurred."
}
# Calls one and three run with LANG=debug, call two with LANG=C and prints the
# plain text. Two output forms are accepted for the debug-locale calls.
exp=$'(libshell,3,46)\nAn error occurred.\n(libshell,3,46)'
alt=$'(debug,message,libshell,An error occurred.)\nAn error occurred.\n(debug,message,libshell,An error occurred.)'
got=$(message; LANG=C message; message)
[[ $got == "$exp" || $got == "$alt" ]] || {
    # %q keeps the embedded newlines readable in the report
    EXP=$(printf %q "$exp")
    ALT=$(printf %q "$alt")
    GOT=$(printf %q "$got")
    err_exit "LANG change not seen by function -- expected $EXP or $ALT, got $GOT"
}
278
279
# $"..." with backslashes, an escaped double quote and a parameter expansion
# inside; LANG is still debug here.
a_thing=fish
got=$(print -r aa$"\\ahello \" /\\${a_thing}/\\"zz)
exp='aa(debug,'$Command',libshell,\ahello " /\fish/\)zz'
[[ $got == "$exp" ]] || err_exit "$\"...\" containing expansions fails: expected $exp, got $got"

# $"..." on the right-hand side of a typeset assignment.
exp='(debug,'$Command',libshell,This is a string\n)'
typeset got=$"This is a string\n"
[[ $got == "$exp" ]] || err_exit "$\"...\" in assignment expansion fails: expected $exp got $got"

unset LANG

# With LC_ALL=C the message string must come through unchanged.
LC_ALL=C
x=$"hello"
[[ $x == hello ]] || err_exit 'assignment of message strings not working'
293
294
# tests for multibyte character at buffer boundary
# Generate a script that is a single "cat" of a here-document: 163 lines of
# filler, one line with debug-locale multibyte characters, nine more filler
# lines.
{
    print 'cat << \\EOF'
    for ((i=1; i < 164; i++))
    do
        print 123456789+123456789+123456789+123456789+123456789
    done
    print $'next character is multibyte<2b|>c<3d|\>foo'
    for ((i=1; i < 10; i++))
    do
        print 123456789+123456789+123456789+123456789+123456789
    done
    print EOF
} > script$$.1
chmod +x script$$.1
x=$( LC_ALL=debug $SHELL ./script$$.1)
[[ ${#x} == 8641 ]] || err_exit 'here doc contains wrong number of chars with multibyte locale'
[[ $x == *$'next character is multibyte<2b|>c<3d|\>foo'* ]] || err_exit "here_doc doesn't contain line with multibyte chars"
310
311
312
# Length and field justification of a string containing the debug-locale
# multibyte character <2b|>.
x=$(LC_ALL=debug $SHELL -c 'x="a<2b|>c";print -r -- ${#x}')
(( x == 3 )) || err_exit 'character length of multibyte character should be 3'
# The expected literals carry the three padding spaces named in the failure
# messages.
x=$(LC_ALL=debug $SHELL -c 'typeset -R10 x="a<2b|>c";print -r -- "${x}"')
[[ $x == '   a<2b|>c' ]] || err_exit 'typeset -R10 should begin with three spaces'
x=$(LC_ALL=debug $SHELL -c 'typeset -L10 x="a<2b|>c";print -r -- "${x}"')
[[ $x == 'a<2b|>c   ' ]] || err_exit 'typeset -L10 should end in three spaces'
318
319
# These checks run only when en_US.UTF-8 is usable, i.e. a euro sign has
# length 1 under it.
if $SHELL -c "export LC_ALL=en_US.UTF-8; c=$'\342\202\254'; [[ \${#c} == 1 ]]" 2>/dev/null
then
    LC_ALL=en_US.UTF-8
    unset i p1 p2 x
    # Each listed code point must match [[:space:]]; failures are collected
    # in x as ",<code>".
    for i in 9 b c d 20 1680 2000 2001 2002 2003 2004 2005 2006 2008 2009 200a 2028 2029 3000 # 1803 2007 202f 205f
    do
        if ! eval "[[ \$'\\u[$i]' == [[:space:]] ]]"
        then
            x+=,$i
        fi
    done
    if [[ $x ]]
    then
        # choose singular or plural wording for the report
        if [[ $x == ,*,* ]]
        then
            p1=s p2="are not space characters"
        else
            p1= p2="is not a space character"
        fi
        err_exit "unicode char$p1 ${x#?} $p2 in locale $LC_ALL"
    fi
    unset x
    # A string mixing a \u[...] character with a lone \xee byte must come
    # through printf and print unchanged.
    x=$(printf "hello\u[20ac]\xee world")
    [[ $(print -r -- "$x") == $'hello\u[20ac]\xee world' ]] || err_exit '%q with unicode and non-unicode not working'
    if [[ $(whence od) ]]
    then
        got='68 65 6c 6c 6f e2 82 ac ee 20 77 6f 72 6c 64 0a'
        [[ $(print -r -- "$x" | od -An -tx1) == "$got" ]] || err_exit "incorrect string from printf %q"
    fi

fi
343
344
# Exit with the number of failed checks, capped at 125.
exit $((Errors<125?Errors:125))
345
346
347