Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/text/normalizer/Utility.java
38830 views
1
/*
2
* Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
/*
26
*******************************************************************************
27
* (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
28
* *
29
* The original version of this source code and documentation is copyrighted *
30
* and owned by IBM, These materials are provided under terms of a License *
31
* Agreement between IBM and Sun. This technology is protected by multiple *
32
* US and International patents. This notice and attribution to IBM may not *
33
* to removed. *
34
*******************************************************************************
35
*/
36
37
package sun.text.normalizer;
38
39
public final class Utility {
40
41
/**
42
* Convenience utility to compare two Object[]s
43
* Ought to be in System.
44
* @param len the length to compare.
45
* The start indices and start+len must be valid.
46
*/
47
public final static boolean arrayRegionMatches(char[] source, int sourceStart,
48
char[] target, int targetStart,
49
int len)
50
{
51
int sourceEnd = sourceStart + len;
52
int delta = targetStart - sourceStart;
53
for (int i = sourceStart; i < sourceEnd; i++) {
54
if (source[i]!=target[i + delta])
55
return false;
56
}
57
return true;
58
}
59
60
/**
61
* Convert characters outside the range U+0020 to U+007F to
62
* Unicode escapes, and convert backslash to a double backslash.
63
*/
64
public static final String escape(String s) {
65
StringBuffer buf = new StringBuffer();
66
for (int i=0; i<s.length(); ) {
67
int c = UTF16.charAt(s, i);
68
i += UTF16.getCharCount(c);
69
if (c >= ' ' && c <= 0x007F) {
70
if (c == '\\') {
71
buf.append("\\\\"); // That is, "\\"
72
} else {
73
buf.append((char)c);
74
}
75
} else {
76
boolean four = c <= 0xFFFF;
77
buf.append(four ? "\\u" : "\\U");
78
hex(c, four ? 4 : 8, buf);
79
}
80
}
81
return buf.toString();
82
}
83
84
/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
85
static private final char[] UNESCAPE_MAP = {
86
/*" 0x22, 0x22 */
87
/*' 0x27, 0x27 */
88
/*? 0x3F, 0x3F */
89
/*\ 0x5C, 0x5C */
90
/*a*/ 0x61, 0x07,
91
/*b*/ 0x62, 0x08,
92
/*e*/ 0x65, 0x1b,
93
/*f*/ 0x66, 0x0c,
94
/*n*/ 0x6E, 0x0a,
95
/*r*/ 0x72, 0x0d,
96
/*t*/ 0x74, 0x09,
97
/*v*/ 0x76, 0x0b
98
};
99
100
/**
101
* Convert an escape to a 32-bit code point value. We attempt
102
* to parallel the icu4c unescapeAt() function.
103
* @param offset16 an array containing offset to the character
104
* <em>after</em> the backslash. Upon return offset16[0] will
105
* be updated to point after the escape sequence.
106
* @return character value from 0 to 10FFFF, or -1 on error.
107
*/
108
public static int unescapeAt(String s, int[] offset16) {
109
int c;
110
int result = 0;
111
int n = 0;
112
int minDig = 0;
113
int maxDig = 0;
114
int bitsPerDigit = 4;
115
int dig;
116
int i;
117
boolean braces = false;
118
119
/* Check that offset is in range */
120
int offset = offset16[0];
121
int length = s.length();
122
if (offset < 0 || offset >= length) {
123
return -1;
124
}
125
126
/* Fetch first UChar after '\\' */
127
c = UTF16.charAt(s, offset);
128
offset += UTF16.getCharCount(c);
129
130
/* Convert hexadecimal and octal escapes */
131
switch (c) {
132
case 'u':
133
minDig = maxDig = 4;
134
break;
135
case 'U':
136
minDig = maxDig = 8;
137
break;
138
case 'x':
139
minDig = 1;
140
if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
141
++offset;
142
braces = true;
143
maxDig = 8;
144
} else {
145
maxDig = 2;
146
}
147
break;
148
default:
149
dig = UCharacter.digit(c, 8);
150
if (dig >= 0) {
151
minDig = 1;
152
maxDig = 3;
153
n = 1; /* Already have first octal digit */
154
bitsPerDigit = 3;
155
result = dig;
156
}
157
break;
158
}
159
if (minDig != 0) {
160
while (offset < length && n < maxDig) {
161
c = UTF16.charAt(s, offset);
162
dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
163
if (dig < 0) {
164
break;
165
}
166
result = (result << bitsPerDigit) | dig;
167
offset += UTF16.getCharCount(c);
168
++n;
169
}
170
if (n < minDig) {
171
return -1;
172
}
173
if (braces) {
174
if (c != 0x7D /*}*/) {
175
return -1;
176
}
177
++offset;
178
}
179
if (result < 0 || result >= 0x110000) {
180
return -1;
181
}
182
// If an escape sequence specifies a lead surrogate, see
183
// if there is a trail surrogate after it, either as an
184
// escape or as a literal. If so, join them up into a
185
// supplementary.
186
if (offset < length &&
187
UTF16.isLeadSurrogate((char) result)) {
188
int ahead = offset+1;
189
c = s.charAt(offset); // [sic] get 16-bit code unit
190
if (c == '\\' && ahead < length) {
191
int o[] = new int[] { ahead };
192
c = unescapeAt(s, o);
193
ahead = o[0];
194
}
195
if (UTF16.isTrailSurrogate((char) c)) {
196
offset = ahead;
197
result = UCharacterProperty.getRawSupplementary(
198
(char) result, (char) c);
199
}
200
}
201
offset16[0] = offset;
202
return result;
203
}
204
205
/* Convert C-style escapes in table */
206
for (i=0; i<UNESCAPE_MAP.length; i+=2) {
207
if (c == UNESCAPE_MAP[i]) {
208
offset16[0] = offset;
209
return UNESCAPE_MAP[i+1];
210
} else if (c < UNESCAPE_MAP[i]) {
211
break;
212
}
213
}
214
215
/* Map \cX to control-X: X & 0x1F */
216
if (c == 'c' && offset < length) {
217
c = UTF16.charAt(s, offset);
218
offset16[0] = offset + UTF16.getCharCount(c);
219
return 0x1F & c;
220
}
221
222
/* If no special forms are recognized, then consider
223
* the backslash to generically escape the next character. */
224
offset16[0] = offset;
225
return c;
226
}
227
228
/**
229
* Convert a integer to size width hex uppercase digits.
230
* E.g., hex('a', 4, str) => "0041".
231
* Append the output to the given StringBuffer.
232
* If width is too small to fit, nothing will be appended to output.
233
*/
234
public static StringBuffer hex(int ch, int width, StringBuffer output) {
235
return appendNumber(output, ch, 16, width);
236
}
237
238
/**
239
* Convert a integer to size width (minimum) hex uppercase digits.
240
* E.g., hex('a', 4, str) => "0041". If the integer requires more
241
* than width digits, more will be used.
242
*/
243
public static String hex(int ch, int width) {
244
StringBuffer buf = new StringBuffer();
245
return appendNumber(buf, ch, 16, width).toString();
246
}
247
248
/**
249
* Skip over a sequence of zero or more white space characters
250
* at pos. Return the index of the first non-white-space character
251
* at or after pos, or str.length(), if there is none.
252
*/
253
public static int skipWhitespace(String str, int pos) {
254
while (pos < str.length()) {
255
int c = UTF16.charAt(str, pos);
256
if (!UCharacterProperty.isRuleWhiteSpace(c)) {
257
break;
258
}
259
pos += UTF16.getCharCount(c);
260
}
261
return pos;
262
}
263
264
/**
 * Digit characters indexed by digit value: '0'-'9' for the values
 * 0 through 9, then 'A'-'Z' for the values 10 through 35.
 */
static final char DIGITS[] = {
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
    'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
    'U', 'V', 'W', 'X', 'Y', 'Z'
};

/**
 * Append the digits of a non-negative integer to the given
 * <code>StringBuffer</code> in the given radix. This is
 * done recursively since it is easiest to generate the low-
 * order digit first, but it must be appended last.
 *
 * @param result is the <code>StringBuffer</code> to append to
 * @param n is the non-negative integer. It is a long so that the
 * magnitude of Integer.MIN_VALUE can be passed in.
 * @param radix is the radix, from 2 to 36 inclusive
 * @param minDigits is the minimum number of digits to append.
 */
private static void recursiveAppendNumber(StringBuffer result, long n,
                                          int radix, int minDigits)
{
    // n is non-negative and radix is at most 36, so the remainder is
    // always a valid index into DIGITS.
    int digit = (int) (n % radix);

    // Higher-order digits (or padding zeros) go first.
    if (n >= radix || minDigits > 1) {
        recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
    }

    result.append(DIGITS[digit]);
}

/**
 * Append a number to the given StringBuffer in the given radix.
 * Standard digits '0'-'9' are used and letters 'A'-'Z' for
 * radices 11 through 36.
 * @param result the digits of the number are appended here
 * @param n the number to be converted to digits; may be negative,
 * including Integer.MIN_VALUE.
 * If negative, a '-' is prepended to the digits.
 * @param radix a radix from 2 to 36 inclusive.
 * @param minDigits the minimum number of digits, not including
 * any '-', to produce.  Values less than 2 have no effect.  One
 * digit is always emitted regardless of this parameter.
 * @return a reference to result
 * @throws IllegalArgumentException if radix is less than 2 or
 * greater than 36
 */
public static StringBuffer appendNumber(StringBuffer result, int n,
                                        int radix, int minDigits)
    throws IllegalArgumentException
{
    if (radix < 2 || radix > 36) {
        throw new IllegalArgumentException("Illegal radix " + radix);
    }

    // Widen before negating: -Integer.MIN_VALUE overflows in int
    // arithmetic and stays negative, which would produce a negative
    // index into DIGITS.
    long abs = n;

    if (n < 0) {
        abs = -abs;
        result.append("-");
    }

    recursiveAppendNumber(result, abs, radix, minDigits);

    return result;
}
327
328
/**
329
* Return true if the character is NOT printable ASCII. The tab,
330
* newline and linefeed characters are considered unprintable.
331
*/
332
public static boolean isUnprintable(int c) {
333
return !(c >= 0x20 && c <= 0x7E);
334
}
335
336
/**
337
* Escape unprintable characters using <backslash>uxxxx notation
338
* for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
339
* above. If the character is printable ASCII, then do nothing
340
* and return FALSE. Otherwise, append the escaped notation and
341
* return TRUE.
342
*/
343
public static boolean escapeUnprintable(StringBuffer result, int c) {
344
if (isUnprintable(c)) {
345
result.append('\\');
346
if ((c & ~0xFFFF) != 0) {
347
result.append('U');
348
result.append(DIGITS[0xF&(c>>28)]);
349
result.append(DIGITS[0xF&(c>>24)]);
350
result.append(DIGITS[0xF&(c>>20)]);
351
result.append(DIGITS[0xF&(c>>16)]);
352
} else {
353
result.append('u');
354
}
355
result.append(DIGITS[0xF&(c>>12)]);
356
result.append(DIGITS[0xF&(c>>8)]);
357
result.append(DIGITS[0xF&(c>>4)]);
358
result.append(DIGITS[0xF&c]);
359
return true;
360
}
361
return false;
362
}
363
364
/**
365
* Similar to StringBuffer.getChars, version 1.3.
366
* Since JDK 1.2 implements StringBuffer.getChars differently, this method
367
* is here to provide consistent results.
368
* To be removed after JDK 1.2 ceased to be the reference platform.
369
* @param src source string buffer
370
* @param srcBegin offset to the start of the src to retrieve from
371
* @param srcEnd offset to the end of the src to retrieve from
372
* @param dst char array to store the retrieved chars
373
* @param dstBegin offset to the start of the destination char array to
374
* store the retrieved chars
375
*/
376
public static void getChars(StringBuffer src, int srcBegin, int srcEnd,
377
char dst[], int dstBegin)
378
{
379
if (srcBegin == srcEnd) {
380
return;
381
}
382
src.getChars(srcBegin, srcEnd, dst, dstBegin);
383
}
384
385
}
386
387