Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/net/idn/Punycode.java
38918 views
1
/*
2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3
*
4
* This code is free software; you can redistribute it and/or modify it
5
* under the terms of the GNU General Public License version 2 only, as
6
* published by the Free Software Foundation. Oracle designates this
7
* particular file as subject to the "Classpath" exception as provided
8
* by Oracle in the LICENSE file that accompanied this code.
9
*
10
* This code is distributed in the hope that it will be useful, but WITHOUT
11
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13
* version 2 for more details (a copy is included in the LICENSE file that
14
* accompanied this code).
15
*
16
* You should have received a copy of the GNU General Public License version
17
* 2 along with this work; if not, write to the Free Software Foundation,
18
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19
*
20
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21
* or visit www.oracle.com if you need additional information or have any
22
* questions.
23
*/
24
/*
25
*******************************************************************************
26
* Copyright (C) 2003-2004, International Business Machines Corporation and *
27
* others. All Rights Reserved. *
28
*******************************************************************************
29
*/
30
//
31
// CHANGELOG
32
// 2005-05-19 Edward Wang
33
// - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/Punycode.java
34
// - move from package com.ibm.icu.text to package sun.net.idn
35
// - use ParseException instead of StringPrepParseException
36
// 2007-08-14 Martin Buchholz
37
// - remove redundant casts
38
//
39
package sun.net.idn;
40
41
import java.text.ParseException;
42
import sun.text.normalizer.UCharacter;
43
import sun.text.normalizer.UTF16;
44
45
/**
 * Punycode encoder and decoder, ported from ICU's punycode.c.
 *
 * <p>Punycode is the Bootstring instance defined by the parameters below
 * (base 36, bias adaptation, '-' as delimiter). {@link #encode} turns a
 * UTF-16 string into a string of basic (ASCII) code points and
 * {@link #decode} reverses that transformation.
 *
 * <p>Note: the class lives in the internal {@code sun.net.idn} package but is
 * declared {@code public}; it has no state and only static members.
 *
 * @author ram
 */
public final class Punycode {

    /* Punycode parameters for Bootstring */
    private static final int BASE = 36;
    private static final int TMIN = 1;
    private static final int TMAX = 26;
    private static final int SKEW = 38;
    private static final int DAMP = 700;
    private static final int INITIAL_BIAS = 72;
    private static final int INITIAL_N = 0x80;

    /* "Basic" Unicode/ASCII code points */
    private static final int HYPHEN = 0x2d;
    private static final int DELIMITER = HYPHEN;

    private static final int ZERO = 0x30;
    private static final int NINE = 0x39;

    private static final int SMALL_A = 0x61;
    private static final int SMALL_Z = 0x7a;

    private static final int CAPITAL_A = 0x41;
    private static final int CAPITAL_Z = 0x5a;

    /**
     * Maximum number of input code points accepted by {@link #encode}.
     * The overflow guard in the encoder relies on this bound.
     */
    private static final int MAX_CP_COUNT = 256;

    /** Sign bit of a cpBuffer entry; carries the per-code-point case flag. */
    private static final int CASE_FLAG_BIT = 0x80000000;

    /** Mask that strips the case flag from a cpBuffer entry. */
    private static final int CODE_POINT_MASK = 0x7fffffff;

    /**
     * Bias adaptation function of Bootstring.
     *
     * @param delta     the delta that was just encoded or decoded
     * @param length    number of code points handled so far, including the
     *                  current one (always at least 1 for the callers here)
     * @param firstTime whether this is the first delta of the string
     * @return the new bias
     */
    private static int adaptBias(int delta, int length, boolean firstTime) {
        if (firstTime) {
            delta /= DAMP;
        } else {
            delta /= 2;
        }
        delta += delta / length;

        int count = 0;
        for (; delta > ((BASE - TMIN) * TMAX) / 2; count += BASE) {
            delta /= (BASE - TMIN);
        }

        return count + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
    }

    /**
     * Returns the threshold t for the digit at position k (k = BASE, 2*BASE, ...)
     * given the current bias, clamped to the range TMIN..TMAX.
     */
    private static int threshold(int k, int bias) {
        int t = k - bias;
        if (t < TMIN) {
            t = TMIN;
        } else if (k >= (bias + TMAX)) {
            t = TMAX;
        }
        return t;
    }

    /**
     * basicToDigit[] contains the numeric value of a basic code
     * point (for use in representing integers) in the range 0 to
     * BASE-1, or -1 if the code point does not represent a value.
     * The table has 256 entries; callers must range-check the index.
     */
    static final int[] basicToDigit = new int[]{
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    /**
     * Maps an ASCII letter to the requested case; any other character is
     * returned unchanged.
     */
    private static char asciiCaseMap(char b, boolean uppercase) {
        if (uppercase) {
            if (SMALL_A <= b && b <= SMALL_Z) {
                return (char) (b - (SMALL_A - CAPITAL_A));
            }
        } else {
            if (CAPITAL_A <= b && b <= CAPITAL_Z) {
                return (char) (b + (SMALL_A - CAPITAL_A));
            }
        }
        return b;
    }

    /**
     * digitToBasic() returns the basic code point whose value
     * (when used for representing integers) is digit, which must be in the
     * range 0 to BASE-1. The lowercase form is used unless the uppercase
     * flag is set, in which case the uppercase form is used.
     */
    private static char digitToBasic(int digit, boolean uppercase) {
        /*  0..25 map to ASCII a..z or A..Z */
        /* 26..35 map to ASCII 0..9         */
        if (digit < 26) {
            return (char) ((uppercase ? CAPITAL_A : SMALL_A) + digit);
        }
        return (char) ((ZERO - 26) + digit);
    }

    /**
     * Converts Unicode to Punycode.
     * The input string must not contain single, unpaired surrogates.
     * The output consists of ASCII code points only and is not limited in
     * length; only the number of input code points is limited.
     *
     * @param src       the UTF-16 text to encode
     * @param caseFlags optional (may be {@code null}); indexed by the code
     *                  unit index in {@code src}. A {@code true} entry
     *                  upper-cases a basic letter, or upper-cases the final
     *                  digit of the corresponding non-basic code point.
     *                  When non-null it must have at least
     *                  {@code src.length()} entries.
     * @return a new buffer holding the Punycode form of {@code src}
     * @throws ParseException if {@code src} has more than 256 code points or
     *                        contains an unpaired surrogate
     */
    public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException {
        final int srcLength = src.length();
        final int[] cpBuffer = new int[MAX_CP_COUNT];
        final StringBuffer result = new StringBuffer(srcLength + 1);
        int srcCPCount = 0;

        /*
         * Handle the basic code points and
         * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
         */
        for (int j = 0; j < srcLength; ++j) {
            if (srcCPCount == MAX_CP_COUNT) {
                /* too many input code points */
                throw new ParseException("Too many input code points", -1);
            }
            final char c = src.charAt(j);
            if (isBasic(c)) {
                cpBuffer[srcCPCount++] = 0;
                result.append(caseFlags != null ? asciiCaseMap(c, caseFlags[j]) : c);
            } else {
                int cp = (caseFlags != null && caseFlags[j]) ? CASE_FLAG_BIT : 0;
                if (!Character.isSurrogate(c)) {
                    cp |= c;
                } else if (Character.isHighSurrogate(c)
                        && (j + 1) < srcLength
                        && Character.isLowSurrogate(src.charAt(j + 1))) {
                    cp |= Character.toCodePoint(c, src.charAt(j + 1));
                    ++j;
                } else {
                    /* error: unmatched surrogate */
                    throw new ParseException("Illegal char found", -1);
                }
                cpBuffer[srcCPCount++] = cp;
            }
        }

        /* Finish the basic string - if it is not empty - with a delimiter. */
        final int basicLength = result.length();
        if (basicLength > 0) {
            /* the cast matters: append(int) would append the text "45" */
            result.append((char) DELIMITER);
        }

        /*
         * handledCPCount is the number of code points that have been handled
         * basicLength is the number of basic code points
         */

        /* Initialize the state: */
        int n = INITIAL_N;
        int delta = 0;
        int bias = INITIAL_BIAS;

        /* Main encoding loop: */
        int handledCPCount = basicLength;
        while (handledCPCount < srcCPCount) {
            /*
             * All non-basic code points < n have been handled already.
             * Find the next larger one:
             */
            int m = 0x7fffffff;
            for (int j = 0; j < srcCPCount; ++j) {
                final int q = cpBuffer[j] & CODE_POINT_MASK;
                if (n <= q && q < m) {
                    m = q;
                }
            }

            /*
             * Increase delta enough to advance the decoder's
             * <n,i> state to <m,0>, but guard against overflow:
             */
            if (m - n > (0x7fffffff - MAX_CP_COUNT - delta) / (handledCPCount + 1)) {
                throw new RuntimeException("Internal program error");
            }
            delta += (m - n) * (handledCPCount + 1);
            n = m;

            /* Encode a sequence of same code points n */
            for (int j = 0; j < srcCPCount; ++j) {
                final int cp = cpBuffer[j] & CODE_POINT_MASK;
                if (cp < n) {
                    ++delta;
                } else if (cp == n) {
                    /* Represent delta as a generalized variable-length integer: */
                    int q = delta;
                    for (int k = BASE; /* no condition */; k += BASE) {
                        final int t = threshold(k, bias);
                        if (q < t) {
                            break;
                        }
                        result.append(digitToBasic(t + (q - t) % (BASE - t), false));
                        q = (q - t) / (BASE - t);
                    }

                    /* the last digit carries the case flag of the code point */
                    result.append(digitToBasic(q, cpBuffer[j] < 0));
                    bias = adaptBias(delta, handledCPCount + 1, handledCPCount == basicLength);
                    delta = 0;
                    ++handledCPCount;
                }
            }

            ++delta;
            ++n;
        }

        return result;
    }

    /** Returns whether ch is a basic code point, i.e. below INITIAL_N (0x80). */
    private static boolean isBasic(int ch) {
        return (ch < INITIAL_N);
    }

    /** Returns whether ch is an ASCII capital letter A..Z. */
    private static boolean isBasicUpperCase(int ch) {
        return (CAPITAL_A <= ch && ch <= CAPITAL_Z);
    }

    /** Returns whether the code point ch lies in the surrogate range D800..DFFF. */
    private static boolean isSurrogate(int ch) {
        return (((ch) & 0xfffff800) == 0xd800);
    }

    /**
     * Converts Punycode to Unicode.
     * The Unicode string will have at most as many code points as the
     * Punycode string has characters.
     *
     * @param src       the Punycode text to decode
     * @param caseFlags optional (may be {@code null}); receives, per output
     *                  code unit, whether the corresponding basic letter or
     *                  final digit in {@code src} was an upper-case letter.
     *                  When non-null it must be large enough for the decoded
     *                  output.
     * @return a new buffer holding the decoded UTF-16 text
     * @throws ParseException if {@code src} is not well-formed Punycode
     */
    public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
            throws ParseException {
        final int srcLength = src.length();

        /*
         * Handle the basic code points:
         * Let basicLength be the number of input code points
         * before the last delimiter, or 0 if there is none,
         * then copy the first basicLength code points to the output.
         *
         * The two following loops iterate backward.
         */
        int basicLength = srcLength;
        while (basicLength > 0) {
            if (src.charAt(--basicLength) == DELIMITER) {
                break;
            }
        }

        for (int j = basicLength; j > 0; ) {
            final char b = src.charAt(--j);
            if (!isBasic(b)) {
                throw new ParseException("Illegal char found", -1);
            }
            if (caseFlags != null) {
                caseFlags[j] = isBasicUpperCase(b);
            }
        }

        final StringBuffer result = new StringBuffer(srcLength);
        result.append(src, 0, basicLength);

        /* Initialize the state: */
        int n = INITIAL_N;
        int i = 0;
        int bias = INITIAL_BIAS;
        int destCPCount = basicLength;
        /* larger than any reachable index: "no supplementary code point yet" */
        int firstSupplementaryIndex = 1000000000;

        /*
         * Main decoding loop:
         * Start just after the last delimiter if any
         * basic code points were copied; start at the beginning otherwise.
         */
        int pos = basicLength > 0 ? basicLength + 1 : 0;
        while (pos < srcLength) {
            /*
             * pos is the index of the next character to be consumed, and
             * destCPCount is the number of code points in the output.
             *
             * Decode a generalized variable-length integer into delta,
             * which gets added to i. The overflow checking is easier
             * if we increase i as we go, then subtract off its starting
             * value at the end to obtain delta.
             */
            final int oldi = i;
            int w = 1;
            for (int k = BASE; /* no condition */; k += BASE) {
                if (pos >= srcLength) {
                    throw new ParseException("Illegal char found", -1);
                }

                /*
                 * Range-check before indexing the 256-entry table: a narrowing
                 * cast would produce negative indexes for 0x80..0xFF and would
                 * alias characters above 0xFF onto ASCII digits.
                 */
                final char c = src.charAt(pos++);
                final int digit = c < basicToDigit.length ? basicToDigit[c] : -1;
                if (digit < 0) {
                    throw new ParseException("Invalid char found", -1);
                }
                if (digit > (0x7fffffff - i) / w) {
                    /* integer overflow */
                    throw new ParseException("Illegal char found", -1);
                }

                i += digit * w;
                final int t = threshold(k, bias);
                if (digit < t) {
                    break;
                }

                if (w > 0x7fffffff / (BASE - t)) {
                    /* integer overflow */
                    throw new ParseException("Illegal char found", -1);
                }
                w *= BASE - t;
            }

            /*
             * Modification from sample code:
             * Increments destCPCount here,
             * where needed instead of in for() loop tail.
             */
            ++destCPCount;
            bias = adaptBias(i - oldi, destCPCount, (oldi == 0));

            /*
             * i was supposed to wrap around from (incremented) destCPCount to 0,
             * incrementing n each time, so we'll fix that now:
             */
            if (i / destCPCount > (0x7fffffff - n)) {
                /* integer overflow */
                throw new ParseException("Illegal char found", -1);
            }

            n += i / destCPCount;
            i %= destCPCount;

            if (n > 0x10ffff || isSurrogate(n)) {
                /* Unicode code point overflow */
                throw new ParseException("Illegal char found", -1);
            }

            /* Insert n at code point position i of the output: */
            final int cpLength = Character.charCount(n);
            final int destLength = result.length();
            final int codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string, so the code point index equals
             * the code unit index.
             *
             * Only the rare cases with supplementary code points before i need
             * the index to be translated.
             */
            if (i <= firstSupplementaryIndex) {
                codeUnitIndex = i;
                if (cpLength > 1) {
                    firstSupplementaryIndex = codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex = result.offsetByCodePoints(
                        firstSupplementaryIndex, i - firstSupplementaryIndex);
            }

            /* use the code unit index instead of the code point index i */
            result.insert(codeUnitIndex, Character.toChars(n));

            if (caseFlags != null) {
                if (codeUnitIndex < destLength) {
                    /* keep the flags aligned with the shifted code units */
                    System.arraycopy(caseFlags, codeUnitIndex,
                                     caseFlags, codeUnitIndex + cpLength,
                                     destLength - codeUnitIndex);
                }
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex] = isBasicUpperCase(src.charAt(pos - 1));
                if (cpLength == 2) {
                    caseFlags[codeUnitIndex + 1] = false;
                }
            }
            ++i;
        }

        return result;
    }
}
512
513