Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/net/IDN.java
38829 views
1
/*
2
* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
package java.net;
26
27
import java.io.InputStream;
28
import java.io.IOException;
29
import java.security.AccessController;
30
import java.security.PrivilegedAction;
31
32
import sun.net.idn.StringPrep;
33
import sun.net.idn.Punycode;
34
import sun.text.normalizer.UCharacterIterator;
35
36
/**
37
* Provides methods to convert internationalized domain names (IDNs) between
38
* a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation.
39
* Internationalized domain names can use characters from the entire range of
40
* Unicode, while traditional domain names are restricted to ASCII characters.
41
* ACE is an encoding of Unicode strings that uses only ASCII characters and
42
* can be used with software (such as the Domain Name System) that only
43
* understands traditional domain names.
44
*
45
* <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
46
* RFC 3490 defines two operations: ToASCII and ToUnicode. These 2 operations employ
47
* <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a
48
* profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and
49
* <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert
50
* domain name string back and forth.
51
*
52
* <p>The behavior of the aforementioned conversion process can be adjusted by various flags:
53
* <ul>
54
* <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted
55
* can contain code points that are unassigned in Unicode 3.2, which is the
56
* Unicode version on which IDN conversion is based. If the flag is not used,
57
* the presence of such unassigned code points is treated as an error.
58
* <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>.
59
* It is an error if they don't meet the requirements.
60
* </ul>
61
* These flags can be logically OR'ed together.
62
*
63
* <p>Security considerations are important with respect to internationalized
64
* domain name support. For example, English domain names may be <i>homographed</i>
65
* - maliciously misspelled by substitution of non-Latin letters.
66
* <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a>
67
* discusses security issues of IDN support as well as possible solutions.
68
* Applications are responsible for taking adequate security measures when using
69
* internationalized domain names.
70
*
71
* @author Edward Wang
72
* @since 1.6
73
*
74
*/
75
public final class IDN {
76
/**
77
* Flag to allow processing of unassigned code points
78
*/
79
public static final int ALLOW_UNASSIGNED = 0x01;
80
81
/**
82
* Flag to turn on the check against STD-3 ASCII rules
83
*/
84
public static final int USE_STD3_ASCII_RULES = 0x02;
85
86
87
/**
88
* Translates a string from Unicode to ASCII Compatible Encoding (ACE),
89
* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90
*
91
* <p>ToASCII operation can fail. ToASCII fails if any step of it fails.
92
* If ToASCII operation fails, an IllegalArgumentException will be thrown.
93
* In this case, the input string should not be used in an internationalized domain name.
94
*
95
* <p> A label is an individual part of a domain name. The original ToASCII operation,
96
* as defined in RFC 3490, only operates on a single label. This method can handle
97
* both label and entire domain name, by assuming that labels in a domain name are
98
* always separated by dots. The following characters are recognized as dots:
99
* &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
100
* and &#0092;uFF61 (halfwidth ideographic full stop). If dots are
101
* used as label separators, this method also changes all of them to &#0092;u002E (full stop)
102
* in output translated string.
103
*
104
* @param input the string to be processed
105
* @param flag process flag; can be 0 or any logical OR of possible flags
106
*
107
* @return the translated {@code String}
108
*
109
* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
110
*/
111
public static String toASCII(String input, int flag)
112
{
113
int p = 0, q = 0;
114
StringBuffer out = new StringBuffer();
115
116
if (isRootLabel(input)) {
117
return ".";
118
}
119
120
while (p < input.length()) {
121
q = searchDots(input, p);
122
out.append(toASCIIInternal(input.substring(p, q), flag));
123
if (q != (input.length())) {
124
// has more labels, or keep the trailing dot as at present
125
out.append('.');
126
}
127
p = q + 1;
128
}
129
130
return out.toString();
131
}
132
133
134
/**
135
* Translates a string from Unicode to ASCII Compatible Encoding (ACE),
136
* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
137
*
138
* <p> This convenience method works as if by invoking the
139
* two-argument counterpart as follows:
140
* <blockquote>
141
* {@link #toASCII(String, int) toASCII}(input,&nbsp;0);
142
* </blockquote>
143
*
144
* @param input the string to be processed
145
*
146
* @return the translated {@code String}
147
*
148
* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
149
*/
150
public static String toASCII(String input) {
151
return toASCII(input, 0);
152
}
153
154
155
/**
156
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
157
* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
158
*
159
* <p>ToUnicode never fails. In case of any error, the input string is returned unmodified.
160
*
161
* <p> A label is an individual part of a domain name. The original ToUnicode operation,
162
* as defined in RFC 3490, only operates on a single label. This method can handle
163
* both label and entire domain name, by assuming that labels in a domain name are
164
* always separated by dots. The following characters are recognized as dots:
165
* &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
166
* and &#0092;uFF61 (halfwidth ideographic full stop).
167
*
168
* @param input the string to be processed
169
* @param flag process flag; can be 0 or any logical OR of possible flags
170
*
171
* @return the translated {@code String}
172
*/
173
public static String toUnicode(String input, int flag) {
174
int p = 0, q = 0;
175
StringBuffer out = new StringBuffer();
176
177
if (isRootLabel(input)) {
178
return ".";
179
}
180
181
while (p < input.length()) {
182
q = searchDots(input, p);
183
out.append(toUnicodeInternal(input.substring(p, q), flag));
184
if (q != (input.length())) {
185
// has more labels, or keep the trailing dot as at present
186
out.append('.');
187
}
188
p = q + 1;
189
}
190
191
return out.toString();
192
}
193
194
195
/**
196
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
197
* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
198
*
199
* <p> This convenience method works as if by invoking the
200
* two-argument counterpart as follows:
201
* <blockquote>
202
* {@link #toUnicode(String, int) toUnicode}(input,&nbsp;0);
203
* </blockquote>
204
*
205
* @param input the string to be processed
206
*
207
* @return the translated {@code String}
208
*/
209
public static String toUnicode(String input) {
210
return toUnicode(input, 0);
211
}
212
213
214
/* ---------------- Private members -------------- */
215
216
// ACE Prefix is "xn--"
217
private static final String ACE_PREFIX = "xn--";
218
private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();
219
220
private static final int MAX_LABEL_LENGTH = 63;
221
222
// single instance of nameprep, created once by the static initializer below
private static StringPrep namePrep = null;

static {
    try {
        final String IDN_PROFILE = "uidna.spp";
        InputStream stream = null;

        try {
            // With a security manager installed, the resource lookup is
            // performed inside a privileged action.
            if (System.getSecurityManager() != null) {
                stream = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
                    public InputStream run() {
                        return StringPrep.class.getResourceAsStream(IDN_PROFILE);
                    }
                });
            } else {
                stream = StringPrep.class.getResourceAsStream(IDN_PROFILE);
            }

            namePrep = new StringPrep(stream);
        } finally {
            // Release the profile stream even when constructing the
            // StringPrep fails; previously it was closed only on success.
            if (stream != null) {
                stream.close();
            }
        }
    } catch (IOException e) {
        // should never reach here
        assert false;
    }
}
247
248
249
/* ---------------- Private operations -------------- */
250
251
252
//
// private constructor, declared only to suppress the default
// zero-argument constructor
//
private IDN() {
    // intentionally empty
}
256
257
//
258
// toASCII operation; should only apply to a single label
259
//
260
private static String toASCIIInternal(String label, int flag)
261
{
262
// step 1
263
// Check if the string contains code points outside the ASCII range 0..0x7c.
264
boolean isASCII = isAllASCII(label);
265
StringBuffer dest;
266
267
// step 2
268
// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
269
if (!isASCII) {
270
UCharacterIterator iter = UCharacterIterator.getInstance(label);
271
try {
272
dest = namePrep.prepare(iter, flag);
273
} catch (java.text.ParseException e) {
274
throw new IllegalArgumentException(e);
275
}
276
} else {
277
dest = new StringBuffer(label);
278
}
279
280
// step 8, move forward to check the smallest number of the code points
281
// the length must be inside 1..63
282
if (dest.length() == 0) {
283
throw new IllegalArgumentException(
284
"Empty label is not a legal name");
285
}
286
287
// step 3
288
// Verify the absence of non-LDH ASCII code points
289
// 0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
290
// Verify the absence of leading and trailing hyphen
291
boolean useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0);
292
if (useSTD3ASCIIRules) {
293
for (int i = 0; i < dest.length(); i++) {
294
int c = dest.charAt(i);
295
if (isNonLDHAsciiCodePoint(c)) {
296
throw new IllegalArgumentException(
297
"Contains non-LDH ASCII characters");
298
}
299
}
300
301
if (dest.charAt(0) == '-' ||
302
dest.charAt(dest.length() - 1) == '-') {
303
304
throw new IllegalArgumentException(
305
"Has leading or trailing hyphen");
306
}
307
}
308
309
if (!isASCII) {
310
// step 4
311
// If all code points are inside 0..0x7f, skip to step 8
312
if (!isAllASCII(dest.toString())) {
313
// step 5
314
// verify the sequence does not begin with ACE prefix
315
if(!startsWithACEPrefix(dest)){
316
317
// step 6
318
// encode the sequence with punycode
319
try {
320
dest = Punycode.encode(dest, null);
321
} catch (java.text.ParseException e) {
322
throw new IllegalArgumentException(e);
323
}
324
325
dest = toASCIILower(dest);
326
327
// step 7
328
// prepend the ACE prefix
329
dest.insert(0, ACE_PREFIX);
330
} else {
331
throw new IllegalArgumentException("The input starts with the ACE Prefix");
332
}
333
334
}
335
}
336
337
// step 8
338
// the length must be inside 1..63
339
if (dest.length() > MAX_LABEL_LENGTH) {
340
throw new IllegalArgumentException("The label in the input is too long");
341
}
342
343
return dest.toString();
344
}
345
346
//
347
// toUnicode operation; should only apply to a single label
348
//
349
private static String toUnicodeInternal(String label, int flag) {
350
boolean[] caseFlags = null;
351
StringBuffer dest;
352
353
// step 1
354
// find out if all the codepoints in input are ASCII
355
boolean isASCII = isAllASCII(label);
356
357
if(!isASCII){
358
// step 2
359
// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
360
try {
361
UCharacterIterator iter = UCharacterIterator.getInstance(label);
362
dest = namePrep.prepare(iter, flag);
363
} catch (Exception e) {
364
// toUnicode never fails; if any step fails, return the input string
365
return label;
366
}
367
} else {
368
dest = new StringBuffer(label);
369
}
370
371
// step 3
372
// verify ACE Prefix
373
if(startsWithACEPrefix(dest)) {
374
375
// step 4
376
// Remove the ACE Prefix
377
String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());
378
379
try {
380
// step 5
381
// Decode using punycode
382
StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);
383
384
// step 6
385
// Apply toASCII
386
String toASCIIOut = toASCII(decodeOut.toString(), flag);
387
388
// step 7
389
// verify
390
if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
391
// step 8
392
// return output of step 5
393
return decodeOut.toString();
394
}
395
} catch (Exception ignored) {
396
// no-op
397
}
398
}
399
400
// just return the input
401
return label;
402
}
403
404
405
//
406
// LDH stands for "letter/digit/hyphen", with characters restricted to the
407
// 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
408
// <->.
409
// Non LDH refers to characters in the ASCII range, but which are not
410
// letters, digits or the hypen.
411
//
412
// non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
413
//
414
private static boolean isNonLDHAsciiCodePoint(int ch){
415
return (0x0000 <= ch && ch <= 0x002C) ||
416
(0x002E <= ch && ch <= 0x002F) ||
417
(0x003A <= ch && ch <= 0x0040) ||
418
(0x005B <= ch && ch <= 0x0060) ||
419
(0x007B <= ch && ch <= 0x007F);
420
}
421
422
//
423
// search dots in a string and return the index of that character;
424
// or if there is no dots, return the length of input string
425
// dots might be: \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
426
// and \uFF61 (halfwidth ideographic full stop).
427
//
428
private static int searchDots(String s, int start) {
429
int i;
430
for (i = start; i < s.length(); i++) {
431
if (isLabelSeparator(s.charAt(i))) {
432
break;
433
}
434
}
435
436
return i;
437
}
438
439
//
440
// to check if a string is a root label, ".".
441
//
442
private static boolean isRootLabel(String s) {
443
return (s.length() == 1 && isLabelSeparator(s.charAt(0)));
444
}
445
446
//
447
// to check if a character is a label separator, i.e. a dot character.
448
//
449
private static boolean isLabelSeparator(char c) {
450
return (c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61');
451
}
452
453
//
454
// to check if a string only contains US-ASCII code point
455
//
456
private static boolean isAllASCII(String input) {
457
boolean isASCII = true;
458
for (int i = 0; i < input.length(); i++) {
459
int c = input.charAt(i);
460
if (c > 0x7F) {
461
isASCII = false;
462
break;
463
}
464
}
465
return isASCII;
466
}
467
468
//
469
// to check if a string starts with ACE-prefix
470
//
471
private static boolean startsWithACEPrefix(StringBuffer input){
472
boolean startsWithPrefix = true;
473
474
if(input.length() < ACE_PREFIX_LENGTH){
475
return false;
476
}
477
for(int i = 0; i < ACE_PREFIX_LENGTH; i++){
478
if(toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)){
479
startsWithPrefix = false;
480
}
481
}
482
return startsWithPrefix;
483
}
484
485
private static char toASCIILower(char ch){
486
if('A' <= ch && ch <= 'Z'){
487
return (char)(ch + 'a' - 'A');
488
}
489
return ch;
490
}
491
492
private static StringBuffer toASCIILower(StringBuffer input){
493
StringBuffer dest = new StringBuffer();
494
for(int i = 0; i < input.length();i++){
495
dest.append(toASCIILower(input.charAt(i)));
496
}
497
return dest;
498
}
499
}
500
501