CoCalc -- IDN.java

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/java/net/IDN.java
³⁸⁸²⁹ views
1
/*
2
 * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25
package java.net;
26

27
import java.io.InputStream;
28
import java.io.IOException;
29
import java.security.AccessController;
30
import java.security.PrivilegedAction;
31

32
import sun.net.idn.StringPrep;
33
import sun.net.idn.Punycode;
34
import sun.text.normalizer.UCharacterIterator;
35

36
/**
37
 * Provides methods to convert internationalized domain names (IDNs) between
38
 * a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation.
39
 * Internationalized domain names can use characters from the entire range of
40
 * Unicode, while traditional domain names are restricted to ASCII characters.
41
 * ACE is an encoding of Unicode strings that uses only ASCII characters and
42
 * can be used with software (such as the Domain Name System) that only
43
 * understands traditional domain names.
44
 *
45
 * <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
46
 * RFC 3490 defines two operations: ToASCII and ToUnicode. These 2 operations employ
47
 * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a
48
 * profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and
49
 * <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert
50
 * domain name string back and forth.
51
 *
52
 * <p>The behavior of aforementioned conversion process can be adjusted by various flags:
53
 *   <ul>
54
 *     <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted
55
 *         can contain code points that are unassigned in Unicode 3.2, which is the
56
 *         Unicode version on which IDN conversion is based. If the flag is not used,
57
 *         the presence of such unassigned code points is treated as an error.
58
 *     <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>.
59
 *         It is an error if they don't meet the requirements.
60
 *   </ul>
61
 * These flags can be logically OR'ed together.
62
 *
63
 * <p>Security considerations are important with respect to internationalized
64
 * domain name support. For example, English domain names may be <i>homographed</i>
65
 * - maliciously misspelled by substitution of non-Latin letters.
66
 * <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a>
67
 * discusses security issues of IDN support as well as possible solutions.
68
 * Applications are responsible for taking adequate security measures when using
69
 * international domain names.
70
 *
71
 * @author Edward Wang
72
 * @since 1.6
73
 *
74
 */
75
public final class IDN {
76
    /**
77
     * Flag to allow processing of unassigned code points
78
     */
79
    public static final int ALLOW_UNASSIGNED = 0x01;
80

81
    /**
82
     * Flag to turn on the check against STD-3 ASCII rules
83
     */
84
    public static final int USE_STD3_ASCII_RULES = 0x02;
85

86

87
    /**
88
     * Translates a string from Unicode to ASCII Compatible Encoding (ACE),
89
     * as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90
     *
91
     * <p>ToASCII operation can fail. ToASCII fails if any step of it fails.
92
     * If ToASCII operation fails, an IllegalArgumentException will be thrown.
93
     * In this case, the input string should not be used in an internationalized domain name.
94
     *
95
     * <p> A label is an individual part of a domain name. The original ToASCII operation,
96
     * as defined in RFC 3490, only operates on a single label. This method can handle
97
     * both label and entire domain name, by assuming that labels in a domain name are
98
     * always separated by dots. The following characters are recognized as dots:
99
     * &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
100
     * and &#0092;uFF61 (halfwidth ideographic full stop). if dots are
101
     * used as label separators, this method also changes all of them to &#0092;u002E (full stop)
102
     * in output translated string.
103
     *
104
     * @param input     the string to be processed
105
     * @param flag      process flag; can be 0 or any logical OR of possible flags
106
     *
107
     * @return          the translated {@code String}
108
     *
109
     * @throws IllegalArgumentException   if the input string doesn't conform to RFC 3490 specification
110
     */
111
    public static String toASCII(String input, int flag)
112
    {
113
        int p = 0, q = 0;
114
        StringBuffer out = new StringBuffer();
115

116
        if (isRootLabel(input)) {
117
            return ".";
118
        }
119

120
        while (p < input.length()) {
121
            q = searchDots(input, p);
122
            out.append(toASCIIInternal(input.substring(p, q),  flag));
123
            if (q != (input.length())) {
124
               // has more labels, or keep the trailing dot as at present
125
               out.append('.');
126
            }
127
            p = q + 1;
128
        }
129

130
        return out.toString();
131
    }
132

133

134
    /**
135
     * Translates a string from Unicode to ASCII Compatible Encoding (ACE),
136
     * as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
137
     *
138
     * <p> This convenience method works as if by invoking the
139
     * two-argument counterpart as follows:
140
     * <blockquote>
141
     * {@link #toASCII(String, int) toASCII}(input,&nbsp;0);
142
     * </blockquote>
143
     *
144
     * @param input     the string to be processed
145
     *
146
     * @return          the translated {@code String}
147
     *
148
     * @throws IllegalArgumentException   if the input string doesn't conform to RFC 3490 specification
149
     */
150
    public static String toASCII(String input) {
151
        return toASCII(input, 0);
152
    }
153

154

155
    /**
156
     * Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
157
     * as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
158
     *
159
     * <p>ToUnicode never fails. In case of any error, the input string is returned unmodified.
160
     *
161
     * <p> A label is an individual part of a domain name. The original ToUnicode operation,
162
     * as defined in RFC 3490, only operates on a single label. This method can handle
163
     * both label and entire domain name, by assuming that labels in a domain name are
164
     * always separated by dots. The following characters are recognized as dots:
165
     * &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
166
     * and &#0092;uFF61 (halfwidth ideographic full stop).
167
     *
168
     * @param input     the string to be processed
169
     * @param flag      process flag; can be 0 or any logical OR of possible flags
170
     *
171
     * @return          the translated {@code String}
172
     */
173
    public static String toUnicode(String input, int flag) {
174
        int p = 0, q = 0;
175
        StringBuffer out = new StringBuffer();
176

177
        if (isRootLabel(input)) {
178
            return ".";
179
        }
180

181
        while (p < input.length()) {
182
            q = searchDots(input, p);
183
            out.append(toUnicodeInternal(input.substring(p, q),  flag));
184
            if (q != (input.length())) {
185
               // has more labels, or keep the trailing dot as at present
186
               out.append('.');
187
            }
188
            p = q + 1;
189
        }
190

191
        return out.toString();
192
    }
193

194

195
    /**
196
     * Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
197
     * as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
198
     *
199
     * <p> This convenience method works as if by invoking the
200
     * two-argument counterpart as follows:
201
     * <blockquote>
202
     * {@link #toUnicode(String, int) toUnicode}(input,&nbsp;0);
203
     * </blockquote>
204
     *
205
     * @param input     the string to be processed
206
     *
207
     * @return          the translated {@code String}
208
     */
209
    public static String toUnicode(String input) {
210
        return toUnicode(input, 0);
211
    }
212

213

214
    /* ---------------- Private members -------------- */
215

216
    // ACE Prefix is "xn--"; it is prepended to Punycode output by toASCIIInternal
    // and stripped again by toUnicodeInternal.
    private static final String ACE_PREFIX = "xn--";
    private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();

    // Upper bound on the length of a single converted label (checked in toASCIIInternal).
    private static final int MAX_LABEL_LENGTH   = 63;

    // single instance of nameprep, created once by the static initializer below
    private static StringPrep namePrep = null;

    static {
        InputStream stream = null;

        try {
            final String IDN_PROFILE = "uidna.spp";
            if (System.getSecurityManager() != null) {
                // With a security manager installed, read the profile resource
                // inside a privileged action.
                stream = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
                    @Override
                    public InputStream run() {
                        return StringPrep.class.getResourceAsStream(IDN_PROFILE);
                    }
                });
            } else {
                stream = StringPrep.class.getResourceAsStream(IDN_PROFILE);
            }

            namePrep = new StringPrep(stream);
        } catch (IOException e) {
            // should never reach here
            assert false;
        } finally {
            // Close the profile stream on every path, including when reading
            // the profile failed part-way through.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException e) {
                    // should never reach here
                    assert false;
                }
            }
        }
    }
247

248

249
    /* ---------------- Private operations -------------- */
250

251

252
    //
253
    // to suppress the default zero-argument constructor
254
    //
255
    // Intentionally empty: all members visible in this class are static, so no instance is needed.
    private IDN() {}
256

257
    //
258
    // toASCII operation; should only apply to a single label
259
    //
260
    private static String toASCIIInternal(String label, int flag)
261
    {
262
        // step 1
263
        // Check if the string contains code points outside the ASCII range 0..0x7c.
264
        boolean isASCII  = isAllASCII(label);
265
        StringBuffer dest;
266

267
        // step 2
268
        // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
269
        if (!isASCII) {
270
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
271
            try {
272
                dest = namePrep.prepare(iter, flag);
273
            } catch (java.text.ParseException e) {
274
                throw new IllegalArgumentException(e);
275
            }
276
        } else {
277
            dest = new StringBuffer(label);
278
        }
279

280
        // step 8, move forward to check the smallest number of the code points
281
        // the length must be inside 1..63
282
        if (dest.length() == 0) {
283
            throw new IllegalArgumentException(
284
                        "Empty label is not a legal name");
285
        }
286

287
        // step 3
288
        // Verify the absence of non-LDH ASCII code points
289
        //   0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
290
        // Verify the absence of leading and trailing hyphen
291
        boolean useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0);
292
        if (useSTD3ASCIIRules) {
293
            for (int i = 0; i < dest.length(); i++) {
294
                int c = dest.charAt(i);
295
                if (isNonLDHAsciiCodePoint(c)) {
296
                    throw new IllegalArgumentException(
297
                        "Contains non-LDH ASCII characters");
298
                }
299
            }
300

301
            if (dest.charAt(0) == '-' ||
302
                dest.charAt(dest.length() - 1) == '-') {
303

304
                throw new IllegalArgumentException(
305
                        "Has leading or trailing hyphen");
306
            }
307
        }
308

309
        if (!isASCII) {
310
            // step 4
311
            // If all code points are inside 0..0x7f, skip to step 8
312
            if (!isAllASCII(dest.toString())) {
313
                // step 5
314
                // verify the sequence does not begin with ACE prefix
315
                if(!startsWithACEPrefix(dest)){
316

317
                    // step 6
318
                    // encode the sequence with punycode
319
                    try {
320
                        dest = Punycode.encode(dest, null);
321
                    } catch (java.text.ParseException e) {
322
                        throw new IllegalArgumentException(e);
323
                    }
324

325
                    dest = toASCIILower(dest);
326

327
                    // step 7
328
                    // prepend the ACE prefix
329
                    dest.insert(0, ACE_PREFIX);
330
                } else {
331
                    throw new IllegalArgumentException("The input starts with the ACE Prefix");
332
                }
333

334
            }
335
        }
336

337
        // step 8
338
        // the length must be inside 1..63
339
        if (dest.length() > MAX_LABEL_LENGTH) {
340
            throw new IllegalArgumentException("The label in the input is too long");
341
        }
342

343
        return dest.toString();
344
    }
345

346
    //
347
    // toUnicode operation; should only apply to a single label
348
    //
349
    private static String toUnicodeInternal(String label, int flag) {
350
        boolean[] caseFlags = null;
351
        StringBuffer dest;
352

353
        // step 1
354
        // find out if all the codepoints in input are ASCII
355
        boolean isASCII = isAllASCII(label);
356

357
        if(!isASCII){
358
            // step 2
359
            // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
360
            try {
361
                UCharacterIterator iter = UCharacterIterator.getInstance(label);
362
                dest = namePrep.prepare(iter, flag);
363
            } catch (Exception e) {
364
                // toUnicode never fails; if any step fails, return the input string
365
                return label;
366
            }
367
        } else {
368
            dest = new StringBuffer(label);
369
        }
370

371
        // step 3
372
        // verify ACE Prefix
373
        if(startsWithACEPrefix(dest)) {
374

375
            // step 4
376
            // Remove the ACE Prefix
377
            String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());
378

379
            try {
380
                // step 5
381
                // Decode using punycode
382
                StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);
383

384
                // step 6
385
                // Apply toASCII
386
                String toASCIIOut = toASCII(decodeOut.toString(), flag);
387

388
                // step 7
389
                // verify
390
                if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
391
                    // step 8
392
                    // return output of step 5
393
                    return decodeOut.toString();
394
                }
395
            } catch (Exception ignored) {
396
                // no-op
397
            }
398
        }
399

400
        // just return the input
401
        return label;
402
    }
403

404

405
    //
406
    // LDH stands for "letter/digit/hyphen", with characters restricted to the
407
    // 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
408
    // <->.
409
    // Non LDH refers to characters in the ASCII range, but which are not
410
    // letters, digits or the hypen.
411
    //
412
    // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
413
    //
414
    private static boolean isNonLDHAsciiCodePoint(int ch){
415
        return (0x0000 <= ch && ch <= 0x002C) ||
416
               (0x002E <= ch && ch <= 0x002F) ||
417
               (0x003A <= ch && ch <= 0x0040) ||
418
               (0x005B <= ch && ch <= 0x0060) ||
419
               (0x007B <= ch && ch <= 0x007F);
420
    }
421

422
    //
423
    // search dots in a string and return the index of that character;
424
    // or if there is no dots, return the length of input string
425
    // dots might be: \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
426
    // and \uFF61 (halfwidth ideographic full stop).
427
    //
428
    private static int searchDots(String s, int start) {
429
        int i;
430
        for (i = start; i < s.length(); i++) {
431
            if (isLabelSeparator(s.charAt(i))) {
432
                break;
433
            }
434
        }
435

436
        return i;
437
    }
438

439
    //
440
    // to check if a string is a root label, ".".
441
    //
442
    private static boolean isRootLabel(String s) {
443
        return (s.length() == 1 && isLabelSeparator(s.charAt(0)));
444
    }
445

446
    //
447
    // to check if a character is a label separator, i.e. a dot character.
448
    //
449
    private static boolean isLabelSeparator(char c) {
450
        return (c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61');
451
    }
452

453
    //
454
    // to check if a string only contains US-ASCII code point
455
    //
456
    private static boolean isAllASCII(String input) {
457
        boolean isASCII = true;
458
        for (int i = 0; i < input.length(); i++) {
459
            int c = input.charAt(i);
460
            if (c > 0x7F) {
461
                isASCII = false;
462
                break;
463
            }
464
        }
465
        return isASCII;
466
    }
467

468
    //
469
    // to check if a string starts with ACE-prefix
470
    //
471
    private static boolean startsWithACEPrefix(StringBuffer input){
472
        boolean startsWithPrefix = true;
473

474
        if(input.length() < ACE_PREFIX_LENGTH){
475
            return false;
476
        }
477
        for(int i = 0; i < ACE_PREFIX_LENGTH; i++){
478
            if(toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)){
479
                startsWithPrefix = false;
480
            }
481
        }
482
        return startsWithPrefix;
483
    }
484

485
    private static char toASCIILower(char ch){
486
        if('A' <= ch && ch <= 'Z'){
487
            return (char)(ch + 'a' - 'A');
488
        }
489
        return ch;
490
    }
491

492
    private static StringBuffer toASCIILower(StringBuffer input){
493
        StringBuffer dest = new StringBuffer();
494
        for(int i = 0; i < input.length();i++){
495
            dest.append(toASCIILower(input.charAt(i)));
496
        }
497
        return dest;
498
    }
499
}
500

501
Product

Resources

Company