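// NOTE: the following page chrome was captured together with the source when
// it was copied from a CoCalc web page and is not part of the library:
// "Book a Demo!", "CoCalc Logo Icon", the site navigation bar (Store,
// Features, Docs, Share, Support, News, About, Policies, Sign Up, Sign In),
// "Download", "80713 views".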
/*! http://mths.be/punycode v1.2.4 by @mathias */
2
;(function(root) {
3
4
/** Detect free variables */
5
var freeExports = typeof exports == 'object' && exports;
6
var freeModule = typeof module == 'object' && module &&
7
module.exports == freeExports && module;
8
var freeGlobal = typeof global == 'object' && global;
9
if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
10
root = freeGlobal;
11
}
12
13
/**
 * The `punycode` object.
 * @name punycode
 * @type Object
 */
var punycode,

    /** Highest positive signed 32-bit integer value */
    maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1

    /** Bootstring parameters */
    base = 36,
    tMin = 1,
    tMax = 26,
    skew = 38,
    damp = 700,
    initialBias = 72,
    initialN = 128, // 0x80
    delimiter = '-', // '\x2D'

    /** Regular expressions */
    regexPunycode = /^xn--/,
    regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars
    regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g, // RFC 3490 separators

    /** Error messages, keyed by the type passed to `error()` */
    errors = {
        'overflow': 'Overflow: input needs wider integers to process',
        'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
        'invalid-input': 'Invalid input'
    },

    /** Convenience shortcuts */
    baseMinusTMin = base - tMin,
    floor = Math.floor,
    stringFromCharCode = String.fromCharCode,

    /** Temporary variable */
    key;

/*--------------------------------------------------------------------------*/

/**
 * Raises the error registered under `type` in the `errors` table.
 * @private
 * @param {String} type The key of the message to look up in `errors`.
 * @throws {RangeError} Always; its message is `errors[type]`.
 */
function error(type) {
    var message = errors[type];
    throw new RangeError(message);
}

/**
 * A generic `Array#map` utility function.
 * @private
 * @param {Array} array The array to iterate over. It is not modified.
 * @param {Function} fn The function that gets called for every array item.
 * @returns {Array} A new array of values returned by `fn`.
 */
function map(array, fn) {
    var length = array.length;
    var result = [];
    // Items are visited from last to first, so the very first write already
    // gives `result` its final length.
    while (length--) {
        result[length] = fn(array[length]);
    }
    return result;
}

/**
 * Applies `fn` to every label of a domain name string and reassembles the
 * result. Labels are found by splitting on `regexSeparators`; the mapped
 * labels are always joined with `.`.
 * @private
 * @param {String} string The domain name.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} A new string made of the labels returned by `fn`.
 */
function mapDomain(string, fn) {
    var labels = string.split(regexSeparators);
    var mappedLabels = map(labels, fn);
    return mappedLabels.join('.');
}

/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. While JavaScript uses UCS-2 internally,
 * this function will convert a pair of surrogate halves (each of which
 * UCS-2 exposes as separate characters) into a single code point,
 * matching UTF-16.
 * @see `punycode.ucs2.encode`
 * @see <http://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
    var output = [];
    var counter = 0;
    var length = string.length;
    var value;
    var extra;
    while (counter < length) {
        value = string.charCodeAt(counter++);
        if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
            // high surrogate, and there is a next character
            extra = string.charCodeAt(counter++);
            if ((extra & 0xFC00) == 0xDC00) { // low surrogate
                output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
            } else {
                // unmatched surrogate; only append this code unit, in case the
                // next code unit is the high surrogate of a surrogate pair
                output.push(value);
                counter--;
            }
        } else {
            // BMP code unit, low surrogate without a preceding high surrogate,
            // or a high surrogate at the very end of the string
            output.push(value);
        }
    }
    return output;
}

/**
 * Creates a string based on an array of numeric code points.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points. It is not modified.
 * @returns {String} The new Unicode string (UCS-2).
 */
function ucs2encode(array) {
    var output = '';
    var length = array.length;
    var index;
    var value;
    for (index = 0; index < length; index++) {
        value = array[index];
        if (value > 0xFFFF) {
            // Code points above the BMP are written as a surrogate pair:
            // emit the high surrogate here, leave the low one in `value`.
            value -= 0x10000;
            output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
            value = 0xDC00 | value & 0x3FF;
        }
        output += stringFromCharCode(value);
    }
    return output;
}

/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
function basicToDigit(codePoint) {
    // Both ends of every range are checked so that punctuation and control
    // characters fall through to `base` instead of yielding a bogus digit.
    if (codePoint >= 0x30 && codePoint < 0x3A) { // '0'..'9' map to 26..35
        return 26 + (codePoint - 0x30);
    }
    if (codePoint >= 0x41 && codePoint < 0x5B) { // 'A'..'Z' map to 0..25
        return codePoint - 0x41;
    }
    if (codePoint >= 0x61 && codePoint < 0x7B) { // 'a'..'z' map to 0..25
        return codePoint - 0x61;
    }
    return base;
}

/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected to
 * be in the range `0` to `base - 1`.
 * @param {Number} flag Selects the letter case: the lowercase form is used
 * when `flag` loosely equals `0`, the uppercase form otherwise. The
 * behavior is undefined if `flag` is non-zero and `digit` has no uppercase
 * form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
function digitToBasic(digit, flag) {
    // 0..25 map to ASCII a..z: 22 + 75 is the code point of 'a'.
    // 26..35 map to ASCII 0..9: 26 + 22 is the code point of '0'.
    var letterOffset = digit < 26 ? 75 : 0;
    // Upper- and lowercase ASCII letters are 32 code points apart.
    var caseShift = flag != 0 ? 32 : 0;
    return digit + 22 + letterOffset - caseShift;
}

/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * http://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints The divisor used for the proportional increase
 * of `delta`.
 * @param {Boolean} firstTime When truthy, `delta` is first divided by `damp`;
 * otherwise it is halved.
 * @returns {Number} The adapted bias.
 */
function adapt(delta, numPoints, firstTime) {
    var threshold = baseMinusTMin * tMax >> 1;
    var k = 0;
    if (firstTime) {
        delta = floor(delta / damp);
    } else {
        delta = delta >> 1;
    }
    delta += floor(delta / numPoints);
    // Each division by `base - tMin` adds `base` to the resulting bias.
    while (delta > threshold) {
        delta = floor(delta / baseMinusTMin);
        k += base;
    }
    return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
}

/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} If a code point >= 0x80 appears before the last
 * delimiter ('not-basic'), if the input ends inside a variable-length integer
 * or contains a character that is not a digit ('invalid-input'), or if a
 * value would exceed `maxInt` ('overflow').
 */
function decode(input) {
    // Don't use UCS-2: `output` holds numeric code points until the end.
    var output = [];
    var inputLength = input.length;
    var i = 0;
    var n = initialN;
    var bias = initialBias;
    var out;
    var basic;
    var j;
    var index;
    var oldi;
    var w;
    var k;
    var digit;
    var t;
    /** Cached calculation results */
    var baseMinusT;

    // Handle the basic code points: let `basic` be the number of input code
    // points before the last delimiter, or `0` if there is none, then copy
    // the first basic code points to the output.
    basic = input.lastIndexOf(delimiter);
    if (basic < 0) {
        basic = 0;
    }

    for (j = 0; j < basic; ++j) {
        // if it's not a basic code point
        if (input.charCodeAt(j) >= 0x80) {
            error('not-basic');
        }
        output.push(input.charCodeAt(j));
    }

    // Main decoding loop: start just after the last delimiter if any basic code
    // points were copied; start at the beginning otherwise.
    for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

        // `index` is the index of the next character to be consumed.
        // Decode a generalized variable-length integer into `delta`,
        // which gets added to `i`. The overflow checking is easier
        // if we increase `i` as we go, then subtract off its starting
        // value at the end to obtain `delta`.
        for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

            if (index >= inputLength) {
                error('invalid-input');
            }

            digit = basicToDigit(input.charCodeAt(index++));

            // `basicToDigit` returns `base` for a character that is not a
            // digit; that is malformed input, not an overflow.
            if (digit >= base) {
                error('invalid-input');
            }
            if (digit > floor((maxInt - i) / w)) {
                error('overflow');
            }

            i += digit * w;
            t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

            // A digit below the threshold terminates the integer.
            if (digit < t) {
                break;
            }

            baseMinusT = base - t;
            if (w > floor(maxInt / baseMinusT)) {
                error('overflow');
            }

            w *= baseMinusT;

        }

        out = output.length + 1;
        bias = adapt(i - oldi, out, oldi == 0);

        // `i` was supposed to wrap around from `out` to `0`,
        // incrementing `n` each time, so we'll fix that now:
        if (floor(i / out) > maxInt - n) {
            error('overflow');
        }

        n += floor(i / out);
        i %= out;

        // Insert `n` at position `i` of the output
        output.splice(i++, 0, n);

    }

    return ucs2encode(output);
}

/**
 * Converts a string of Unicode symbols to a Punycode string of ASCII-only
 * symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} If encoding would need integers above `maxInt`
 * ('overflow').
 */
function encode(input) {
    var n;
    var delta;
    var handledCPCount;
    var basicLength;
    var bias;
    var j;
    var m;
    var q;
    var k;
    var t;
    var currentValue;
    var output = [];
    /** `inputLength` will hold the number of code points in `input`. */
    var inputLength;
    /** Cached calculation results */
    var handledCPCountPlusOne;
    var baseMinusT;
    var qMinusT;

    // Convert the input in UCS-2 to Unicode
    input = ucs2decode(input);

    // Cache the length
    inputLength = input.length;

    // Initialize the state
    n = initialN;
    delta = 0;
    bias = initialBias;

    // Handle the basic code points
    for (j = 0; j < inputLength; ++j) {
        currentValue = input[j];
        if (currentValue < 0x80) {
            output.push(stringFromCharCode(currentValue));
        }
    }

    // `handledCPCount` is the number of code points that have been handled;
    // `basicLength` is the number of basic code points.
    handledCPCount = basicLength = output.length;

    // Finish the basic string - if it is not empty - with a delimiter
    if (basicLength) {
        output.push(delimiter);
    }

    // Main encoding loop:
    while (handledCPCount < inputLength) {

        // All non-basic code points < n have been handled already. Find the next
        // larger one:
        for (m = maxInt, j = 0; j < inputLength; ++j) {
            currentValue = input[j];
            if (currentValue >= n && currentValue < m) {
                m = currentValue;
            }
        }

        // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
        // but guard against overflow
        handledCPCountPlusOne = handledCPCount + 1;
        if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
            error('overflow');
        }

        delta += (m - n) * handledCPCountPlusOne;
        n = m;

        for (j = 0; j < inputLength; ++j) {
            currentValue = input[j];

            if (currentValue < n && ++delta > maxInt) {
                error('overflow');
            }

            if (currentValue == n) {
                // Represent delta as a generalized variable-length integer
                for (q = delta, k = base; /* no condition */; k += base) {
                    t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
                    if (q < t) {
                        break;
                    }
                    qMinusT = q - t;
                    baseMinusT = base - t;
                    output.push(
                        stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
                    );
                    q = floor(qMinusT / baseMinusT);
                }

                // The final digit is the one below the threshold `t`.
                output.push(stringFromCharCode(digitToBasic(q, 0)));
                bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
                delta = 0;
                ++handledCPCount;
            }
        }

        ++delta;
        ++n;

    }
    return output.join('');
}

/**
 * Converts a Punycode string representing a domain name to Unicode. Only the
 * Punycoded parts of the domain name will be converted, i.e. it doesn't
 * matter if you call it on a string that has already been converted to
 * Unicode.
 * @memberOf punycode
 * @param {String} domain The Punycode domain name to convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
function toUnicode(domain) {
    return mapDomain(domain, function(string) {
        // Labels matching `regexPunycode` are decoded without their 4-character
        // `xn--` prefix, lowercased first; all other labels pass through.
        return regexPunycode.test(string)
            ? decode(string.slice(4).toLowerCase())
            : string;
    });
}

/**
 * Converts a Unicode string representing a domain name to Punycode. Only the
 * non-ASCII parts of the domain name will be converted, i.e. it doesn't
 * matter if you call it with a domain that's already in ASCII.
 * @memberOf punycode
 * @param {String} domain The domain name to convert, as a Unicode string.
 * @returns {String} The Punycode representation of the given domain name.
 */
function toASCII(domain) {
    return mapDomain(domain, function(string) {
        // Labels matching `regexNonASCII` are encoded and prefixed with
        // `xn--`; all other labels pass through unchanged.
        return regexNonASCII.test(string)
            ? 'xn--' + encode(string)
            : string;
    });
}

/*--------------------------------------------------------------------------*/

/** Define the public API */
punycode = {
    /**
     * A string representing the current Punycode.js version number.
     * @memberOf punycode
     * @type String
     */
    'version': '1.2.4',
    /**
     * An object of methods to convert from JavaScript's internal character
     * representation (UCS-2) to Unicode code points, and back.
     * @see <http://mathiasbynens.be/notes/javascript-encoding>
     * @memberOf punycode
     * @type Object
     */
    'ucs2': {
        'decode': ucs2decode,
        'encode': ucs2encode
    },
    'decode': decode,
    'encode': encode,
    'toASCII': toASCII,
    'toUnicode': toUnicode
};

/** Expose `punycode` */
// Some AMD build optimizers, like r.js, check for specific condition patterns
// like the following:
if (
    typeof define == 'function' &&
    typeof define.amd == 'object' &&
    define.amd
) {
    define('punycode', function() {
        return punycode;
    });
} else if (freeExports && !freeExports.nodeType) {
    if (freeModule) { // in Node.js or RingoJS v0.8.0+
        freeModule.exports = punycode;
    } else { // in Narwhal or RingoJS v0.7.0-
        // Without a usable `module`, copy each own property onto `exports`.
        for (key in punycode) {
            if (punycode.hasOwnProperty(key)) {
                freeExports[key] = punycode[key];
            }
        }
    }
} else { // in Rhino or a web browser
    root.punycode = punycode;
}

}(this));
508
509