// punycode.js 13 KB
/*! http://mths.be/punycode v1.2.1 by @mathias */
  2. ;(function(root) {
  3. /** Detect free variables */
  4. var freeExports = typeof exports == 'object' && exports;
  5. var freeModule = typeof module == 'object' && module &&
  6. module.exports == freeExports && module;
  7. var freeGlobal = typeof global == 'object' && global;
  8. if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
  9. root = freeGlobal;
  10. }
  /**
   * The `punycode` object.
   * @name punycode
   * @type Object
   */
  var punycode;

  /** Highest positive signed 32-bit integer value */
  var maxInt = 0x7FFFFFFF; // aka. 2147483647 or 2^31-1

  /** Bootstring parameters */
  var base = 36;
  var tMin = 1;
  var tMax = 26;
  var skew = 38;
  var damp = 700;
  var initialBias = 72;
  var initialN = 128; // 0x80
  var delimiter = '-'; // '\x2D'

  /** Regular expressions */
  var regexPunycode = /^xn--/;
  // Matches any character outside the printable ASCII range (space to tilde)
  var regexNonASCII = /[^ -~]/;
  // RFC 3490 separators
  var regexSeparators = /\x2E|\u3002|\uFF0E|\uFF61/g;

  /** Error messages, keyed by the type passed to `error()` */
  var errors = {
    'overflow': 'Overflow: input needs wider integers to process',
    'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
    'invalid-input': 'Invalid input'
  };

  /** Convenience shortcuts */
  var baseMinusTMin = base - tMin;
  var floor = Math.floor;
  var stringFromCharCode = String.fromCharCode;

  /** Temporary variable */
  var key;
  /*--------------------------------------------------------------------------*/
  /**
   * A generic error utility function: looks up the message registered for
   * `type` in `errors` and throws it.
   * @private
   * @param {String} type The error type (a key of `errors`).
   * @throws {RangeError} Always; its message is `errors[type]`.
   */
  function error(type) {
    var message = errors[type];
    throw new RangeError(message);
  }
  /**
   * A generic `Array#map` utility function.
   * The input array is left untouched; results are collected in a new array.
   * @private
   * @param {Array} array The array to iterate over.
   * @param {Function} fn The function that gets called for every array
   * item. It receives the item as its only argument.
   * @returns {Array} A new array of values returned by the callback function.
   */
  function map(array, fn) {
    var length = array.length;
    var result = [];
    // Walk from the last index down to 0; writing `result[length]` first at
    // the highest index fixes the result's length up front.
    while (length--) {
      result[length] = fn(array[length]);
    }
    return result;
  }
  /**
   * A simple `Array#map`-like wrapper to work with domain name strings.
   * The string is split on the separators matched by `regexSeparators`, each
   * resulting label is passed through `fn`, and the results are joined back
   * together with `.`.
   * @private
   * @param {String} string The domain name.
   * @param {Function} fn The function that gets called for every label.
   * @returns {String} A new string made of the labels returned by the
   * callback function, joined with `.`.
   */
  function mapDomain(string, fn) {
    var labels = string.split(regexSeparators);
    var converted = map(labels, fn);
    return converted.join('.');
  }
  /**
   * Creates an array containing the decimal code points of each Unicode
   * character in the string. While JavaScript uses UCS-2 internally,
   * this function will convert a pair of surrogate halves (each of which
   * UCS-2 exposes as separate characters) into a single code point,
   * matching UTF-16. Surrogate halves that are not part of a well-formed
   * pair (high surrogate immediately followed by a low surrogate) are
   * emitted unchanged as individual values.
   * @see `punycode.ucs2.encode`
   * @see <http://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode.ucs2
   * @name decode
   * @param {String} string The Unicode input string (UCS-2).
   * @returns {Array} The new array of code points.
   */
  function ucs2decode(string) {
    var output = [],
        counter = 0,
        length = string.length,
        value,
        extra;
    while (counter < length) {
      value = string.charCodeAt(counter++);
      // Only a *high* surrogate (0xD800..0xDBFF) can start a pair.
      if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
        // high surrogate, and there is a next character
        extra = string.charCodeAt(counter++);
        if ((extra & 0xFC00) == 0xDC00) { // low surrogate
          output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
        } else {
          // Unmatched high surrogate: emit only this code unit and step back
          // so the next unit is examined on its own, in case it is the high
          // surrogate of a real pair.
          output.push(value);
          counter--;
        }
      } else {
        output.push(value);
      }
    }
    return output;
  }
  /**
   * Creates a string based on an array of decimal code points. Values above
   * 0xFFFF are written as a surrogate pair; all other values are written as
   * a single code unit. The input array is not modified.
   * @see `punycode.ucs2.decode`
   * @memberOf punycode.ucs2
   * @name encode
   * @param {Array} codePoints The array of decimal code points.
   * @returns {String} The new Unicode string (UCS-2).
   */
  function ucs2encode(array) {
    var output = '',
        length = array.length,
        index,
        value;
    for (index = 0; index < length; ++index) {
      // Work on a local copy so the caller's array is left as it was.
      value = array[index];
      if (value > 0xFFFF) {
        // Astral code point: emit the high surrogate now and keep the low
        // surrogate in `value` for the common append below.
        value -= 0x10000;
        output += String.fromCharCode(value >>> 10 & 0x3FF | 0xD800);
        value = 0xDC00 | value & 0x3FF;
      }
      output += String.fromCharCode(value);
    }
    return output;
  }
  /**
   * Converts a basic code point into a digit/integer.
   * @see `digitToBasic()`
   * @private
   * @param {Number} codePoint The basic (decimal) code point.
   * @returns {Number} The numeric value of a basic code point (for use in
   * representing integers) in the range `0` to `base - 1`, or `base` if
   * the code point does not represent a value.
   */
  function basicToDigit(codePoint) {
    // Each range needs both bounds: with only an upper-bound check, code
    // points below the range (e.g. `-`, `@`, `[`) would be accepted too.
    if (codePoint >= 48 && codePoint < 58) {
      // '0'..'9' map to 26..35
      return codePoint - 22;
    }
    if (codePoint >= 65 && codePoint < 91) {
      // 'A'..'Z' map to 0..25
      return codePoint - 65;
    }
    if (codePoint >= 97 && codePoint < 123) {
      // 'a'..'z' map to 0..25
      return codePoint - 97;
    }
    return base;
  }
  /**
   * Converts a digit/integer into a basic code point.
   * @see `basicToDigit()`
   * @private
   * @param {Number} digit The numeric value of a basic code point, which
   * needs to be in the range `0` to `base - 1`.
   * @param {Number} flag If non-zero, the uppercase form is used; else, the
   * lowercase form is used. The behavior is undefined if `flag` is non-zero
   * and `digit` has no uppercase form.
   * @returns {Number} The basic code point whose value (when used for
   * representing integers) is `digit`.
   */
  function digitToBasic(digit, flag) {
    // 0..25 map to ASCII a..z or A..Z: 22 + 75 = 97, the code of 'a'
    // 26..35 map to ASCII 0..9: 26 + 22 = 48, the code of '0'
    var letterOffset = digit < 26 ? 75 : 0;
    // Uppercase letters sit 32 code points below their lowercase forms
    var caseOffset = flag != 0 ? 32 : 0;
    return digit + 22 + letterOffset - caseOffset;
  }
  /**
   * Bias adaptation function as per section 3.4 of RFC 3492.
   * http://tools.ietf.org/html/rfc3492#section-3.4
   * @private
   * @param {Number} delta The delta that was just encoded or decoded.
   * @param {Number} numPoints The divisor used for the proportional
   * increase of the scaled delta.
   * @param {Boolean} firstTime Whether this is the first adaptation; if so
   * the delta is scaled down by `damp`, otherwise it is halved.
   * @returns {Number} The new bias.
   */
  function adapt(delta, numPoints, firstTime) {
    var k = 0,
        threshold = baseMinusTMin * tMax >> 1;
    if (firstTime) {
      delta = floor(delta / damp);
    } else {
      delta = delta >> 1;
    }
    delta += floor(delta / numPoints);
    // Divide the delta down until it is at most the threshold, counting
    // `base` for every division.
    while (delta > threshold) {
      delta = floor(delta / baseMinusTMin);
      k += base;
    }
    return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
  }
  /**
   * Converts a Punycode string of ASCII code points to a string of Unicode
   * code points.
   * @memberOf punycode
   * @param {String} input The Punycode string of ASCII code points.
   * @returns {String} The resulting string of Unicode code points.
   * @throws {RangeError} With the `not-basic` message when a code point
   * >= 0x80 appears before the last delimiter, the `invalid-input` message
   * when the encoded part ends prematurely or contains a character that is
   * not a Punycode digit, and the `overflow` message when a value would
   * exceed `maxInt`.
   */
  function decode(input) {
    // Don't use UCS-2: code points are collected as numbers and only turned
    // into a string at the very end.
    var output = [],
        inputLength = input.length,
        out,
        i = 0,
        n = initialN,
        bias = initialBias,
        basic,
        j,
        index,
        oldi,
        w,
        k,
        digit,
        t,
        /** Cached calculation results */
        baseMinusT;

    // Handle the basic code points: let `basic` be the number of input code
    // points before the last delimiter, or `0` if there is none, then copy
    // the first basic code points to the output.
    basic = input.lastIndexOf(delimiter);
    if (basic < 0) {
      basic = 0;
    }

    for (j = 0; j < basic; ++j) {
      // if it's not a basic code point
      if (input.charCodeAt(j) >= 0x80) {
        error('not-basic');
      }
      output.push(input.charCodeAt(j));
    }

    // Main decoding loop: start just after the last delimiter if any basic code
    // points were copied; start at the beginning otherwise.
    for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

      // `index` is the index of the next character to be consumed.
      // Decode a generalized variable-length integer into `delta`,
      // which gets added to `i`. The overflow checking is easier
      // if we increase `i` as we go, then subtract off its starting
      // value at the end to obtain `delta`.
      for (oldi = i, w = 1, k = base; /* no condition */; k += base) {

        if (index >= inputLength) {
          error('invalid-input');
        }

        digit = basicToDigit(input.charCodeAt(index++));

        // A character that is not a Punycode digit is malformed input, not
        // an arithmetic overflow, so report it as such.
        if (digit < 0 || digit >= base) {
          error('invalid-input');
        }

        if (digit > floor((maxInt - i) / w)) {
          error('overflow');
        }

        i += digit * w;
        t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

        if (digit < t) {
          break;
        }

        baseMinusT = base - t;
        if (w > floor(maxInt / baseMinusT)) {
          error('overflow');
        }

        w *= baseMinusT;

      }

      out = output.length + 1;
      bias = adapt(i - oldi, out, oldi == 0);

      // `i` was supposed to wrap around from `out` to `0`,
      // incrementing `n` each time, so we'll fix that now:
      if (floor(i / out) > maxInt - n) {
        error('overflow');
      }

      n += floor(i / out);
      i %= out;

      // Insert `n` at position `i` of the output
      output.splice(i++, 0, n);

    }

    return ucs2encode(output);
  }
  /**
   * Converts a string of Unicode code points to a Punycode string of ASCII
   * code points.
   * @memberOf punycode
   * @param {String} input The string of Unicode code points.
   * @returns {String} The resulting Punycode string of ASCII code points.
   * @throws {RangeError} With the `overflow` message when a delta would
   * exceed `maxInt`.
   */
  function encode(input) {
    // Convert the input in UCS-2 to an array of Unicode code points
    var codePoints = ucs2decode(input),
        // Number of code points in the input
        total = codePoints.length,
        pieces = [],
        // Initial state
        n = initialN,
        delta = 0,
        bias = initialBias,
        idx,
        cp,
        basicCount,
        handled,
        handledPlusOne,
        smallest,
        q,
        k,
        t,
        rest,
        span;

    // Handle the basic code points: they are copied to the output as they are
    for (idx = 0; idx < total; idx += 1) {
      cp = codePoints[idx];
      if (cp < 0x80) {
        pieces.push(stringFromCharCode(cp));
      }
    }

    // `basicCount` is the number of basic code points;
    // `handled` is the number of code points that have been handled so far.
    basicCount = pieces.length;
    handled = basicCount;

    // Finish the basic string - if it is not empty - with a delimiter
    if (basicCount > 0) {
      pieces.push(delimiter);
    }

    // Main encoding loop:
    while (handled < total) {

      // All non-basic code points < n have been handled already. Find the
      // next larger one:
      smallest = maxInt;
      for (idx = 0; idx < total; idx += 1) {
        cp = codePoints[idx];
        if (cp >= n && cp < smallest) {
          smallest = cp;
        }
      }

      // Increase `delta` enough to advance the decoder's <n,i> state to
      // <smallest,0>, but guard against overflow
      handledPlusOne = handled + 1;
      if (smallest - n > floor((maxInt - delta) / handledPlusOne)) {
        error('overflow');
      }
      delta += (smallest - n) * handledPlusOne;
      n = smallest;

      for (idx = 0; idx < total; idx += 1) {
        cp = codePoints[idx];

        if (cp < n) {
          delta += 1;
          if (delta > maxInt) {
            error('overflow');
          }
        }

        if (cp === n) {
          // Represent delta as a generalized variable-length integer
          q = delta;
          k = base;
          while (true) {
            if (k <= bias) {
              t = tMin;
            } else if (k >= bias + tMax) {
              t = tMax;
            } else {
              t = k - bias;
            }
            if (q < t) {
              break;
            }
            rest = q - t;
            span = base - t;
            pieces.push(
              stringFromCharCode(digitToBasic(t + rest % span, 0))
            );
            q = floor(rest / span);
            k += base;
          }

          pieces.push(stringFromCharCode(digitToBasic(q, 0)));
          bias = adapt(delta, handledPlusOne, handled == basicCount);
          delta = 0;
          handled += 1;
        }
      }

      delta += 1;
      n += 1;

    }

    return pieces.join('');
  }
  /**
   * Converts a Punycode string representing a domain name to Unicode. Only
   * the labels that start with `xn--` are decoded; all other labels are
   * passed through as they are, so calling it on a string that has already
   * been converted to Unicode does no harm.
   * @memberOf punycode
   * @param {String} domain The Punycode domain name to convert to Unicode.
   * @returns {String} The Unicode representation of the given Punycode
   * string.
   */
  function toUnicode(domain) {
    return mapDomain(domain, function(label) {
      if (!regexPunycode.test(label)) {
        return label;
      }
      // Drop the 4-character `xn--` prefix and decode the lowercased rest
      return decode(label.slice(4).toLowerCase());
    });
  }
  /**
   * Converts a Unicode string representing a domain name to Punycode. Only
   * the labels matched by `regexNonASCII` are encoded and prefixed with
   * `xn--`; all other labels are passed through as they are, so calling it
   * with a domain that's already in ASCII does no harm.
   * @memberOf punycode
   * @param {String} domain The domain name to convert, as a Unicode string.
   * @returns {String} The Punycode representation of the given domain name.
   */
  function toASCII(domain) {
    return mapDomain(domain, function(label) {
      if (regexNonASCII.test(label)) {
        return 'xn--' + encode(label);
      }
      return label;
    });
  }
  /*--------------------------------------------------------------------------*/
  /** Define the public API */
  punycode = {};

  /**
   * A string representing the current Punycode.js version number.
   * @memberOf punycode
   * @type String
   */
  punycode['version'] = '1.2.1';

  /**
   * An object of methods to convert from JavaScript's internal character
   * representation (UCS-2) to decimal Unicode code points, and back.
   * @see <http://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode
   * @type Object
   */
  punycode['ucs2'] = {
    'decode': ucs2decode,
    'encode': ucs2encode
  };

  // Punycode string <-> Unicode string conversion
  punycode['decode'] = decode;
  punycode['encode'] = encode;

  // Domain name conversion, applied label by label
  punycode['toASCII'] = toASCII;
  punycode['toUnicode'] = toUnicode;
  /** Expose `punycode` */
  // Some AMD build optimizers, like r.js, check for specific condition patterns
  // like the following, so this test is kept in exactly this shape:
  if (
    typeof define == 'function' &&
    typeof define.amd == 'object' &&
    define.amd
  ) {
    define(function() {
      return punycode;
    });
  } else if (!freeExports || freeExports.nodeType) {
    // in Rhino or a web browser
    root.punycode = punycode;
  } else if (freeModule) {
    // in Node.js or RingoJS v0.8.0+
    freeModule.exports = punycode;
  } else {
    // in Narwhal or RingoJS v0.7.0-: copy the own properties one by one
    for (key in punycode) {
      if (punycode.hasOwnProperty(key)) {
        freeExports[key] = punycode[key];
      }
    }
  }
  440. }(this));