encoding.js 122 KB


  1. (function (global, factory) {
  2. typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
  3. typeof define === 'function' && define.amd ? define(['exports'], factory) :
  4. (global = global || self, factory(global.TextEncoding = {}));
  5. }(this, (function (exports) { 'use strict';
  /**
   * Label of the default encoding ('utf-8').
   * NOTE(review): no use of this constant is visible in this part of the
   * file; presumably it is the fallback when a caller supplies no label -
   * confirm against the TextDecoder/TextEncoder constructors.
   * @const
   */
  var DEFAULT_ENCODING = 'utf-8';
  /**
   * Reports a decoding failure, either by throwing or by yielding a
   * replacement code point.
   * @param {boolean} fatal When true a TypeError is thrown.
   * @param {number=} opt_code_point Replacement to use instead of U+FFFD.
   *     Any falsy value (including 0) selects U+FFFD.
   * @return {number} The code point to emit in place of the bad input.
   */
  function decoderError(fatal, opt_code_point) {
    if (fatal) {
      throw TypeError("Decoder error");
    }
    var replacement = opt_code_point ? opt_code_point : 0xfffd;
    return replacement;
  }
  /**
   * Reports that a code point has no representation in the target encoding.
   * @param {number} code_point The code point that could not be encoded.
   * @return {number} Never returns normally; a TypeError is always thrown.
   */
  function encoderError(code_point) {
    var message = "The code point " + code_point + " could not be encoded.";
    throw TypeError(message);
  }
  25. // 5.2 Names and labels
  26. // TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
  27. // https://github.com/google/closure-compiler/issues/247
  /**
   * Resolves an encoding label to its entry in the label registry.
   *
   * Matching follows the Encoding Standard's "get an encoding" steps: only
   * ASCII whitespace is stripped and only ASCII letters are case-folded, so
   * labels padded with e.g. U+00A0 or spelled with U+212A KELVIN SIGN do not
   * match.
   *
   * @param {string} label The encoding label.
   * @return {?{name:string,labels:Array.<string>}} The registered encoding,
   *     or null when the label is unknown.
   */
  function getEncoding(label) {
    // 1. Remove any leading and trailing ASCII whitespace from label
    // (TAB, LF, FF, CR, SPACE). String#trim would also strip non-ASCII
    // whitespace, which the standard does not allow.
    var stripped = String(label).replace(/^[\t\n\f\r ]+|[\t\n\f\r ]+$/g, '');
    // ASCII-only lowercasing; String#toLowerCase also folds non-ASCII
    // characters onto ASCII letters.
    var keyLabel = stripped.replace(/[A-Z]/g, function (upper) {
      return String.fromCharCode(upper.charCodeAt(0) + 0x20);
    });
    // 2. If label is an ASCII case-insensitive match for any of the
    // labels listed in the table below, return the corresponding
    // encoding, and failure otherwise.
    // An own-property check is required: `in` would also accept inherited
    // keys such as "constructor" and hand back Object.prototype members.
    if (Object.prototype.hasOwnProperty.call(label_to_encoding, keyLabel)) {
      return label_to_encoding[keyLabel];
    }
    return null;
  }
  /**
   * Encodings table: https://encoding.spec.whatwg.org/encodings.json
   *
   * Each category groups encodings under a heading; each encoding lists its
   * canonical name and every label that resolves to it. Note that this table
   * carries "iso-8859-1" as its own entry (with the ascii/latin1 labels)
   * next to "windows-1252".
   * @const
   * @type {!Array.<{
   *          heading: string,
   *          encodings: Array.<{name:string,labels:Array.<string>}>
   *        }>}
   */
  var encodings = [
    {
      encodings: [
        { labels: ['unicode-1-1-utf-8', 'utf-8', 'utf8'], name: 'UTF-8' }
      ],
      heading: 'The Encoding'
    },
    {
      encodings: [
        { labels: ['866', 'cp866', 'csibm866', 'ibm866'], name: 'IBM866' },
        { labels: ['csisolatin2', 'iso-8859-2', 'iso-ir-101', 'iso8859-2', 'iso88592',
                   'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'],
          name: 'ISO-8859-2' },
        { labels: ['csisolatin3', 'iso-8859-3', 'iso-ir-109', 'iso8859-3', 'iso88593',
                   'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'],
          name: 'ISO-8859-3' },
        { labels: ['csisolatin4', 'iso-8859-4', 'iso-ir-110', 'iso8859-4', 'iso88594',
                   'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'],
          name: 'ISO-8859-4' },
        { labels: ['csisolatincyrillic', 'cyrillic', 'iso-8859-5', 'iso-ir-144',
                   'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'],
          name: 'ISO-8859-5' },
        { labels: ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic',
                   'ecma-114', 'iso-8859-6', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127',
                   'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'],
          name: 'ISO-8859-6' },
        { labels: ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8',
                   'iso-8859-7', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7',
                   'iso_8859-7:1987', 'sun_eu_greek'],
          name: 'ISO-8859-7' },
        { labels: ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8',
                   'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8',
                   'iso_8859-8:1988', 'visual'],
          name: 'ISO-8859-8' },
        { labels: ['csiso88598i', 'iso-8859-8-i', 'logical'], name: 'ISO-8859-8-I' },
        { labels: ['csisolatin6', 'iso-8859-10', 'iso-ir-157', 'iso8859-10', 'iso885910',
                   'l6', 'latin6'],
          name: 'ISO-8859-10' },
        { labels: ['iso-8859-13', 'iso8859-13', 'iso885913'], name: 'ISO-8859-13' },
        { labels: ['iso-8859-14', 'iso8859-14', 'iso885914'], name: 'ISO-8859-14' },
        { labels: ['csisolatin9', 'iso-8859-15', 'iso8859-15', 'iso885915', 'iso_8859-15',
                   'l9'],
          name: 'ISO-8859-15' },
        { labels: ['iso-8859-16'], name: 'ISO-8859-16' },
        { labels: ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r'], name: 'KOI8-R' },
        { labels: ['koi8-ru', 'koi8-u'], name: 'KOI8-U' },
        { labels: ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman'], name: 'macintosh' },
        { labels: ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620',
                   'windows-874'],
          name: 'windows-874' },
        { labels: ['cp1250', 'windows-1250', 'x-cp1250'], name: 'windows-1250' },
        { labels: ['cp1251', 'windows-1251', 'x-cp1251'], name: 'windows-1251' },
        { labels: ['ansi_x3.4-1968', 'cp1252', 'cp819', 'ibm819', 'iso-ir-100',
                   'windows-1252', 'x-cp1252'],
          name: 'windows-1252' },
        { labels: ['ascii', 'us-ascii', 'iso-8859-1', 'iso8859-1', 'iso88591',
                   'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'csisolatin1'],
          name: 'iso-8859-1' },
        { labels: ['cp1253', 'windows-1253', 'x-cp1253'], name: 'windows-1253' },
        { labels: ['cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9',
                   'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5',
                   'windows-1254', 'x-cp1254'],
          name: 'windows-1254' },
        { labels: ['cp1255', 'windows-1255', 'x-cp1255'], name: 'windows-1255' },
        { labels: ['cp1256', 'windows-1256', 'x-cp1256'], name: 'windows-1256' },
        { labels: ['cp1257', 'windows-1257', 'x-cp1257'], name: 'windows-1257' },
        { labels: ['cp1258', 'windows-1258', 'x-cp1258'], name: 'windows-1258' },
        { labels: ['x-mac-cyrillic', 'x-mac-ukrainian'], name: 'x-mac-cyrillic' }
      ],
      heading: 'Legacy single-byte encodings'
    },
    {
      encodings: [
        { labels: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312',
                   'gb_2312-80', 'gbk', 'iso-ir-58', 'x-gbk'],
          name: 'GBK' },
        { labels: ['gb18030'], name: 'gb18030' }
      ],
      heading: 'Legacy multi-byte Chinese (simplified) encodings'
    },
    {
      encodings: [
        { labels: ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'], name: 'Big5' }
      ],
      heading: 'Legacy multi-byte Chinese (traditional) encodings'
    },
    {
      encodings: [
        { labels: ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp'], name: 'EUC-JP' },
        { labels: ['csiso2022jp', 'iso-2022-jp'], name: 'ISO-2022-JP' },
        { labels: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'shift_jis', 'sjis',
                   'windows-31j', 'x-sjis'],
          name: 'Shift_JIS' }
      ],
      heading: 'Legacy multi-byte Japanese encodings'
    },
    {
      encodings: [
        { labels: ['cseuckr', 'csksc56011987', 'euc-kr', 'iso-ir-149', 'korean',
                   'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601',
                   'windows-949'],
          name: 'EUC-KR' }
      ],
      heading: 'Legacy multi-byte Korean encodings'
    },
    {
      encodings: [
        { labels: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext',
                   'iso-2022-kr'],
          name: 'replacement' },
        { labels: ['utf-16be'], name: 'UTF-16BE' },
        { labels: ['utf-16', 'utf-16le'], name: 'UTF-16LE' },
        { labels: ['x-user-defined'], name: 'x-user-defined' }
      ],
      heading: 'Legacy miscellaneous encodings'
    }
  ];
  // Label to encoding registry: maps every label in the encodings table to
  // the encoding entry that lists it. A label listed more than once would
  // resolve to the last entry that lists it.
  /** @type {Object.<string,{name:string,labels:Array.<string>}>} */
  var label_to_encoding = (function (categories) {
    var registry = {};
    for (var c = 0; c < categories.length; ++c) {
      var members = categories[c].encodings;
      for (var e = 0; e < members.length; ++e) {
        var entry = members[e];
        for (var l = 0; l < entry.labels.length; ++l) {
          registry[entry.labels[l]] = entry;
        }
      }
    }
    return registry;
  }(encodings));
  // 5.1 Encoders and decoders
  /**
   * Sentinel a handler returns to signal that it has nothing more to emit,
   * e.g. a decoder that receives end-of-stream with no pending lead byte or
   * an encoder that receives end-of-stream.
   * @const
   */
  var finished = -1;
  /**
   * Normalises an index entry to array form.
   * @param {*} idxVal A value that may or may not already be an array.
   * @return {!Array} idxVal itself when it is an array, otherwise a new
   *     one-element array wrapping it.
   */
  function getArrayVal(idxVal) {
    if (Array.isArray(idxVal)) {
      return idxVal;
    }
    return [idxVal];
  }
  /**
   * Inclusive range test.
   * @param {number} a The number to test.
   * @param {number} min Lower bound, inclusive.
   * @param {number} max Upper bound, inclusive.
   * @return {boolean} True when min <= a <= max.
   */
  function inRange(a, min, max) {
    var atLeastMin = a >= min;
    return atLeastMin && a <= max;
  }
  /**
   * Membership test based on Array#indexOf (strict equality, so NaN is never
   * found).
   * @param {!Array.<*>} array The array to search.
   * @param {*} item The item to look for.
   * @return {boolean} True when item occurs in array.
   */
  function includes(array, item) {
    var position = array.indexOf(item);
    return position !== -1;
  }
  /**
   * Coerces an optional argument to a dictionary-like object.
   * @param {*} o undefined, null, or an object.
   * @return {Object} A fresh empty object for undefined/null; o itself when
   *     it is already an object (functions included).
   * @throws {TypeError} When o is any other primitive.
   */
  function ToDictionary(o) {
    if (o === null || o === undefined) {
      return {};
    }
    if (Object(o) !== o) {
      throw TypeError('Could not convert argument to dictionary');
    }
    return o;
  }
  /**
   * Converts a string of UTF-16 code units into Unicode code points,
   * following https://heycam.github.io/webidl/#dfn-obtain-unicode.
   * Well-formed surrogate pairs are combined; every unpaired surrogate
   * becomes U+FFFD.
   * @param {string} string Input string of UTF-16 code units.
   * @return {!Array.<number>} Code points.
   */
  function stringToCodePoints(string) {
    var units = String(string);
    var total = units.length;
    var codePoints = [];
    for (var pos = 0; pos < total; ++pos) {
      var unit = units.charCodeAt(pos);
      // Not a surrogate: the code unit is the code point.
      if (unit < 0xD800 || unit > 0xDFFF) {
        codePoints.push(unit);
        continue;
      }
      // A trail surrogate with no preceding lead is ill-formed.
      if (unit >= 0xDC00) {
        codePoints.push(0xFFFD);
        continue;
      }
      // Lead surrogate: it must be followed by a trail surrogate.
      var isLast = pos === total - 1;
      var next = isLast ? -1 : units.charCodeAt(pos + 1);
      if (next >= 0xDC00 && next <= 0xDFFF) {
        // 2^16 + 2^10 * (lead & 0x3FF) + (trail & 0x3FF)
        codePoints.push(0x10000 + ((unit & 0x3FF) << 10) + (next & 0x3FF));
        // Consume the trail surrogate as well.
        pos += 1;
      } else {
        codePoints.push(0xFFFD);
      }
    }
    return codePoints;
  }
  /**
   * Builds a UTF-16 string from code points. Values above 0xFFFF are written
   * as a lead/trail surrogate pair.
   * @param {!Array.<number>} code_points Array of code points.
   * @return {string} string String of UTF-16 code units.
   */
  function codePointsToString(code_points) {
    var pieces = [];
    var count = code_points.length;
    for (var k = 0; k < count; ++k) {
      var value = code_points[k];
      if (value <= 0xFFFF) {
        pieces.push(String.fromCharCode(value));
        continue;
      }
      var offset = value - 0x10000;
      pieces.push(String.fromCharCode((offset >> 10) + 0xD800,
                                      (offset & 0x3FF) + 0xDC00));
    }
    return pieces.join('');
  }
  /**
   * Locates the host's global object.
   * @return {(Object|undefined)} The first of `global`, `window` or `self`
   *     that is defined, or undefined when none of them exists.
   */
  function getGlobalScope() {
    return typeof global !== 'undefined' ? global
      : typeof window !== 'undefined' ? window
      : typeof self !== 'undefined' ? self
      : undefined;
  }
  /** Cache for the resolved index tables; filled by getEncodingIndexes. */
  var _encodingIndexes;
  /**
   * Looks for the legacy-encoding index tables in the places they may have
   * been published, in this order:
   *   1. a `TextEncodingIndexes` binding in scope (`.encodingIndexes`),
   *   2. `TextEncodingIndexes.encodingIndexes` on the global object,
   *   3. `encodingIndexes` on the global object,
   *   4. `encoding-indexes` on the global object.
   *
   * All global lookups go through the object returned by getGlobalScope()
   * rather than the bare `global` identifier, which does not exist outside
   * Node-like hosts.
   *
   * @return {?Object} The index tables, or null when none are available.
   */
  function checkForEncodingIndexes() {
    if (typeof TextEncodingIndexes !== 'undefined')
      return TextEncodingIndexes.encodingIndexes;
    var glo = getGlobalScope();
    if (!glo)
      return null;
    if ('TextEncodingIndexes' in glo)
      return glo['TextEncodingIndexes']['encodingIndexes'];
    if ('encodingIndexes' in glo)
      return glo['encodingIndexes'];
    // Read the same key that is probed.
    if ('encoding-indexes' in glo)
      return glo['encoding-indexes'];
    return null;
  }
  /**
   * Returns the index tables, looking them up via checkForEncodingIndexes()
   * on first use and caching a successful result in _encodingIndexes.
   * A failed lookup is not cached, so later calls try again.
   * @return {?Object} The index tables, or null when none were found.
   */
  function getEncodingIndexes() {
    if (!_encodingIndexes) {
      var found = checkForEncodingIndexes();
      if (!found) {
        return null;
      }
      _encodingIndexes = found;
    }
    return _encodingIndexes;
  }
  /**
   * Looks up the code point stored at a pointer.
   * @param {number} pointer The |pointer| to search for.
   * @param {(!Array.<?number>|undefined)} index The |index| to search within.
   * @return {?number} The code point at |pointer|, or null when |index| is
   *     missing or the slot holds no (truthy) code point.
   */
  function indexCodePointFor(pointer, index) {
    var entry = index ? index[pointer] : null;
    return entry ? entry : null;
  }
  /**
   * Reverse lookup: finds the first pointer that maps to a code point.
   * @param {number} code_point The |code point| to search for.
   * @param {!Array.<?number>} index The |index| to search within.
   * @return {?number} The first pointer whose entry equals |code point|, or
   *     null when |code point| does not occur in |index|.
   */
  function indexPointerFor(code_point, index) {
    var position = index.indexOf(code_point);
    if (position === -1) {
      return null;
    }
    return position;
  }
  /**
   * Fetches a named index table.
   * @param {string} name Name of the index.
   * @return {(!Array.<number>|!Array.<Array.<number>>)} The table stored
   *     under |name| (undefined when the tables lack that name).
   * @throws {Error} When no index tables have been loaded at all.
   */
  function index(name) {
    var tables = getEncodingIndexes();
    if (tables) {
      return tables[name];
    }
    throw Error("Indexes missing." +
      " Did you forget to include encoding-indexes.js first?");
  }
  /**
   * Maps a gb18030 four-byte pointer to a code point using the
   * 'gb18030-ranges' index.
   * @param {number} pointer The |pointer| to search for in the gb18030 index.
   * @return {?number} The code point corresponding to |pointer|, or null
   *     when |pointer| lies outside the ranges the index covers.
   */
  function indexGB18030RangesCodePointFor(pointer) {
    // 1. Pointers strictly between 39419 and 189000, or above 1237575, have
    // no mapping.
    var unmapped = (pointer > 39419 && pointer < 189000) || pointer > 1237575;
    if (unmapped) {
      return null;
    }
    // 2. Pointer 7457 is special-cased to U+E7C7.
    if (pointer === 7457) {
      return 0xE7C7;
    }
    // 3. Find the last range whose starting pointer is <= pointer. The scan
    // stops at the first range that starts beyond it.
    var ranges = index('gb18030-ranges');
    var rangeStart = 0;
    var rangeCodePoint = 0;
    for (var k = 0; k < ranges.length; ++k) {
      /** @type {!Array.<number>} */
      var range = getArrayVal(ranges[k]);
      if (!(range[0] <= pointer)) {
        break;
      }
      rangeStart = range[0];
      rangeCodePoint = range[1];
    }
    // 4. Offset into the selected range.
    return rangeCodePoint + pointer - rangeStart;
  }
  /**
   * Maps a code point to its gb18030 four-byte pointer using the
   * 'gb18030-ranges' index.
   * @param {number} code_point The |code point| to locate in the gb18030 index.
   * @return {number} The pointer corresponding to |code point| in the
   *     gb18030 index.
   */
  function indexGB18030RangesPointerFor(code_point) {
    // 1. U+E7C7 is special-cased to pointer 7457.
    if (code_point === 0xE7C7) {
      return 7457;
    }
    // 2. Find the last range whose starting code point is <= code point.
    // The scan stops at the first range that starts beyond it.
    var ranges = index('gb18030-ranges');
    var rangeCodePoint = 0;
    var rangePointer = 0;
    for (var k = 0; k < ranges.length; ++k) {
      /** @type {!Array.<number>} */
      var range = getArrayVal(ranges[k]);
      if (!(range[1] <= code_point)) {
        break;
      }
      rangeCodePoint = range[1];
      rangePointer = range[0];
    }
    // 3. Offset into the selected range.
    return rangePointer + code_point - rangeCodePoint;
  }
  /**
   * Finds the Shift_JIS pointer for a code point.
   *
   * The search runs over index jis0208 with the entries at pointers 8272 to
   * 8835 (inclusive) blanked out, so a code point that only occurs in that
   * range is treated as absent. The filtered index is built on first use and
   * cached in shift_jis_index.
   *
   * @param {number} code_point The |code_point| to search for in the
   *     Shift_JIS index.
   * @return {?number} The first pointer corresponding to |code point| in the
   *     filtered index, or null if |code point| is not in it.
   */
  function indexShiftJISPointerFor(code_point) {
    // 1. Let index be index jis0208 excluding all entries whose
    // pointer is in the range 8272 to 8835, inclusive.
    if (!shift_jis_index) {
      shift_jis_index = index('jis0208').map(function (entry, pointer) {
        return inRange(pointer, 8272, 8835) ? null : entry;
      });
    }
    // 2. Return the index pointer for code point in index.
    // Array#indexOf reports a miss as -1; callers test for null, so the
    // miss is translated here instead of leaking -1 as a pointer.
    var pointer = shift_jis_index.indexOf(code_point);
    return pointer === -1 ? null : pointer;
  }
  /** Lazily built, filtered copy of index jis0208 (see above). */
  var shift_jis_index;
  /**
   * Finds the Big5 pointer for a code point.
   *
   * The search runs over index big5 with every entry whose pointer is below
   * (0xA1 - 0x81) * 157 blanked out. The filtered index is built on first
   * use and cached in big5_index_no_hkscs.
   *
   * @param {number} code_point The |code_point| to search for in the big5
   *     index.
   * @return {?number} The pointer corresponding to |code point| in the
   *     filtered index (the last one for the six code points listed in
   *     step 2, the first one otherwise), or null if |code point| is not
   *     in it.
   */
  function indexBig5PointerFor(code_point) {
    // 1. Let index be index Big5 excluding all entries whose pointer
    // is less than (0xA1 - 0x81) * 157.
    if (!big5_index_no_hkscs) {
      big5_index_no_hkscs = index('big5').map(function (entry, pointer) {
        return (pointer < (0xA1 - 0x81) * 157) ? null : entry;
      });
    }
    var index_ = big5_index_no_hkscs;
    // 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or
    // U+5345, return the last pointer corresponding to code point in
    // index.
    if (code_point === 0x2550 || code_point === 0x255E ||
        code_point === 0x2561 || code_point === 0x256A ||
        code_point === 0x5341 || code_point === 0x5345) {
      // Array#lastIndexOf reports a miss as -1; report it as null so both
      // branches honour the same "null means absent" contract.
      var last = index_.lastIndexOf(code_point);
      return last === -1 ? null : last;
    }
    // 3. Return the index pointer for code point in index.
    return indexPointerFor(code_point, index_);
  }
  /** Lazily built, filtered copy of index big5 (see above). */
  var big5_index_no_hkscs;
  722. //
  723. // Implementation of Encoding specification
  724. // https://encoding.spec.whatwg.org/
  725. //
  726. //
  727. // 4. Terminology
  728. //
  /**
   * Tests for an ASCII byte, i.e. a byte in the range 0x00 to 0x7F,
   * inclusive.
   * @param {number} a The number to test.
   * @return {boolean} True if a is in the range 0x00 to 0x7F, inclusive.
   */
  function isASCIIByte(a) {
    var nonNegative = a >= 0x00;
    return nonNegative && a <= 0x7F;
  }
  /**
   * An ASCII code point is a code point in the range U+0000 to
   * U+007F, inclusive. The numeric test is the same as for an ASCII byte,
   * so this is an alias of isASCIIByte rather than a separate function.
   */
  var isASCIICodePoint = isASCIIByte;
  /**
   * End-of-stream is a special token that signifies no more tokens
   * are in the stream. Handlers compare their input against it to detect
   * the end of the input. It has the same numeric value (-1) as `finished`.
   * @const
   */ var end_of_stream = -1;
  /**
   * Big5 decoder. Keeps one byte of state (the pending lead byte) between
   * calls to handler().
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  function Big5Decoder(options) {
    this.fatal = options.fatal;
    // Pending lead byte; 0x00 means "no lead byte seen".
    /** @type {number} */
    this.Big5_lead = 0x00;
  }
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  Big5Decoder.prototype.handler = function (stream, bite) {
    var lead = this.Big5_lead;
    // 1./2. End of input: an unfinished two-byte sequence is an error,
    // otherwise decoding is finished.
    if (bite === end_of_stream) {
      if (lead === 0x00) {
        return finished;
      }
      this.Big5_lead = 0x00;
      return decoderError(this.fatal);
    }
    // 3. A lead byte is pending, so this byte is the trail byte.
    if (lead !== 0x00) {
      this.Big5_lead = 0x00;
      var pointer = null;
      // Trail bytes are 0x40..0x7E (offset 0x40) or 0xA1..0xFE (offset
      // 0x62); anything else leaves pointer null.
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) {
        var offset = bite < 0x7F ? 0x40 : 0x62;
        pointer = (lead - 0x81) * 157 + (bite - offset);
      }
      // Four pointers decode to a base letter plus a combining mark:
      //   1133 -> U+00CA U+0304     1135 -> U+00CA U+030C
      //   1164 -> U+00EA U+0304     1166 -> U+00EA U+030C
      if (pointer === 1133) { return [0x00CA, 0x0304]; }
      if (pointer === 1135) { return [0x00CA, 0x030C]; }
      if (pointer === 1164) { return [0x00EA, 0x0304]; }
      if (pointer === 1166) { return [0x00EA, 0x030C]; }
      var code_point = null;
      if (pointer !== null) {
        code_point = indexCodePointFor(pointer, index('big5'));
      }
      if (code_point === null) {
        // An ASCII byte that failed as a trail byte is pushed back so it is
        // decoded on its own next time.
        if (isASCIIByte(bite)) {
          stream.prepend(bite);
        }
        return decoderError(this.fatal);
      }
      return code_point;
    }
    // 4. ASCII bytes decode to themselves.
    if (isASCIIByte(bite)) {
      return bite;
    }
    // 5. 0x81..0xFE starts a two-byte sequence; wait for the trail byte.
    if (inRange(bite, 0x81, 0xFE)) {
      this.Big5_lead = bite;
      return null;
    }
    // 6. Everything else (0x80, 0xFF) is an error.
    return decoderError(this.fatal);
  };
  /**
   * Big5 encoder. Stateless apart from the stored `fatal` option.
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  function Big5Encoder(options) {
    this.fatal = options.fatal;
  }
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  Big5Encoder.prototype.handler = function (stream, code_point) {
    // 1. End of input.
    if (code_point === end_of_stream) {
      return finished;
    }
    // 2. ASCII code points encode to themselves.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }
    // 3./4. Everything else must have a Big5 pointer.
    var pointer = indexBig5PointerFor(code_point);
    if (pointer === null) {
      return encoderError(code_point);
    }
    // 5./6. The row gives the lead byte; rows below 0xA1 are not encodable.
    var lead = 0x81 + Math.floor(pointer / 157);
    if (lead < 0xA1) {
      return encoderError(code_point);
    }
    // 7./8. The column gives the trail byte: columns below 0x3F map to
    // 0x40.., the remaining ones map to 0xA1.. (offset 0x62).
    var trail = pointer % 157;
    var trailByte = trail < 0x3F ? trail + 0x40 : trail + 0x62;
    return [lead, trailByte];
  };
  /**
   * EUC-JP decoder. Between calls to handler() it remembers the pending lead
   * byte and whether the current sequence selects the jis0212 index.
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  function EUCJPDecoder(options) {
    this.fatal = options.fatal;
    // Set once the 0x8F prefix has been followed by a valid lead byte.
    /** @type {boolean} */
    this.eucjp_jis0212_flag = false;
    // Pending lead byte; 0x00 means "no lead byte seen".
    /** @type {number} */
    this.eucjp_lead = 0x00;
  }
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  EUCJPDecoder.prototype.handler = function (stream, bite) {
    var lead = this.eucjp_lead;
    // 1./2. End of input: an unfinished sequence is an error, otherwise
    // decoding is finished.
    if (bite === end_of_stream) {
      if (lead === 0x00) {
        return finished;
      }
      this.eucjp_lead = 0x00;
      return decoderError(this.fatal);
    }
    var isDoubleByteRange = inRange(bite, 0xA1, 0xFE);
    // 3. 0x8E followed by 0xA1..0xDF is a single code point at
    // 0xFF61 - 0xA1 + byte.
    if (lead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
      this.eucjp_lead = 0x00;
      return 0xFF61 - 0xA1 + bite;
    }
    // 4. 0x8F followed by 0xA1..0xFE switches to jis0212; the byte becomes
    // the new lead and one more byte is needed.
    if (lead === 0x8F && isDoubleByteRange) {
      this.eucjp_jis0212_flag = true;
      this.eucjp_lead = bite;
      return null;
    }
    // 5. A lead byte is pending, so this byte is the trail byte.
    if (lead !== 0x00) {
      this.eucjp_lead = 0x00;
      var code_point = null;
      if (inRange(lead, 0xA1, 0xFE) && isDoubleByteRange) {
        var table = this.eucjp_jis0212_flag ? 'jis0212' : 'jis0208';
        var pointer = (lead - 0xA1) * 94 + (bite - 0xA1);
        code_point = indexCodePointFor(pointer, index(table));
      }
      this.eucjp_jis0212_flag = false;
      // A byte outside the trail range is pushed back so it is decoded
      // again as the start of the next sequence.
      if (!isDoubleByteRange) {
        stream.prepend(bite);
      }
      if (code_point === null) {
        return decoderError(this.fatal);
      }
      return code_point;
    }
    // 6. ASCII bytes decode to themselves.
    if (isASCIIByte(bite)) {
      return bite;
    }
    // 7. 0x8E, 0x8F and 0xA1..0xFE start a multi-byte sequence.
    if (bite === 0x8E || bite === 0x8F || isDoubleByteRange) {
      this.eucjp_lead = bite;
      return null;
    }
    // 8. Everything else is an error.
    return decoderError(this.fatal);
  };
  /**
   * EUC-JP encoder. Stateless apart from the stored `fatal` option.
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  function EUCJPEncoder(options) {
    this.fatal = options.fatal;
  }
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  EUCJPEncoder.prototype.handler = function (stream, code_point) {
    // 1. End of input.
    if (code_point === end_of_stream) {
      return finished;
    }
    // 2. ASCII code points encode to themselves.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }
    // 3./4. Two code points share bytes with ASCII: U+00A5 -> 0x5C and
    // U+203E -> 0x7E.
    switch (code_point) {
      case 0x00A5: return 0x5C;
      case 0x203E: return 0x7E;
    }
    // 5. U+FF61..U+FF9F are written as 0x8E followed by one byte.
    if (inRange(code_point, 0xFF61, 0xFF9F)) {
      return [0x8E, code_point - 0xFF61 + 0xA1];
    }
    // 6. U+2212 is looked up as U+FF0D.
    var lookup = code_point === 0x2212 ? 0xFF0D : code_point;
    // 7./8. Everything else must be present in index jis0208. The error
    // reports the code point that was actually looked up.
    var pointer = indexPointerFor(lookup, index('jis0208'));
    if (pointer === null) {
      return encoderError(lookup);
    }
    // 9.-11. 94 columns per row; both bytes are offset by 0xA1.
    return [Math.floor(pointer / 94) + 0xA1, pointer % 94 + 0xA1];
  };
  /**
   * Decoder for the euc-kr encoding.
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options Decoder options; `fatal` is forwarded to
   *     decoderError() on every malformed sequence.
   */
  var EUCKRDecoder = /** @class */ (function () {
    function EUCKRDecoder(options) {
      this.fatal = options.fatal;
      // euc-kr's decoder has an associated euc-kr lead (initially 0x00).
      /** @type {number} */ this.euckr_lead = 0x00;
    }
    /**
     * Consumes one byte of euc-kr input.
     * @param {Stream} stream The stream of bytes being decoded; a byte may be
     *     prepended back onto it when a two-byte sequence turns out invalid.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    EUCKRDecoder.prototype.handler = function (stream, bite) {
      // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set
      // euc-kr lead to 0x00 and return error.
      if (bite === end_of_stream && this.euckr_lead !== 0x00) {
        this.euckr_lead = 0x00;
        return decoderError(this.fatal);
      }
      // 2. If byte is end-of-stream and euc-kr lead is 0x00, return
      // finished.
      if (bite === end_of_stream && this.euckr_lead === 0x00)
        return finished;
      // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let
      // pointer be null, set euc-kr lead to 0x00, and then run these
      // substeps:
      if (this.euckr_lead !== 0x00) {
        var lead = this.euckr_lead;
        var pointer = null;
        this.euckr_lead = 0x00;
        // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
        // pointer to (lead − 0x81) × 190 + (byte − 0x41).
        if (inRange(bite, 0x41, 0xFE))
          pointer = (lead - 0x81) * 190 + (bite - 0x41);
        // 2. Let code point be null, if pointer is null, and the
        // index code point for pointer in index euc-kr otherwise.
        var code_point = (pointer === null)
          ? null : indexCodePointFor(pointer, index('euc-kr'));
        if (code_point === null) {
          // 3. If code point is null and byte is an ASCII byte, prepend
          // byte to stream. The test must be on the code point, not the
          // pointer: an ASCII trail byte in 0x41..0x7F yields a non-null
          // pointer that may still be unmapped, and that byte must not be
          // swallowed together with the bad lead.
          if (isASCIIByte(bite))
            stream.prepend(bite);
          // 4. If code point is null, return error.
          return decoderError(this.fatal);
        }
        // 5. Return a code point whose value is code point.
        return code_point;
      }
      // 4. If byte is an ASCII byte, return a code point whose value
      // is byte.
      if (isASCIIByte(bite))
        return bite;
      // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
      // euc-kr lead to byte and return continue.
      if (inRange(bite, 0x81, 0xFE)) {
        this.euckr_lead = bite;
        return null;
      }
      // 6. Return error.
      return decoderError(this.fatal);
    };
    return EUCKRDecoder;
  }());
  /**
   * Encoder for the euc-kr encoding.
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options Encoder options; only `fatal` is read.
   */
  var EUCKREncoder = /** @class */ (function () {
    function EUCKREncoder(options) {
      this.fatal = options.fatal;
    }
    /**
     * Converts a single code point into its euc-kr byte sequence.
     * @param {Stream} stream Input stream (not touched by this encoder).
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} One ASCII byte, a [lead, trail]
     *     pair, or `finished` at end-of-stream. Unencodable code points are
     *     passed to encoderError(), which throws.
     */
    EUCKREncoder.prototype.handler = function (stream, code_point) {
      // End of input.
      if (code_point === end_of_stream) {
        return finished;
      }
      // ASCII maps to itself.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }
      // Everything else must be present in index euc-kr.
      var ptr = indexPointerFor(code_point, index('euc-kr'));
      // Rows are 190 cells wide; lead bytes start at 0x81, trail bytes at 0x41.
      return ptr === null
        ? encoderError(code_point)
        : [Math.floor(ptr / 190) + 0x81, (ptr % 190) + 0x41];
    };
    return EUCKREncoder;
  }());
  /**
   * Decoder for the gb18030 encoding (two- and four-byte sequences).
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options Decoder options; `fatal` is forwarded to
   *     decoderError() on every malformed sequence.
   */
  var GB18030Decoder = /** @class */ (function () {
    function GB18030Decoder(options) {
      this.fatal = options.fatal;
      // gb18030's decoder has an associated gb18030 first, gb18030
      // second, and gb18030 third (all initially 0x00).
      /** @type {number} */ this.gb18030_first = 0x00;
      /** @type {number} */ this.gb18030_second = 0x00;
      /** @type {number} */ this.gb18030_third = 0x00;
    }
    /**
     * Consumes one byte of gb18030 input.
     * @param {Stream} stream The stream of bytes being decoded; bytes of a
     *     broken multi-byte sequence may be prepended back onto it.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    GB18030Decoder.prototype.handler = function (stream, bite) {
      // 1. If byte is end-of-stream and gb18030 first, gb18030
      // second, and gb18030 third are 0x00, return finished.
      if (bite === end_of_stream && this.gb18030_first === 0x00 &&
          this.gb18030_second === 0x00 && this.gb18030_third === 0x00) {
        return finished;
      }
      // 2. If byte is end-of-stream, and gb18030 first, gb18030
      // second, or gb18030 third is not 0x00, set gb18030 first,
      // gb18030 second, and gb18030 third to 0x00, and return error.
      if (bite === end_of_stream &&
          (this.gb18030_first !== 0x00 || this.gb18030_second !== 0x00 ||
           this.gb18030_third !== 0x00)) {
        this.gb18030_first = 0x00;
        this.gb18030_second = 0x00;
        this.gb18030_third = 0x00;
        // Return here: falling through would run the byte-classification
        // steps below on the end-of-stream sentinel and raise a second error.
        return decoderError(this.fatal);
      }
      var code_point;
      // 3. If gb18030 third is not 0x00, run these substeps:
      if (this.gb18030_third !== 0x00) {
        // 1. Let code point be null.
        code_point = null;
        // 2. If byte is in the range 0x30 to 0x39, inclusive, set
        // code point to the index gb18030 ranges code point for
        // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
        // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
        if (inRange(bite, 0x30, 0x39)) {
          code_point = indexGB18030RangesCodePointFor(
            (((this.gb18030_first - 0x81) * 10 + this.gb18030_second - 0x30) * 126 +
              this.gb18030_third - 0x81) * 10 + bite - 0x30);
        }
        // 3. Let buffer be a byte sequence consisting of gb18030
        // second, gb18030 third, and byte, in order.
        var buffer = [this.gb18030_second, this.gb18030_third, bite];
        // 4. Set gb18030 first, gb18030 second, and gb18030 third to
        // 0x00.
        this.gb18030_first = 0x00;
        this.gb18030_second = 0x00;
        this.gb18030_third = 0x00;
        // 5. If code point is null, prepend buffer to stream and
        // return error.
        if (code_point === null) {
          stream.prepend(buffer);
          return decoderError(this.fatal);
        }
        // 6. Return a code point whose value is code point.
        return code_point;
      }
      // 4. If gb18030 second is not 0x00, run these substeps:
      if (this.gb18030_second !== 0x00) {
        // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
        // gb18030 third to byte and return continue.
        if (inRange(bite, 0x81, 0xFE)) {
          this.gb18030_third = bite;
          return null;
        }
        // 2. Prepend gb18030 second followed by byte to stream, set
        // gb18030 first and gb18030 second to 0x00, and return error.
        stream.prepend([this.gb18030_second, bite]);
        this.gb18030_first = 0x00;
        this.gb18030_second = 0x00;
        return decoderError(this.fatal);
      }
      // 5. If gb18030 first is not 0x00, run these substeps:
      if (this.gb18030_first !== 0x00) {
        // 1. If byte is in the range 0x30 to 0x39, inclusive, set
        // gb18030 second to byte and return continue.
        if (inRange(bite, 0x30, 0x39)) {
          this.gb18030_second = bite;
          return null;
        }
        // 2. Let lead be gb18030 first, let pointer be null, and set
        // gb18030 first to 0x00.
        var lead = this.gb18030_first;
        var pointer = null;
        this.gb18030_first = 0x00;
        // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
        // otherwise.
        var offset = bite < 0x7F ? 0x40 : 0x41;
        // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
        // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
        // (byte − offset).
        if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
          pointer = (lead - 0x81) * 190 + (bite - offset);
        // 5. Let code point be null if pointer is null and the index
        // code point for pointer in index gb18030 otherwise.
        code_point = pointer === null ? null :
          indexCodePointFor(pointer, index('gb18030'));
        // 6. If code point is null and byte is an ASCII byte, prepend
        // byte to stream.
        if (code_point === null && isASCIIByte(bite))
          stream.prepend(bite);
        // 7. If code point is null, return error.
        if (code_point === null)
          return decoderError(this.fatal);
        // 8. Return a code point whose value is code point.
        return code_point;
      }
      // 6. If byte is an ASCII byte, return a code point whose value
      // is byte.
      if (isASCIIByte(bite))
        return bite;
      // 7. If byte is 0x80, return code point U+20AC.
      if (bite === 0x80)
        return 0x20AC;
      // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
      // gb18030 first to byte and return continue.
      if (inRange(bite, 0x81, 0xFE)) {
        this.gb18030_first = bite;
        return null;
      }
      // 9. Return error.
      return decoderError(this.fatal);
    };
    return GB18030Decoder;
  }());
  /**
   * Encoder shared by gb18030 and gbk; the two differ only in the gbk flag.
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options Encoder options; only `fatal` is read.
   * @param {boolean=} gbk_flag When truthy, U+20AC is emitted as byte 0x80 and
   *     the four-byte range form is never produced.
   */
  var GB18030Encoder = /** @class */ (function () {
    function GB18030Encoder(options, gbk_flag) {
      // gb18030's encoder has an associated gbk flag (initially unset).
      this.gbk_flag = gbk_flag;
      this.fatal = options.fatal;
    }
    /**
     * Converts a single code point into its gb18030 / gbk byte sequence.
     * @param {Stream} stream Input stream (not touched by this encoder).
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} One byte, a two-byte array, a
     *     four-byte array, or `finished` at end-of-stream. Unencodable code
     *     points are passed to encoderError(), which throws.
     */
    GB18030Encoder.prototype.handler = function (stream, code_point) {
      // End of input.
      if (code_point === end_of_stream) {
        return finished;
      }
      // ASCII maps to itself.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }
      // U+E5E5 is always rejected.
      if (code_point === 0xE5E5) {
        return encoderError(code_point);
      }
      // gbk writes the euro sign as the single byte 0x80.
      if (this.gbk_flag && code_point === 0x20AC) {
        return 0x80;
      }
      // Two-byte form: look the code point up in index gb18030.
      var twoBytePtr = indexPointerFor(code_point, index('gb18030'));
      if (twoBytePtr !== null) {
        var cell = twoBytePtr % 190;
        // Trail bytes skip 0x7F, hence the two different offsets.
        return [
          Math.floor(twoBytePtr / 190) + 0x81,
          cell + (cell < 0x3F ? 0x40 : 0x41)
        ];
      }
      // gbk has no four-byte form, so anything unmapped is an error.
      if (this.gbk_flag) {
        return encoderError(code_point);
      }
      // Four-byte form: peel the ranges pointer apart in mixed radix
      // (x 10 x 126 x 10), most significant digit first.
      var rest = indexGB18030RangesPointerFor(code_point);
      var digit1 = Math.floor(rest / 10 / 126 / 10);
      rest -= digit1 * 10 * 126 * 10;
      var digit2 = Math.floor(rest / 10 / 126);
      rest -= digit2 * 10 * 126;
      var digit3 = Math.floor(rest / 10);
      return [
        digit1 + 0x81,
        digit2 + 0x30,
        digit3 + 0x81,
        rest - digit3 * 10 + 0x30
      ];
    };
    return GB18030Encoder;
  }());
  // Decoder states for iso-2022-jp. Each name maps to its ordinal and each
  // ordinal maps back to its name (two-way enum).
  var states = states || {};
  [
    'ASCII',
    'Roman',
    'Katakana',
    'LeadByte',
    'TrailByte',
    'EscapeStart',
    'Escape'
  ].forEach(function (label, ordinal) {
    states[label] = ordinal;
    states[ordinal] = label;
  });
  /**
   * Decoder for the iso-2022-jp encoding, implemented as a state machine
   * driven by escape sequences.
   */
  var ISO2022JPDecoder = /** @class */ (function () {
    /**
     * @constructor
     * @implements {Decoder}
     * @param {{fatal: boolean}} options Decoder options; `fatal` is forwarded
     *     to decoderError() on every malformed sequence.
     */
    function ISO2022JPDecoder(options) {
      this.fatal = options.fatal;
      // iso-2022-jp's decoder has an associated iso-2022-jp decoder
      // state (initially ASCII), iso-2022-jp decoder output state
      // (initially ASCII), iso-2022-jp lead (initially 0x00), and
      // iso-2022-jp output flag (initially unset).
      /** @type {number} */ this.iso2022jp_decoder_state = states.ASCII;
      /** @type {number} */ this.iso2022jp_decoder_output_state = states.ASCII;
      /** @type {number} */ this.iso2022jp_lead = 0x00;
      /** @type {boolean} */ this.iso2022jp_output_flag = false;
    }
    /**
     * Consumes one byte of iso-2022-jp input.
     * @param {Stream} stream The stream of bytes being decoded; bytes of a
     *     rejected escape sequence are prepended back onto it.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    ISO2022JPDecoder.prototype.handler = function (stream, bite) {
      // switching on iso-2022-jp decoder state:
      switch (this.iso2022jp_decoder_state) {
        default:
        case states.ASCII:
          // ASCII
          // Based on byte:
          // 0x1B
          if (bite === 0x1B) {
            // Set iso-2022-jp decoder state to escape start and return
            // continue.
            this.iso2022jp_decoder_state = states.EscapeStart;
            return null;
          }
          // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B
          if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E
              && bite !== 0x0F && bite !== 0x1B) {
            // Unset the iso-2022-jp output flag and return a code point
            // whose value is byte.
            this.iso2022jp_output_flag = false;
            return bite;
          }
          // end-of-stream
          if (bite === end_of_stream) {
            // Return finished.
            return finished;
          }
          // Otherwise
          // Unset the iso-2022-jp output flag and return error.
          this.iso2022jp_output_flag = false;
          return decoderError(this.fatal);
        case states.Roman:
          // Roman
          // Based on byte:
          // 0x1B
          if (bite === 0x1B) {
            // Set iso-2022-jp decoder state to escape start and return
            // continue.
            this.iso2022jp_decoder_state = states.EscapeStart;
            return null;
          }
          // 0x5C
          if (bite === 0x5C) {
            // Unset the iso-2022-jp output flag and return code point
            // U+00A5.
            this.iso2022jp_output_flag = false;
            return 0x00A5;
          }
          // 0x7E
          if (bite === 0x7E) {
            // Unset the iso-2022-jp output flag and return code point
            // U+203E.
            this.iso2022jp_output_flag = false;
            return 0x203E;
          }
          // 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E
          if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F
              && bite !== 0x1B && bite !== 0x5C && bite !== 0x7E) {
            // Unset the iso-2022-jp output flag and return a code point
            // whose value is byte.
            this.iso2022jp_output_flag = false;
            return bite;
          }
          // end-of-stream
          if (bite === end_of_stream) {
            // Return finished.
            return finished;
          }
          // Otherwise
          // Unset the iso-2022-jp output flag and return error.
          this.iso2022jp_output_flag = false;
          return decoderError(this.fatal);
        case states.Katakana:
          // Katakana
          // Based on byte:
          // 0x1B
          if (bite === 0x1B) {
            // Set iso-2022-jp decoder state to escape start and return
            // continue.
            this.iso2022jp_decoder_state = states.EscapeStart;
            return null;
          }
          // 0x21 to 0x5F
          if (inRange(bite, 0x21, 0x5F)) {
            // Unset the iso-2022-jp output flag and return a code point
            // whose value is 0xFF61 − 0x21 + byte.
            this.iso2022jp_output_flag = false;
            return 0xFF61 - 0x21 + bite;
          }
          // end-of-stream
          if (bite === end_of_stream) {
            // Return finished.
            return finished;
          }
          // Otherwise
          // Unset the iso-2022-jp output flag and return error.
          this.iso2022jp_output_flag = false;
          return decoderError(this.fatal);
        case states.LeadByte:
          // Lead byte
          // Based on byte:
          // 0x1B
          if (bite === 0x1B) {
            // Set iso-2022-jp decoder state to escape start and return
            // continue.
            this.iso2022jp_decoder_state = states.EscapeStart;
            return null;
          }
          // 0x21 to 0x7E
          if (inRange(bite, 0x21, 0x7E)) {
            // Unset the iso-2022-jp output flag, set iso-2022-jp lead
            // to byte, iso-2022-jp decoder state to trail byte, and
            // return continue.
            this.iso2022jp_output_flag = false;
            this.iso2022jp_lead = bite;
            this.iso2022jp_decoder_state = states.TrailByte;
            return null;
          }
          // end-of-stream
          if (bite === end_of_stream) {
            // Return finished.
            return finished;
          }
          // Otherwise
          // Unset the iso-2022-jp output flag and return error.
          this.iso2022jp_output_flag = false;
          return decoderError(this.fatal);
        case states.TrailByte:
          // Trail byte
          // Based on byte:
          // 0x1B
          if (bite === 0x1B) {
            // Set iso-2022-jp decoder state to escape start and return
            // error (the pending lead byte is left without its trail).
            this.iso2022jp_decoder_state = states.EscapeStart;
            return decoderError(this.fatal);
          }
          // 0x21 to 0x7E
          if (inRange(bite, 0x21, 0x7E)) {
            // 1. Set the iso-2022-jp decoder state to lead byte.
            this.iso2022jp_decoder_state = states.LeadByte;
            // 2. Let pointer be (iso-2022-jp lead − 0x21) × 94 + byte − 0x21.
            var pointer = (this.iso2022jp_lead - 0x21) * 94 + bite - 0x21;
            // 3. Let code point be the index code point for pointer in
            // index jis0208.
            var code_point = indexCodePointFor(pointer, index('jis0208'));
            // 4. If code point is null, return error.
            if (code_point === null)
              return decoderError(this.fatal);
            // 5. Return a code point whose value is code point.
            return code_point;
          }
          // end-of-stream
          if (bite === end_of_stream) {
            // Set the iso-2022-jp decoder state to lead byte, prepend
            // byte to stream, and return error.
            this.iso2022jp_decoder_state = states.LeadByte;
            stream.prepend(bite);
            return decoderError(this.fatal);
          }
          // Otherwise
          // Set iso-2022-jp decoder state to lead byte and return
          // error.
          this.iso2022jp_decoder_state = states.LeadByte;
          return decoderError(this.fatal);
        case states.EscapeStart:
          // Escape start
          // 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to
          // byte, iso-2022-jp decoder state to escape, and return
          // continue.
          if (bite === 0x24 || bite === 0x28) {
            this.iso2022jp_lead = bite;
            this.iso2022jp_decoder_state = states.Escape;
            return null;
          }
          // 2. Prepend byte to stream.
          stream.prepend(bite);
          // 3. Unset the iso-2022-jp output flag, set iso-2022-jp
          // decoder state to iso-2022-jp decoder output state, and
          // return error.
          this.iso2022jp_output_flag = false;
          this.iso2022jp_decoder_state = this.iso2022jp_decoder_output_state;
          return decoderError(this.fatal);
        case states.Escape:
          // Escape
          // 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to
          // 0x00.
          var lead = this.iso2022jp_lead;
          this.iso2022jp_lead = 0x00;
          // 2. Let state be null.
          var state = null;
          // 3. If lead is 0x28 and byte is 0x42, set state to ASCII.
          if (lead === 0x28 && bite === 0x42)
            state = states.ASCII;
          // 4. If lead is 0x28 and byte is 0x4A, set state to Roman.
          if (lead === 0x28 && bite === 0x4A)
            state = states.Roman;
          // 5. If lead is 0x28 and byte is 0x49, set state to Katakana.
          if (lead === 0x28 && bite === 0x49)
            state = states.Katakana;
          // 6. If lead is 0x24 and byte is either 0x40 or 0x42, set
          // state to lead byte.
          if (lead === 0x24 && (bite === 0x40 || bite === 0x42))
            state = states.LeadByte;
          // 7. If state is non-null, run these substeps:
          if (state !== null) {
            // 1. Set iso-2022-jp decoder state and iso-2022-jp decoder
            // output state to state. The output state must be recorded
            // too: it is what the decoder falls back to when a later
            // escape sequence is rejected.
            this.iso2022jp_decoder_state = state;
            this.iso2022jp_decoder_output_state = state;
            // 2. Let output flag be the iso-2022-jp output flag.
            var output_flag = this.iso2022jp_output_flag;
            // 3. Set the iso-2022-jp output flag.
            this.iso2022jp_output_flag = true;
            // 4. Return continue, if output flag is unset, and error
            // otherwise.
            return !output_flag ? null : decoderError(this.fatal);
          }
          // 8. Prepend lead and byte to stream.
          stream.prepend([lead, bite]);
          // 9. Unset the iso-2022-jp output flag, set iso-2022-jp
          // decoder state to iso-2022-jp decoder output state and
          // return error.
          this.iso2022jp_output_flag = false;
          this.iso2022jp_decoder_state = this.iso2022jp_decoder_output_state;
          return decoderError(this.fatal);
      }
    };
    return ISO2022JPDecoder;
  }());
  // Encoder states for iso-2022-jp. Each name maps to its ordinal and each
  // ordinal maps back to its name (two-way enum).
  var states$1 = states$1 || {};
  ['ASCII', 'Roman', 'jis0208'].forEach(function (label, ordinal) {
    states$1[label] = ordinal;
    states$1[ordinal] = label;
  });
  /**
   * Encoder for the iso-2022-jp encoding. Emits escape sequences whenever the
   * character set required by the next code point differs from the current
   * encoder state.
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options Encoder options; only `fatal` is read.
   */
  var ISO2022JPEncoder = /** @class */ (function () {
    function ISO2022JPEncoder(options) {
      this.fatal = options.fatal;
      // iso-2022-jp's encoder has an associated iso-2022-jp encoder
      // state which is one of ASCII, Roman, and jis0208 (initially
      // ASCII).
      /** @type {number} */ this.iso2022jp_state = states$1.ASCII;
    }
    /**
     * Converts a single code point into iso-2022-jp bytes. When a state
     * switch is needed, the code point is prepended back onto the stream and
     * only the escape sequence is returned; the caller is expected to feed
     * the code point in again.
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit, or `finished` at
     *     end-of-stream. Unencodable code points are passed to
     *     encoderError(), which throws.
     */
    ISO2022JPEncoder.prototype.handler = function (stream, code_point) {
      // 1. If code point is end-of-stream and iso-2022-jp encoder
      // state is not ASCII, prepend code point to stream, set
      // iso-2022-jp encoder state to ASCII, and return three bytes
      // 0x1B 0x28 0x42.
      if (code_point === end_of_stream &&
          this.iso2022jp_state !== states$1.ASCII) {
        stream.prepend(code_point);
        this.iso2022jp_state = states$1.ASCII;
        return [0x1B, 0x28, 0x42];
      }
      // 2. If code point is end-of-stream and iso-2022-jp encoder
      // state is ASCII, return finished.
      if (code_point === end_of_stream &&
          this.iso2022jp_state === states$1.ASCII)
        return finished;
      // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
      // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
      if ((this.iso2022jp_state === states$1.ASCII ||
           this.iso2022jp_state === states$1.Roman) &&
          (code_point === 0x000E || code_point === 0x000F ||
           code_point === 0x001B)) {
        return encoderError(0xFFFD);
      }
      // 4. If iso-2022-jp encoder state is ASCII and code point is an
      // ASCII code point, return a byte whose value is code point.
      if (this.iso2022jp_state === states$1.ASCII &&
          isASCIICodePoint(code_point))
        return code_point;
      // 5. If iso-2022-jp encoder state is Roman and code point is an
      // ASCII code point, excluding U+005C and U+007E, or is U+00A5
      // or U+203E, emit the matching single byte. Strict equality is
      // used throughout, like every other comparison in this handler.
      if (this.iso2022jp_state === states$1.Roman) {
        // U+00A5 occupies 0x5C in the Roman set.
        if (code_point === 0x00A5)
          return 0x5C;
        // U+203E occupies 0x7E in the Roman set.
        if (code_point === 0x203E)
          return 0x7E;
        // Remaining ASCII (minus the two replaced slots) maps to itself.
        if (isASCIICodePoint(code_point) &&
            code_point !== 0x005C && code_point !== 0x007E)
          return code_point;
      }
      // 6. If code point is an ASCII code point, and iso-2022-jp
      // encoder state is not ASCII, prepend code point to stream, set
      // iso-2022-jp encoder state to ASCII, and return three bytes
      // 0x1B 0x28 0x42.
      if (isASCIICodePoint(code_point) &&
          this.iso2022jp_state !== states$1.ASCII) {
        stream.prepend(code_point);
        this.iso2022jp_state = states$1.ASCII;
        return [0x1B, 0x28, 0x42];
      }
      // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
      // encoder state is not Roman, prepend code point to stream, set
      // iso-2022-jp encoder state to Roman, and return three bytes
      // 0x1B 0x28 0x4A.
      if ((code_point === 0x00A5 || code_point === 0x203E) &&
          this.iso2022jp_state !== states$1.Roman) {
        stream.prepend(code_point);
        this.iso2022jp_state = states$1.Roman;
        return [0x1B, 0x28, 0x4A];
      }
      // 8. If code point is U+2212, set it to U+FF0D.
      if (code_point === 0x2212)
        code_point = 0xFF0D;
      // 9. Let pointer be the index pointer for code point in index
      // jis0208.
      var pointer = indexPointerFor(code_point, index('jis0208'));
      // 10. If pointer is null, return error with code point.
      if (pointer === null)
        return encoderError(code_point);
      // 11. If iso-2022-jp encoder state is not jis0208, prepend code
      // point to stream, set iso-2022-jp encoder state to jis0208,
      // and return three bytes 0x1B 0x24 0x42.
      if (this.iso2022jp_state !== states$1.jis0208) {
        stream.prepend(code_point);
        this.iso2022jp_state = states$1.jis0208;
        return [0x1B, 0x24, 0x42];
      }
      // 12. Let lead be Math.floor(pointer / 94) + 0x21.
      var lead = Math.floor(pointer / 94) + 0x21;
      // 13. Let trail be pointer % 94 + 0x21.
      var trail = pointer % 94 + 0x21;
      // 14. Return two bytes whose values are lead and trail.
      return [lead, trail];
    };
    return ISO2022JPEncoder;
  }());
  /**
   * Decoder for the Shift_JIS encoding.
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options Decoder options; `fatal` is forwarded to
   *     decoderError() on every malformed sequence.
   */
  var ShiftJISDecoder = /** @class */ (function () {
    function ShiftJISDecoder(options) {
      this.fatal = options.fatal;
      // Pending lead byte of a two-byte sequence; 0x00 means "none".
      /** @type {number} */ this.Shift_JIS_lead = 0x00;
    }
    /**
     * Consumes one byte of Shift_JIS input.
     * @param {Stream} stream The stream of bytes being decoded; an ASCII
     *     trail byte of an invalid pair is prepended back onto it.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The decoded code point, null when
     *     more input is needed, or `finished` at a clean end-of-stream.
     */
    ShiftJISDecoder.prototype.handler = function (stream, bite) {
      var pendingLead = this.Shift_JIS_lead;
      // End of input: clean finish, or an error for a dangling lead byte.
      if (bite === end_of_stream) {
        if (pendingLead === 0x00) {
          return finished;
        }
        this.Shift_JIS_lead = 0x00;
        return decoderError(this.fatal);
      }
      // Second byte of a two-byte sequence.
      if (pendingLead !== 0x00) {
        this.Shift_JIS_lead = 0x00;
        var ptr = null;
        if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC)) {
          // Trail bytes skip 0x7F; lead bytes come in two runs, one below
          // 0xA0 (based at 0x81) and one above (based at 0xC1).
          var trailBase = bite < 0x7F ? 0x40 : 0x41;
          var leadBase = pendingLead < 0xA0 ? 0x81 : 0xC1;
          ptr = (pendingLead - leadBase) * 188 + bite - trailBase;
        }
        // Pointers 8836..10715 map linearly onto U+E000 and up.
        if (inRange(ptr, 8836, 10715)) {
          return 0xE000 - 8836 + ptr;
        }
        var decoded = ptr === null
          ? null
          : indexCodePointFor(ptr, index('jis0208'));
        if (decoded !== null) {
          return decoded;
        }
        // Invalid pair: give an ASCII trail byte back so it is decoded on
        // its own, then report the error.
        if (isASCIIByte(bite)) {
          stream.prepend(bite);
        }
        return decoderError(this.fatal);
      }
      // Single-byte forms: ASCII and 0x80 map to themselves.
      if (isASCIIByte(bite) || bite === 0x80) {
        return bite;
      }
      // 0xA1..0xDF map onto U+FF61 and up.
      if (inRange(bite, 0xA1, 0xDF)) {
        return 0xFF61 - 0xA1 + bite;
      }
      // Lead byte: remember it and wait for the trail byte.
      if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
        this.Shift_JIS_lead = bite;
        return null;
      }
      // Anything else is invalid.
      return decoderError(this.fatal);
    };
    return ShiftJISDecoder;
  }());
  /**
   * Encoder that turns code points into Shift_JIS bytes, one code point
   * per handler call.
   *
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  var ShiftJISEncoder = /** @class */ (function () {
    function ShiftJISEncoder(options) {
      this.fatal = options.fatal;
    }
    /**
     * Encodes a single code point.
     *
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    ShiftJISEncoder.prototype.handler = function (stream, code_point) {
      if (code_point === end_of_stream) {
        return finished;
      }
      // ASCII code points and U+0080 are emitted unchanged.
      if (isASCIICodePoint(code_point) || code_point === 0x0080) {
        return code_point;
      }
      // Two code points with fixed single-byte encodings.
      switch (code_point) {
        case 0x00A5:
          return 0x5C;
        case 0x203E:
          return 0x7E;
      }
      // U+FF61..U+FF9F map linearly onto bytes 0xA1..0xDF.
      if (inRange(code_point, 0xFF61, 0xFF9F)) {
        return code_point - 0xFF61 + 0xA1;
      }
      // U+2212 is looked up (and reported on failure) as U+FF0D.
      var lookup = (code_point === 0x2212) ? 0xFF0D : code_point;
      var pointer = indexShiftJISPointerFor(lookup);
      if (pointer === null) {
        return encoderError(lookup);
      }
      // Split the pointer into a 188-wide row/cell pair and shift each part
      // into its byte range.
      var row = Math.floor(pointer / 188);
      var cell = pointer % 188;
      var leadByte = row + ((row < 0x1F) ? 0x81 : 0xC1);
      var trailByte = cell + ((cell < 0x3F) ? 0x40 : 0x41);
      return [leadByte, trailByte];
    };
    return ShiftJISEncoder;
  }());
  /**
   * Decoder for single-byte encodings, driven by a lookup table for the
   * bytes at 0x80 and above.
   *
   * @constructor
   * @implements {Decoder}
   * @param {!Array.<number>} index The encoding index.
   * @param {{fatal: boolean}} options
   */
  var SingleByteDecoder = /** @class */ (function () {
    function SingleByteDecoder(index, options) {
      this.index = index;
      this.fatal = options.fatal;
    }
    /**
     * Decodes a single byte.
     *
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The decoded code point, the
     *     finished marker at end of stream, or the error result when the
     *     table has no usable entry for the byte.
     */
    SingleByteDecoder.prototype.handler = function (stream, bite) {
      if (bite === end_of_stream) {
        return finished;
      }
      // ASCII bytes decode to themselves.
      if (isASCIIByte(bite)) {
        return bite;
      }
      // The table is indexed by the byte's offset from 0x80; any falsy
      // entry (missing, null, 0) is treated as undecodable.
      var mapped = this.index[bite - 0x80];
      return mapped ? mapped : decoderError(this.fatal);
    };
    return SingleByteDecoder;
  }());
  /**
   * Encoder for single-byte encodings, driven by the same lookup table the
   * matching decoder uses.
   *
   * @constructor
   * @implements {Encoder}
   * @param {!Array.<?number>} index The encoding index.
   * @param {{fatal: boolean}} options
   */
  var SingleByteEncoder = /** @class */ (function () {
    function SingleByteEncoder(index, options) {
      this.index = index;
      this.fatal = options.fatal;
    }
    /**
     * Encodes a single code point.
     *
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    SingleByteEncoder.prototype.handler = function (stream, code_point) {
      if (code_point === end_of_stream) {
        return finished;
      }
      // ASCII code points are emitted unchanged.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }
      // A null pointer means the code point is absent from the table;
      // encoderError throws in that case. Otherwise the table position is
      // shifted into the 0x80.. byte range.
      var slot = indexPointerFor(code_point, this.index);
      return (slot === null) ? encoderError(code_point) : slot + 0x80;
    };
    return SingleByteEncoder;
  }());
  /**
   * Splits a 16-bit code unit into its two bytes.
   *
   * @param {number} code_unit The code unit to split.
   * @param {boolean} utf16be True to emit the high byte first, false to
   *     emit the low byte first.
   * @return {!Array.<number>} The two bytes in the requested order.
   */
  function convertCodeUnitToBytes(code_unit, utf16be) {
    var high = code_unit >> 8;
    var low = code_unit & 0x00FF;
    return utf16be ? [high, low] : [low, high];
  }
  /**
   * Streaming UTF-16 decoder used for both byte orders. It buffers one
   * byte until a full 16-bit code unit is available, and one lead
   * surrogate until its trail surrogate arrives.
   *
   * @constructor
   * @implements {Decoder}
   * @param {boolean} utf16_be True if big-endian, false if little-endian.
   * @param {{fatal: boolean}} options
   */
  var UTF16Decoder = /** @class */ (function () {
    function UTF16Decoder(utf16_be, options) {
      this.utf16_be = utf16_be;
      this.fatal = options.fatal;
      /** @type {?number} */ this.utf16_lead_byte = null;
      /** @type {?number} */ this.utf16_lead_surrogate = null;
    }
    /**
     * Consumes one byte of input.
     *
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    UTF16Decoder.prototype.handler = function (stream, bite) {
      // 1./2. End of stream: finished when nothing is buffered. Otherwise
      // the input was truncated: clear BOTH buffers before reporting the
      // error, so that a later end-of-stream call reports finished instead
      // of repeating the error.
      if (bite === end_of_stream) {
        if (this.utf16_lead_byte === null &&
            this.utf16_lead_surrogate === null) {
          return finished;
        }
        this.utf16_lead_byte = null;
        this.utf16_lead_surrogate = null;
        return decoderError(this.fatal);
      }
      // 3. First byte of a code unit: buffer it and wait for the second.
      if (this.utf16_lead_byte === null) {
        this.utf16_lead_byte = bite;
        return null;
      }
      // 4. Combine the buffered byte with this one according to the byte
      // order, then clear the byte buffer.
      var code_unit = this.utf16_be
        ? (this.utf16_lead_byte << 8) + bite
        : (bite << 8) + this.utf16_lead_byte;
      this.utf16_lead_byte = null;
      // 5. A lead surrogate is pending: this code unit must be its trail.
      if (this.utf16_lead_surrogate !== null) {
        var lead_surrogate = this.utf16_lead_surrogate;
        this.utf16_lead_surrogate = null;
        if (inRange(code_unit, 0xDC00, 0xDFFF)) {
          return 0x10000 + (lead_surrogate - 0xD800) * 0x400 +
            (code_unit - 0xDC00);
        }
        // Not a trail surrogate: put the code unit's bytes back on the
        // stream so it is decoded again on its own, and report the
        // unpaired lead as an error.
        stream.prepend(convertCodeUnitToBytes(code_unit, this.utf16_be));
        return decoderError(this.fatal);
      }
      // 6. Lead surrogate: buffer it and wait for the trail.
      if (inRange(code_unit, 0xD800, 0xDBFF)) {
        this.utf16_lead_surrogate = code_unit;
        return null;
      }
      // 7. A trail surrogate with no preceding lead is an error.
      if (inRange(code_unit, 0xDC00, 0xDFFF)) {
        return decoderError(this.fatal);
      }
      // 8. Any other code unit is the code point itself.
      return code_unit;
    };
    return UTF16Decoder;
  }());
  /**
   * UTF-16 encoder used for both byte orders.
   *
   * @constructor
   * @implements {Encoder}
   * @param {boolean} utf16_be True if big-endian, false if little-endian.
   * @param {{fatal: boolean}} options
   */
  var UTF16Encoder = /** @class */ (function () {
    function UTF16Encoder(utf16_be, options) {
      this.utf16_be = utf16_be;
      this.fatal = options.fatal;
    }
    /**
     * Encodes a single code point as two bytes, or as four bytes (a
     * surrogate pair) for code points above U+FFFF.
     *
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    UTF16Encoder.prototype.handler = function (stream, code_point) {
      if (code_point === end_of_stream) {
        return finished;
      }
      var bigEndian = this.utf16_be;
      // Code points up to U+FFFF are a single code unit.
      if (inRange(code_point, 0x0000, 0xFFFF)) {
        return convertCodeUnitToBytes(code_point, bigEndian);
      }
      // Everything else is split into a lead/trail surrogate pair: the top
      // bits of (code point - 0x10000) go to the lead, the low 10 bits to
      // the trail.
      var supplementary = code_point - 0x10000;
      var leadBytes = convertCodeUnitToBytes((supplementary >> 10) + 0xD800, bigEndian);
      var trailBytes = convertCodeUnitToBytes((supplementary & 0x3FF) + 0xDC00, bigEndian);
      return leadBytes.concat(trailBytes);
    };
    return UTF16Encoder;
  }());
  /**
   * Streaming UTF-8 decoder.
   *
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  var UTF8Decoder = /** @class */ (function () {
    function UTF8Decoder(options) {
      this.fatal = options.fatal;
      // Decoder state: the code point accumulated so far, how many
      // continuation bytes have been seen and are needed (all start at 0),
      // and the inclusive range allowed for the next continuation byte
      // (starts at 0x80..0xBF).
      /** @type {number} */
      this.utf8_code_point = 0;
      /** @type {number} */
      this.utf8_bytes_seen = 0;
      /** @type {number} */
      this.utf8_bytes_needed = 0;
      /** @type {number} */
      this.utf8_lower_boundary = 0x80;
      /** @type {number} */
      this.utf8_upper_boundary = 0xBF;
    }
    /**
     * Consumes one byte of input.
     *
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    UTF8Decoder.prototype.handler = function (stream, bite) {
      // End of stream: finished unless a sequence is still open, in which
      // case the open sequence is abandoned and reported as an error.
      if (bite === end_of_stream) {
        if (this.utf8_bytes_needed === 0) {
          return finished;
        }
        this.utf8_bytes_needed = 0;
        return decoderError(this.fatal);
      }
      // No sequence in progress: this byte is ASCII or a lead byte.
      if (this.utf8_bytes_needed === 0) {
        if (inRange(bite, 0x00, 0x7F)) {
          return bite;
        }
        if (inRange(bite, 0xC2, 0xDF)) {
          // Lead of a two-byte sequence.
          this.utf8_bytes_needed = 1;
          this.utf8_code_point = bite & 0x1F;
          return null;
        }
        if (inRange(bite, 0xE0, 0xEF)) {
          // Lead of a three-byte sequence; 0xE0 and 0xED narrow the range
          // allowed for the first continuation byte.
          if (bite === 0xE0) {
            this.utf8_lower_boundary = 0xA0;
          }
          if (bite === 0xED) {
            this.utf8_upper_boundary = 0x9F;
          }
          this.utf8_bytes_needed = 2;
          this.utf8_code_point = bite & 0xF;
          return null;
        }
        if (inRange(bite, 0xF0, 0xF4)) {
          // Lead of a four-byte sequence; 0xF0 and 0xF4 narrow the range
          // allowed for the first continuation byte.
          if (bite === 0xF0) {
            this.utf8_lower_boundary = 0x90;
          }
          if (bite === 0xF4) {
            this.utf8_upper_boundary = 0x8F;
          }
          this.utf8_bytes_needed = 3;
          this.utf8_code_point = bite & 0x7;
          return null;
        }
        // Any other byte cannot start a sequence.
        return decoderError(this.fatal);
      }
      // A sequence is in progress: check the byte against the current
      // boundaries. The boundaries go back to 0x80..0xBF either way.
      var isContinuation = inRange(bite, this.utf8_lower_boundary, this.utf8_upper_boundary);
      this.utf8_lower_boundary = 0x80;
      this.utf8_upper_boundary = 0xBF;
      if (!isContinuation) {
        // Abandon the sequence, put the byte back on the stream so it is
        // decoded again from a clean state, and report the error.
        this.utf8_code_point = 0;
        this.utf8_bytes_needed = 0;
        this.utf8_bytes_seen = 0;
        stream.prepend(bite);
        return decoderError(this.fatal);
      }
      // Fold the six payload bits of the continuation byte in.
      this.utf8_code_point = (this.utf8_code_point << 6) | (bite & 0x3F);
      this.utf8_bytes_seen += 1;
      if (this.utf8_bytes_seen !== this.utf8_bytes_needed) {
        return null;
      }
      // Sequence complete: emit the code point and reset the state.
      var completed = this.utf8_code_point;
      this.utf8_code_point = 0;
      this.utf8_bytes_needed = 0;
      this.utf8_bytes_seen = 0;
      return completed;
    };
    return UTF8Decoder;
  }());
  /**
   * UTF-8 encoder.
   *
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  var UTF8Encoder = /** @class */ (function () {
    function UTF8Encoder(options) {
      this.fatal = options.fatal;
    }
    /**
     * Encodes a single code point as one to four bytes.
     *
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    UTF8Encoder.prototype.handler = function (stream, code_point) {
      if (code_point === end_of_stream) {
        return finished;
      }
      // ASCII code points are emitted unchanged.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }
      // Pick the number of continuation bytes and the marker bits of the
      // lead byte from the range the code point falls in.
      var count;
      var offset;
      if (inRange(code_point, 0x0080, 0x07FF)) {
        count = 1;
        offset = 0xC0;
      }
      else if (inRange(code_point, 0x0800, 0xFFFF)) {
        count = 2;
        offset = 0xE0;
      }
      else if (inRange(code_point, 0x10000, 0x10FFFF)) {
        count = 3;
        offset = 0xF0;
      }
      // Lead byte: the bits above the continuation payload plus the marker.
      var bytes = [(code_point >> (6 * count)) + offset];
      // Continuation bytes, most significant six-bit group first.
      for (var remaining = count; remaining > 0; remaining -= 1) {
        var group = code_point >> (6 * (remaining - 1));
        bytes.push(0x80 | (group & 0x3F));
      }
      return bytes;
    };
    return UTF8Encoder;
  }());
  /**
   * Decoder for the x-user-defined encoding.
   *
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  var XUserDefinedDecoder = /** @class */ (function () {
    function XUserDefinedDecoder(options) {
      this.fatal = options.fatal;
    }
    /**
     * Decodes a single byte.
     *
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The decoded code point, or the
     *     finished marker at end of stream.
     */
    XUserDefinedDecoder.prototype.handler = function (stream, bite) {
      if (bite === end_of_stream) {
        return finished;
      }
      // ASCII bytes decode to themselves; every other byte is shifted so
      // that 0x80 lands on U+F780.
      return isASCIIByte(bite) ? bite : 0xF780 + bite - 0x80;
    };
    return XUserDefinedDecoder;
  }());
  /**
   * Encoder for the x-user-defined encoding.
   *
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  var XUserDefinedEncoder = /** @class */ (function () {
    function XUserDefinedEncoder(options) {
      this.fatal = options.fatal;
    }
    /**
     * Encodes a single code point.
     *
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    XUserDefinedEncoder.prototype.handler = function (stream, code_point) {
      if (code_point === end_of_stream) {
        return finished;
      }
      // ASCII code points are emitted unchanged.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }
      // Only U+F780..U+F7FF are encodable beyond ASCII; they map linearly
      // onto bytes 0x80..0xFF. Anything else is an encoder error.
      var encodable = inRange(code_point, 0xF780, 0xF7FF);
      if (!encodable) {
        return encoderError(code_point);
      }
      return code_point - 0xF780 + 0x80;
    };
    return XUserDefinedEncoder;
  }());
  2305. // This is free and unencumbered software released into the public domain.
  2306. //
  2307. // Utilities
  2308. //
  2309. // import './encoding/utilities';
  2310. //
  2311. // Implementation of Encoding specification
  2312. // https://encoding.spec.whatwg.org/
  2313. //
  2314. //
  2315. // 4. Terminology
  2316. //
  2317. // import './encoding/terminology';
  2318. //
  2319. // 5. Encodings
  2320. //
  2321. // import "./encoding/encodings";
  2322. //
  2323. // 6. Indexes
  2324. //
  2325. // import './encoding/indexes';
  // Index data, fetched once; the single-byte registration further down
  // only runs when this is truthy.
  var encodingIndexes = getEncodingIndexes();
  /**
   * Registry of encoder factories, keyed by encoding name. Each factory
   * takes the {fatal: boolean} options object and returns a new encoder.
   * The matching decoder factories live in the separate `decoders`
   * registry.
   *
   * @type {Object.<string, function({fatal:boolean}): Encoder>}
   */
  var encoders = {};
  // utf-8
  encoders['UTF-8'] = function (opts) { return new UTF8Encoder(opts); };
  // Legacy multi-byte Chinese (simplified) encodings. The gbk encoder is
  // the gb18030 encoder constructed with its second argument set to true.
  encoders['GBK'] = function (opts) { return new GB18030Encoder(opts, true); };
  encoders['gb18030'] = function (opts) { return new GB18030Encoder(opts); };
  // Legacy multi-byte Chinese (traditional) encodings.
  encoders['Big5'] = function (opts) { return new Big5Encoder(opts); };
  // Legacy multi-byte Japanese encodings.
  encoders['EUC-JP'] = function (opts) { return new EUCJPEncoder(opts); };
  encoders['ISO-2022-JP'] = function (opts) { return new ISO2022JPEncoder(opts); };
  encoders['Shift_JIS'] = function (opts) { return new ShiftJISEncoder(opts); };
  // Legacy multi-byte Korean encodings.
  encoders['EUC-KR'] = function (opts) { return new EUCKREncoder(opts); };
  // Legacy miscellaneous encodings. "replacement" has no entry: the API
  // throws a RangeError for it instead. Both utf-16 byte orders share one
  // encoder class, selected by its first argument.
  encoders['UTF-16BE'] = function (opts) { return new UTF16Encoder(true, opts); };
  encoders['UTF-16LE'] = function (opts) { return new UTF16Encoder(false, opts); };
  encoders['x-user-defined'] = function (opts) { return new XUserDefinedEncoder(opts); };
  /**
   * Registry of decoder factories, keyed by encoding name. Each factory
   * takes the {fatal: boolean} options object and returns a new decoder.
   *
   * @type {Object.<string, function({fatal:boolean}): Decoder>}
   */
  var decoders = {};
  decoders['UTF-8'] = function (opts) { return new UTF8Decoder(opts); };
  // gbk and gb18030 share the gb18030 decoder.
  decoders['GBK'] = function (opts) { return new GB18030Decoder(opts); };
  decoders['gb18030'] = function (opts) { return new GB18030Decoder(opts); };
  decoders['Big5'] = function (opts) { return new Big5Decoder(opts); };
  decoders['EUC-JP'] = function (opts) { return new EUCJPDecoder(opts); };
  decoders['ISO-2022-JP'] = function (opts) { return new ISO2022JPDecoder(opts); };
  decoders['Shift_JIS'] = function (opts) { return new ShiftJISDecoder(opts); };
  decoders['EUC-KR'] = function (opts) { return new EUCKRDecoder(opts); };
  // Both utf-16 byte orders share one decoder class, selected by its first
  // argument (true for big-endian).
  decoders['UTF-16BE'] = function (opts) { return new UTF16Decoder(true, opts); };
  decoders['UTF-16LE'] = function (opts) { return new UTF16Decoder(false, opts); };
  decoders['x-user-defined'] = function (opts) { return new XUserDefinedDecoder(opts); };
  // When index data is available, add a decoder factory and an encoder
  // factory for every encoding listed under the "Legacy single-byte
  // encodings" heading. Both factories for an encoding share the same
  // lookup table.
  if (encodingIndexes) {
    encodings.forEach(function (group) {
      if (group.heading === 'Legacy single-byte encodings') {
        group.encodings.forEach(function (entry) {
          var encodingName = entry.name;
          // The table is looked up by the lower-cased encoding name.
          var table = index(encodingName.toLowerCase());
          /** @param {{fatal: boolean}} opts */
          decoders[encodingName] = function (opts) {
            return new SingleByteDecoder(table, opts);
          };
          /** @param {{fatal: boolean}} opts */
          encoders[encodingName] = function (opts) {
            return new SingleByteEncoder(table, opts);
          };
        });
      }
    });
  }
  /**
   * A stream represents an ordered sequence of tokens.
   *
   * Tokens are stored in reverse order internally, so the front of the
   * stream is the end of the backing array.
   */
  var Stream = /** @class */ (function () {
    /**
     * @constructor
     * @param {!(Array.<number>|Uint8Array)} tokens Array of tokens that
     *     provide the stream. The input is copied, not retained.
     */
    function Stream(tokens) {
      /** @type {!Array.<number>} */
      this.tokens = Array.from(tokens);
      // Reversed as push/pop is more efficient than shift/unshift.
      this.tokens.reverse();
    }
    /**
     * @return {boolean} True if end-of-stream has been hit.
     */
    Stream.prototype.endOfStream = function () {
      return !this.tokens.length;
    };
    /**
     * Removes and returns the first token in the stream.
     *
     * @return {number} The next token from the stream, or end_of_stream
     *     when the stream is empty.
     */
    Stream.prototype.read = function () {
      if (!this.tokens.length)
        return end_of_stream;
      return this.tokens.pop();
    };
    /**
     * Inserts one or more tokens, in given order, before the first token
     * in the stream. An array argument is read by index and is left
     * unmodified.
     *
     * @param {(number|!Array.<number>)} token The token(s) to prepend to
     *     the stream.
     */
    Stream.prototype.prepend = function (token) {
      if (Array.isArray(token)) {
        // Walk backwards so that token[0] ends up at the front.
        for (var i = token.length - 1; i >= 0; i -= 1)
          this.tokens.push(token[i]);
      }
      else {
        this.tokens.push(token);
      }
    };
    /**
     * Inserts one or more tokens, in given order, after the last token in
     * the stream. An array argument is read by index and is left
     * unmodified.
     *
     * @param {(number|!Array.<number>)} token The token(s) to push to the
     *     stream.
     */
    Stream.prototype.push = function (token) {
      if (Array.isArray(token)) {
        for (var i = 0; i < token.length; i += 1)
          this.tokens.unshift(token[i]);
      }
      else {
        this.tokens.unshift(token);
      }
    };
    return Stream;
  }());
  2531. /**
  2532. * @constructor
  2533. * @param {string=} label The label of the encoding;
  2534. * defaults to 'utf-8'.
  2535. * @param {Object=} options
  2536. */
  2537. var TextDecoder = /** @class */ (function () {
  function TextDecoder(label, options) {
    // An omitted label selects the default encoding; anything else is
    // stringified.
    var requestedLabel = (label === undefined) ? DEFAULT_ENCODING : String(label);
    var dict = ToDictionary(options);
    // Initial state: no encoding or decoder yet, every flag unset, and
    // error mode "replacement".
    /** @private */
    this._encoding = null;
    /** @private @type {?Decoder} */
    this._decoder = null;
    /** @private @type {boolean} */
    this._ignoreBOM = false;
    /** @private @type {boolean} */
    this._BOMseen = false;
    /** @private @type {string} */
    this._error_mode = 'replacement';
    /** @private @type {boolean} */
    this._do_not_flush = false;
    // Resolve the label. An unknown label, or one resolving to the
    // "replacement" encoding, is rejected with a RangeError.
    var resolved = getEncoding(requestedLabel);
    if (resolved === null || resolved.name === 'replacement') {
      throw RangeError('Unknown encoding: ' + requestedLabel);
    }
    // A known encoding with no registered decoder factory is rejected too.
    if (!decoders[resolved.name]) {
      throw Error('Decoder not present.' +
        ' Did you forget to include encoding-indexes.js first?');
    }
    this._encoding = resolved;
    // Truthy "fatal" / "ignoreBOM" options switch the corresponding state.
    if (dict['fatal']) {
      this._error_mode = 'fatal';
    }
    if (dict['ignoreBOM']) {
      this._ignoreBOM = true;
    }
  }
  2590. Object.defineProperty(TextDecoder.prototype, "encoding", {
  2591. // if (Object.defineProperty) {
  2592. // The encoding attribute's getter must return encoding's name.
  2593. // Object.defineProperty(TextDecoder.prototype, 'encoding', {
  2594. // /** @this {TextDecoder} */
  2595. // get: function () { return this._encoding.name.toLowerCase(); }
  2596. // });
  2597. get: function () {
  2598. return this._encoding.name.toLowerCase();
  2599. },
  2600. enumerable: true,
  2601. configurable: true
  2602. });
  2603. Object.defineProperty(TextDecoder.prototype, "fatal", {
  2604. // The fatal attribute's getter must return true if error mode
  2605. // is fatal, and false otherwise.
  2606. // Object.defineProperty(TextDecoder.prototype, 'fatal', {
  2607. // /** @this {TextDecoder} */
  2608. // get: function () { return this._error_mode === 'fatal'; }
  2609. // });
  2610. get: function () {
  2611. return this._error_mode === 'fatal';
  2612. },
  2613. enumerable: true,
  2614. configurable: true
  2615. });
  2616. Object.defineProperty(TextDecoder.prototype, "ignoreBOM", {
  2617. // The ignoreBOM attribute's getter must return true if ignore
  2618. // BOM flag is set, and false otherwise.
  2619. // Object.defineProperty(TextDecoder.prototype, 'ignoreBOM', {
  2620. // /** @this {TextDecoder} */
  2621. // get: function () { return this._ignoreBOM; }
  2622. // });
  2623. get: function () {
  2624. return this._ignoreBOM;
  2625. },
  2626. enumerable: true,
  2627. configurable: true
  2628. });
  2629. // }
  2630. /**
  2631. * @param {BufferSource=} input The buffer of bytes to decode.
  2632. * @param {Object=} options
  2633. * @return {string} The decoded string.
  2634. */
  2635. TextDecoder.prototype.decode = function (input, options) {
  2636. var bytes = getBytesFromInput(input);
  2637. var optionsMap = ToDictionary(options);
  2638. // 1. If the do not flush flag is unset, set decoder to a new
  2639. // encoding's decoder, set stream to a new stream, and unset the
  2640. // BOM seen flag.
  2641. if (!this._do_not_flush) {
  2642. this._decoder = decoders[this._encoding.name]({
  2643. fatal: this._error_mode === 'fatal'
  2644. });
  2645. this._BOMseen = false;
  2646. }
  2647. // 2. If options's stream is true, set the do not flush flag, and
  2648. // unset the do not flush flag otherwise.
  2649. this._do_not_flush = Boolean(optionsMap['stream']);
  2650. // 3. If input is given, push a copy of input to stream.
  2651. // TODO: Align with spec algorithm - maintain stream on instance.
  2652. var input_stream = new Stream(bytes);
  2653. // 4. Let output be a new stream.
  2654. var output = [];
  2655. /** @type {?(number|!Array.<number>)} */
  2656. var result;
  2657. // 5. While true:
  2658. while (true) {
  2659. // 1. Let token be the result of reading from stream.
  2660. var token = input_stream.read();
  2661. // 2. If token is end-of-stream and the do not flush flag is
  2662. // set, return output, serialized.
  2663. // TODO: Align with spec algorithm.
  2664. if (token === end_of_stream)
  2665. break;
  2666. // 3. Otherwise, run these subsubsteps:
  2667. // 1. Let result be the result of processing token for decoder,
  2668. // stream, output, and error mode.
  2669. result = this._decoder.handler(input_stream, token);
  2670. // 2. If result is finished, return output, serialized.
  2671. if (result === finished)
  2672. break;
  2673. if (result !== null) {
  2674. if (Array.isArray(result))
  2675. output.push.apply(output, /**@type {!Array.<number>}*/ (result));
  2676. else
  2677. output.push(result);
  2678. }
  2679. // 3. Otherwise, if result is error, throw a TypeError.
  2680. // (Thrown in handler)
  2681. // 4. Otherwise, do nothing.
  2682. }
  2683. // TODO: Align with spec algorithm.
  2684. if (!this._do_not_flush) {
  2685. do {
  2686. result = this._decoder.handler(input_stream, input_stream.read());
  2687. if (result === finished)
  2688. break;
  2689. if (!result)
  2690. continue;
  2691. if (Array.isArray(result))
  2692. output.push.apply(output, /**@type {!Array.<number>}*/ (result));
  2693. else
  2694. output.push(result);
  2695. } while (!input_stream.endOfStream());
  2696. this._decoder = null;
  2697. }
  2698. return this.serializeStream(output);
  2699. };
  2700. // A TextDecoder object also has an associated serialize stream
  2701. // algorithm...
  2702. /**
  2703. * @param {!Array.<number>} stream
  2704. * @return {string}
  2705. * @this {TextDecoder}
  2706. */
  2707. TextDecoder.prototype.serializeStream = function (stream) {
  2708. // 1. Let token be the result of reading from stream.
  2709. // (Done in-place on array, rather than as a stream)
  2710. // 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
  2711. // BOM flag and BOM seen flag are unset, run these subsubsteps:
  2712. if (includes(['UTF-8', 'UTF-16LE', 'UTF-16BE'], this._encoding.name) &&
  2713. !this._ignoreBOM && !this._BOMseen) {
  2714. if (stream.length > 0 && stream[0] === 0xFEFF) {
  2715. // 1. If token is U+FEFF, set BOM seen flag.
  2716. this._BOMseen = true;
  2717. stream.shift();
  2718. }
  2719. else if (stream.length > 0) {
  2720. // 2. Otherwise, if token is not end-of-stream, set BOM seen
  2721. // flag and append token to stream.
  2722. this._BOMseen = true;
  2723. }
  2724. }
  2725. // 4. Otherwise, return output.
  2726. return codePointsToString(stream);
  2727. };
  2728. return TextDecoder;
  2729. }());
  /**
   * Reports whether `input` is an ArrayBuffer.
   *
   * `instanceof` alone only recognises buffers created by this realm's
   * ArrayBuffer constructor, so a buffer coming from another realm
   * (an iframe, a vm context) would be rejected. The class tag check
   * covers that case. The `typeof` guard keeps the function from
   * throwing on runtimes without a usable ArrayBuffer constructor.
   *
   * @param {*} input Any value.
   * @return {boolean} True if `input` is an ArrayBuffer.
   */
  function isBufferInstance(input) {
    if (typeof ArrayBuffer === 'function' && input instanceof ArrayBuffer)
      return true;
    return Object.prototype.toString.call(input) === '[object ArrayBuffer]';
  }
  /**
   * Produces a Uint8Array over the bytes of `input`.
   *
   * An ArrayBuffer is wrapped whole. An object exposing an ArrayBuffer
   * as `buffer` (typed arrays, DataView) is wrapped using its own
   * `byteOffset` and `byteLength`. Anything else, including `null` and
   * `undefined`, yields an empty Uint8Array.
   *
   * @param {*} input The value handed to the decoder.
   * @return {!Uint8Array} A view of the input bytes; empty when `input`
   *     is not a buffer or a view over one.
   */
  function getBytesFromInput(input) {
    // `typeof null` is 'object', so null needs its own guard; without it
    // the `in` test below throws a TypeError.
    if (input === null || typeof input !== 'object')
      return new Uint8Array(0);
    if (isBufferInstance(input)) {
      return new Uint8Array(input);
    }
    if ('buffer' in input && isBufferInstance(input.buffer)) {
      // Respect the window of the view rather than exposing the whole
      // underlying buffer.
      return new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
    }
    return new Uint8Array(0);
  }
  /**
   * Encodes strings into bytes. UTF-8 is used unless the non-standard
   * `NONSTANDARD_allowLegacyEncoding` option is truthy, in which case
   * the encoding is resolved from `label`.
   *
   * @constructor
   * @param {string=} label The label of the encoding. NONSTANDARD.
   * @param {Object=} options NONSTANDARD.
   */
  var TextEncoder = /** @class */ (function () {
    /**
     * Adds one handler result to the collected output.
     *
     * @param {!Array.<number>} sink Bytes collected so far.
     * @param {(number|!Array.<number>)} produced A byte, or several.
     */
    function collect(sink, produced) {
      if (Array.isArray(produced)) {
        sink.push.apply(sink, /**@type {!Array.<number>}*/ (produced));
        return;
      }
      sink.push(produced);
    }
    function TextEncoder(label, options) {
      var dict = ToDictionary(options);
      // A TextEncoder object has an associated encoding and encoder.
      /** @private */
      this._encoding = null;
      /** @private @type {?Encoder} */
      this._encoder = null;
      // Non-standard
      /** @private @type {boolean} */
      this._do_not_flush = false;
      /** @private @type {string} */
      this._fatal = dict['fatal'] ? 'fatal' : 'replacement';
      if (!dict['NONSTANDARD_allowLegacyEncoding']) {
        // Standard behavior: always UTF-8; a supplied label is only
        // reported, never honoured.
        this._encoding = getEncoding('utf-8');
        var scope = getGlobalScope() || {};
        if (label !== undefined && 'console' in scope) {
          console.warn('TextEncoder constructor called with encoding label, which is ignored.');
        }
        return;
      }
      // NONSTANDARD behavior: resolve the requested encoding, falling
      // back to the default when no usable label was given.
      var requested = label ? String(label) : DEFAULT_ENCODING;
      var resolved = getEncoding(requested);
      if (resolved === null || resolved.name === 'replacement') {
        throw RangeError('Unknown encoding: ' + requested);
      }
      if (!encoders[resolved.name]) {
        throw Error('Encoder not present. Did you forget to include encoding-indexes.js first?');
      }
      this._encoding = resolved;
    }
    Object.defineProperty(TextEncoder.prototype, "encoding", {
      // The encoding attribute's getter must return encoding's name
      // (lowercased here).
      get: function () {
        var name = this._encoding.name;
        return name.toLowerCase();
      },
      enumerable: true,
      configurable: true
    });
    /**
     * Encodes a string and returns the bytes.
     *
     * @param {string=} opt_string The string to encode.
     * @param {Object=} options Dictionary whose non-standard `stream`
     *     member is read.
     * @return {!Uint8Array} Encoded bytes, as a Uint8Array.
     */
    TextEncoder.prototype.encode = function (opt_string, options) {
      var text = opt_string === undefined ? '' : String(opt_string);
      var dict = ToDictionary(options);
      // NOTE: The stream option is nonstandard. None of the encodings
      // permitted for encoding (i.e. UTF-8, UTF-16) are stateful when
      // the input is a USVString so streaming is not necessary.
      if (!this._do_not_flush) {
        this._encoder = encoders[this._encoding.name]({
          fatal: this._fatal === 'fatal'
        });
      }
      this._do_not_flush = Boolean(dict['stream']);
      // 1. Convert input to a stream.
      var input = new Stream(stringToCodePoints(text));
      // 2. Let output be a new stream.
      var bytes = [];
      /** @type {?(number|!Array.<number>)} */
      var produced;
      // 3. Process tokens until the input runs out or the encoder
      // reports that it has finished.
      for (var token = input.read(); token !== end_of_stream; token = input.read()) {
        produced = this._encoder.handler(input, token);
        if (produced === finished) {
          break;
        }
        collect(bytes, produced);
      }
      // TODO: Align with spec algorithm.
      if (!this._do_not_flush) {
        // Keep feeding reads from the drained input until the encoder
        // reports finished, then drop the encoder.
        for (;;) {
          produced = this._encoder.handler(input, input.read());
          if (produced === finished) {
            break;
          }
          collect(bytes, produced);
        }
        this._encoder = null;
      }
      // 4. Convert output into a byte sequence and return a Uint8Array
      // object wrapping an ArrayBuffer containing output.
      return new Uint8Array(bytes);
    };
    return TextEncoder;
  }());
  // Browser polyfill: install the implementations on `window`, but only
  // for names that are absent, undefined or null there.
  if (typeof window !== 'undefined') {
    var checkUndefined = function (key) {
      if (!(key in window)) {
        return true;
      }
      var existing = window[key];
      return typeof existing === 'undefined' || existing === null;
    };
    if (checkUndefined('TextDecoder')) {
      window['TextDecoder'] = TextDecoder;
    }
    if (checkUndefined('TextEncoder')) {
      window['TextEncoder'] = TextEncoder;
    }
  }
  // Module exports, flagged as an ES module for interop.
  exports.TextDecoder = TextDecoder;
  exports.TextEncoder = TextEncoder;
  Object.defineProperty(exports, '__esModule', { value: true });
  2888. })));
  2889. //# sourceMappingURL=encoding.js.map