encodings.js 13 KB


  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  /**
   * Signals a decoding error, either by throwing or by yielding a
   * replacement code point for the caller to emit.
   *
   * @param {boolean} fatal If true, decoding errors raise an exception.
   * @param {number=} opt_code_point Override the standard fallback code point.
   * @return {number} The code point to insert on a decoding error
   *     (U+FFFD unless an override is supplied).
   * @throws {TypeError} When `fatal` is truthy.
   */
  function decoderError(fatal, opt_code_point) {
    if (fatal) {
      throw new TypeError("Decoder error");
    }
    // 0 (U+0000) is a legitimate override, so it must not be mistaken for
    // "no override" the way a bare `opt_code_point || 0xfffd` would. Every
    // other falsy value (undefined, null, NaN, "") still falls back.
    var hasOverride = opt_code_point === 0 || Boolean(opt_code_point);
    return hasOverride ? opt_code_point : 0xfffd;
  }
  14. exports.decoderError = decoderError;
  /**
   * Reports that a code point has no representation in the target encoding.
   *
   * @param {number} code_point The code point that could not be encoded.
   * @return {number} Never returns normally; the declared type only exists
   *     so the call can be used in expression position.
   * @throws {TypeError} Always.
   */
  function encoderError(code_point) {
    var message = "The code point " + code_point + " could not be encoded.";
    throw new TypeError(message);
  }
  22. exports.encoderError = encoderError;
  23. // 5.2 Names and labels
  24. // TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
  25. // https://github.com/google/closure-compiler/issues/247
  /**
   * Resolves an encoding label to its encoding record.
   *
   * The label is stringified, trimmed and lower-cased before the lookup, so
   * matching is case-insensitive and ignores surrounding whitespace.
   *
   * @param {string} label The encoding label.
   * @return {?{name:string,labels:Array.<string>}} The matching encoding, or
   *     null when the label is not registered.
   */
  function getEncoding(label) {
    // 1. Remove any leading and trailing ASCII whitespace from label.
    var keyLabel = String(label).trim().toLowerCase();
    // 2. If label is an ASCII case-insensitive match for any of the
    // labels listed in the table below, return the corresponding
    // encoding, and failure otherwise.
    // An own-property check is required here: the `in` operator would also
    // match inherited keys such as "constructor" or "__proto__" and hand
    // back something that is not an encoding record.
    if (Object.prototype.hasOwnProperty.call(label_to_encoding, keyLabel)) {
      return label_to_encoding[keyLabel];
    }
    return null;
  }
  41. exports.getEncoding = getEncoding;
  /**
   * Encodings table: https://encoding.spec.whatwg.org/encodings.json
   *
   * A list of categories. Each category carries a human-readable heading and
   * the encodings filed under it; each encoding pairs its name with every
   * label that resolves to it.
   * @const
   * @type {!Array.<{
   *          heading: string,
   *          encodings: Array.<{name:string,labels:Array.<string>}>
   *        }>}
   */
  var encodings = (function () {
    // Both factories emit their keys in the order the records have always
    // had (labels/name and encodings/heading) so enumeration and JSON
    // serialisation stay the same.
    function encoding(name, labels) {
      return { labels: labels, name: name };
    }
    function category(heading, members) {
      return { encodings: members, heading: heading };
    }

    return [
      category("The Encoding", [
        encoding("UTF-8", ["unicode-1-1-utf-8", "utf-8", "utf8"])
      ]),

      category("Legacy single-byte encodings", [
        encoding("IBM866", ["866", "cp866", "csibm866", "ibm866"]),
        encoding("ISO-8859-2", [
          "csisolatin2", "iso-8859-2", "iso-ir-101", "iso8859-2", "iso88592",
          "iso_8859-2", "iso_8859-2:1987", "l2", "latin2"
        ]),
        encoding("ISO-8859-3", [
          "csisolatin3", "iso-8859-3", "iso-ir-109", "iso8859-3", "iso88593",
          "iso_8859-3", "iso_8859-3:1988", "l3", "latin3"
        ]),
        encoding("ISO-8859-4", [
          "csisolatin4", "iso-8859-4", "iso-ir-110", "iso8859-4", "iso88594",
          "iso_8859-4", "iso_8859-4:1988", "l4", "latin4"
        ]),
        encoding("ISO-8859-5", [
          "csisolatincyrillic", "cyrillic", "iso-8859-5", "iso-ir-144",
          "iso8859-5", "iso88595", "iso_8859-5", "iso_8859-5:1988"
        ]),
        encoding("ISO-8859-6", [
          "arabic", "asmo-708", "csiso88596e", "csiso88596i",
          "csisolatinarabic", "ecma-114", "iso-8859-6", "iso-8859-6-e",
          "iso-8859-6-i", "iso-ir-127", "iso8859-6", "iso88596", "iso_8859-6",
          "iso_8859-6:1987"
        ]),
        encoding("ISO-8859-7", [
          "csisolatingreek", "ecma-118", "elot_928", "greek", "greek8",
          "iso-8859-7", "iso-ir-126", "iso8859-7", "iso88597", "iso_8859-7",
          "iso_8859-7:1987", "sun_eu_greek"
        ]),
        encoding("ISO-8859-8", [
          "csiso88598e", "csisolatinhebrew", "hebrew", "iso-8859-8",
          "iso-8859-8-e", "iso-ir-138", "iso8859-8", "iso88598", "iso_8859-8",
          "iso_8859-8:1988", "visual"
        ]),
        encoding("ISO-8859-8-I", ["csiso88598i", "iso-8859-8-i", "logical"]),
        encoding("ISO-8859-10", [
          "csisolatin6", "iso-8859-10", "iso-ir-157", "iso8859-10",
          "iso885910", "l6", "latin6"
        ]),
        encoding("ISO-8859-13", ["iso-8859-13", "iso8859-13", "iso885913"]),
        encoding("ISO-8859-14", ["iso-8859-14", "iso8859-14", "iso885914"]),
        encoding("ISO-8859-15", [
          "csisolatin9", "iso-8859-15", "iso8859-15", "iso885915",
          "iso_8859-15", "l9"
        ]),
        encoding("ISO-8859-16", ["iso-8859-16"]),
        encoding("KOI8-R", ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"]),
        encoding("KOI8-U", ["koi8-ru", "koi8-u"]),
        encoding("macintosh", ["csmacintosh", "mac", "macintosh", "x-mac-roman"]),
        encoding("windows-874", [
          "dos-874", "iso-8859-11", "iso8859-11", "iso885911", "tis-620",
          "windows-874"
        ]),
        encoding("windows-1250", ["cp1250", "windows-1250", "x-cp1250"]),
        encoding("windows-1251", ["cp1251", "windows-1251", "x-cp1251"]),
        encoding("windows-1252", [
          "ansi_x3.4-1968", "cp1252", "cp819", "ibm819", "iso-ir-100",
          "windows-1252", "x-cp1252"
        ]),
        // Kept as its own record, separate from windows-1252.
        encoding("iso-8859-1", [
          "ascii", "us-ascii", "iso-8859-1", "iso8859-1", "iso88591",
          "iso_8859-1", "iso_8859-1:1987", "l1", "latin1", "csisolatin1"
        ]),
        encoding("windows-1253", ["cp1253", "windows-1253", "x-cp1253"]),
        encoding("windows-1254", [
          "cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148", "iso8859-9",
          "iso88599", "iso_8859-9", "iso_8859-9:1989", "l5", "latin5",
          "windows-1254", "x-cp1254"
        ]),
        encoding("windows-1255", ["cp1255", "windows-1255", "x-cp1255"]),
        encoding("windows-1256", ["cp1256", "windows-1256", "x-cp1256"]),
        encoding("windows-1257", ["cp1257", "windows-1257", "x-cp1257"]),
        encoding("windows-1258", ["cp1258", "windows-1258", "x-cp1258"]),
        encoding("x-mac-cyrillic", ["x-mac-cyrillic", "x-mac-ukrainian"])
      ]),

      category("Legacy multi-byte Chinese (simplified) encodings", [
        encoding("GBK", [
          "chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312",
          "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"
        ]),
        encoding("gb18030", ["gb18030"])
      ]),

      category("Legacy multi-byte Chinese (traditional) encodings", [
        encoding("Big5", ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"])
      ]),

      category("Legacy multi-byte Japanese encodings", [
        encoding("EUC-JP", ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"]),
        encoding("ISO-2022-JP", ["csiso2022jp", "iso-2022-jp"]),
        encoding("Shift_JIS", [
          "csshiftjis", "ms932", "ms_kanji", "shift-jis", "shift_jis", "sjis",
          "windows-31j", "x-sjis"
        ])
      ]),

      category("Legacy multi-byte Korean encodings", [
        encoding("EUC-KR", [
          "cseuckr", "csksc56011987", "euc-kr", "iso-ir-149", "korean",
          "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601",
          "windows-949"
        ])
      ]),

      category("Legacy miscellaneous encodings", [
        encoding("replacement", [
          "csiso2022kr", "hz-gb-2312", "iso-2022-cn", "iso-2022-cn-ext",
          "iso-2022-kr"
        ]),
        encoding("UTF-16BE", ["utf-16be"]),
        encoding("UTF-16LE", ["utf-16", "utf-16le"]),
        encoding("x-user-defined", ["x-user-defined"])
      ])
    ];
  })();
  420. exports.encodings = encodings;
  // Label to encoding registry.
  /**
   * Maps each label listed in `encodings` to the encoding record that owns it.
   *
   * The registry is created without a prototype so that it holds nothing but
   * the labels registered below: a lookup by an arbitrary key such as
   * "constructor" or "__proto__" then finds no inherited member.
   * @type {Object.<string,{name:string,labels:Array.<string>}>}
   */
  var label_to_encoding = Object.create(null);
  encodings.forEach(function (category) {
    category.encodings.forEach(function (encoding) {
      encoding.labels.forEach(function (label) {
        label_to_encoding[label] = encoding;
      });
    });
  });
  431. //# sourceMappingURL=encodings.js.map