encodings.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /**
  2. * @param {boolean} fatal If true, decoding errors raise an exception.
  3. * @param {number=} opt_code_point Override the standard fallback code point.
  4. * @return {number} The code point to insert on a decoding error.
  5. */
  6. export function decoderError(fatal, opt_code_point) {
  7. if (opt_code_point === void 0) { opt_code_point = undefined; }
  8. if (fatal)
  9. throw TypeError("Decoder error");
  10. return opt_code_point || 0xfffd;
  11. }
  12. /**
  13. * @param {number} code_point The code point that could not be encoded.
  14. * @return {number} Always throws, no value is actually returned.
  15. */
  16. export function encoderError(code_point) {
  17. throw TypeError("The code point " + code_point + " could not be encoded.");
  18. }
  19. // 5.2 Names and labels
  20. // TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
  21. // https://github.com/google/closure-compiler/issues/247
  22. /**
  23. * @param {string} label The encoding label.
  24. * @return {?{name:string,labels:Array.<string>}}
  25. */
  26. export function getEncoding(label) {
  27. // 1. Remove any leading and trailing ASCII whitespace from label.
  28. var keyLabel = String(label).trim().toLowerCase();
  29. // 2. If label is an ASCII case-insensitive match for any of the
  30. // labels listed in the table below, return the corresponding
  31. // encoding, and failure otherwise.
  32. if (keyLabel in label_to_encoding) {
  33. return label_to_encoding[keyLabel];
  34. }
  35. return null;
  36. }
  37. /**
  38. * Encodings table: https://encoding.spec.whatwg.org/encodings.json
  39. * @const
  40. * @type {!Array.<{
  41. * heading: string,
  42. * encodings: Array.<{name:string,labels:Array.<string>}>
  43. * }>}
  44. */
  45. var encodings = [
  46. {
  47. encodings: [
  48. {
  49. labels: ["unicode-1-1-utf-8", "utf-8", "utf8"],
  50. name: "UTF-8",
  51. },
  52. ],
  53. heading: "The Encoding",
  54. },
  55. {
  56. encodings: [
  57. {
  58. labels: ["866", "cp866", "csibm866", "ibm866"],
  59. name: "IBM866",
  60. },
  61. {
  62. labels: [
  63. "csisolatin2",
  64. "iso-8859-2",
  65. "iso-ir-101",
  66. "iso8859-2",
  67. "iso88592",
  68. "iso_8859-2",
  69. "iso_8859-2:1987",
  70. "l2",
  71. "latin2",
  72. ],
  73. name: "ISO-8859-2",
  74. },
  75. {
  76. labels: [
  77. "csisolatin3",
  78. "iso-8859-3",
  79. "iso-ir-109",
  80. "iso8859-3",
  81. "iso88593",
  82. "iso_8859-3",
  83. "iso_8859-3:1988",
  84. "l3",
  85. "latin3",
  86. ],
  87. name: "ISO-8859-3",
  88. },
  89. {
  90. labels: [
  91. "csisolatin4",
  92. "iso-8859-4",
  93. "iso-ir-110",
  94. "iso8859-4",
  95. "iso88594",
  96. "iso_8859-4",
  97. "iso_8859-4:1988",
  98. "l4",
  99. "latin4",
  100. ],
  101. name: "ISO-8859-4",
  102. },
  103. {
  104. labels: [
  105. "csisolatincyrillic",
  106. "cyrillic",
  107. "iso-8859-5",
  108. "iso-ir-144",
  109. "iso8859-5",
  110. "iso88595",
  111. "iso_8859-5",
  112. "iso_8859-5:1988",
  113. ],
  114. name: "ISO-8859-5",
  115. },
  116. {
  117. labels: [
  118. "arabic",
  119. "asmo-708",
  120. "csiso88596e",
  121. "csiso88596i",
  122. "csisolatinarabic",
  123. "ecma-114",
  124. "iso-8859-6",
  125. "iso-8859-6-e",
  126. "iso-8859-6-i",
  127. "iso-ir-127",
  128. "iso8859-6",
  129. "iso88596",
  130. "iso_8859-6",
  131. "iso_8859-6:1987",
  132. ],
  133. name: "ISO-8859-6",
  134. },
  135. {
  136. labels: [
  137. "csisolatingreek",
  138. "ecma-118",
  139. "elot_928",
  140. "greek",
  141. "greek8",
  142. "iso-8859-7",
  143. "iso-ir-126",
  144. "iso8859-7",
  145. "iso88597",
  146. "iso_8859-7",
  147. "iso_8859-7:1987",
  148. "sun_eu_greek",
  149. ],
  150. name: "ISO-8859-7",
  151. },
  152. {
  153. labels: [
  154. "csiso88598e",
  155. "csisolatinhebrew",
  156. "hebrew",
  157. "iso-8859-8",
  158. "iso-8859-8-e",
  159. "iso-ir-138",
  160. "iso8859-8",
  161. "iso88598",
  162. "iso_8859-8",
  163. "iso_8859-8:1988",
  164. "visual",
  165. ],
  166. name: "ISO-8859-8",
  167. },
  168. {
  169. labels: ["csiso88598i", "iso-8859-8-i", "logical"],
  170. name: "ISO-8859-8-I",
  171. },
  172. {
  173. labels: [
  174. "csisolatin6",
  175. "iso-8859-10",
  176. "iso-ir-157",
  177. "iso8859-10",
  178. "iso885910",
  179. "l6",
  180. "latin6",
  181. ],
  182. name: "ISO-8859-10",
  183. },
  184. {
  185. labels: ["iso-8859-13", "iso8859-13", "iso885913"],
  186. name: "ISO-8859-13",
  187. },
  188. {
  189. labels: ["iso-8859-14", "iso8859-14", "iso885914"],
  190. name: "ISO-8859-14",
  191. },
  192. {
  193. labels: [
  194. "csisolatin9",
  195. "iso-8859-15",
  196. "iso8859-15",
  197. "iso885915",
  198. "iso_8859-15",
  199. "l9",
  200. ],
  201. name: "ISO-8859-15",
  202. },
  203. {
  204. labels: ["iso-8859-16"],
  205. name: "ISO-8859-16",
  206. },
  207. {
  208. labels: ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"],
  209. name: "KOI8-R",
  210. },
  211. {
  212. labels: ["koi8-ru", "koi8-u"],
  213. name: "KOI8-U",
  214. },
  215. {
  216. labels: ["csmacintosh", "mac", "macintosh", "x-mac-roman"],
  217. name: "macintosh",
  218. },
  219. {
  220. labels: [
  221. "dos-874",
  222. "iso-8859-11",
  223. "iso8859-11",
  224. "iso885911",
  225. "tis-620",
  226. "windows-874",
  227. ],
  228. name: "windows-874",
  229. },
  230. {
  231. labels: ["cp1250", "windows-1250", "x-cp1250"],
  232. name: "windows-1250",
  233. },
  234. {
  235. labels: ["cp1251", "windows-1251", "x-cp1251"],
  236. name: "windows-1251",
  237. },
  238. {
  239. labels: [
  240. "ansi_x3.4-1968",
  241. "cp1252",
  242. "cp819",
  243. "ibm819",
  244. "iso-ir-100",
  245. "windows-1252",
  246. "x-cp1252",
  247. ],
  248. name: "windows-1252",
  249. },
  250. {
  251. labels: [
  252. "ascii",
  253. "us-ascii",
  254. "iso-8859-1",
  255. "iso8859-1",
  256. "iso88591",
  257. "iso_8859-1",
  258. "iso_8859-1:1987",
  259. "l1",
  260. "latin1",
  261. "csisolatin1",
  262. ],
  263. name: "iso-8859-1",
  264. },
  265. {
  266. labels: ["cp1253", "windows-1253", "x-cp1253"],
  267. name: "windows-1253",
  268. },
  269. {
  270. labels: [
  271. "cp1254",
  272. "csisolatin5",
  273. "iso-8859-9",
  274. "iso-ir-148",
  275. "iso8859-9",
  276. "iso88599",
  277. "iso_8859-9",
  278. "iso_8859-9:1989",
  279. "l5",
  280. "latin5",
  281. "windows-1254",
  282. "x-cp1254",
  283. ],
  284. name: "windows-1254",
  285. },
  286. {
  287. labels: ["cp1255", "windows-1255", "x-cp1255"],
  288. name: "windows-1255",
  289. },
  290. {
  291. labels: ["cp1256", "windows-1256", "x-cp1256"],
  292. name: "windows-1256",
  293. },
  294. {
  295. labels: ["cp1257", "windows-1257", "x-cp1257"],
  296. name: "windows-1257",
  297. },
  298. {
  299. labels: ["cp1258", "windows-1258", "x-cp1258"],
  300. name: "windows-1258",
  301. },
  302. {
  303. labels: ["x-mac-cyrillic", "x-mac-ukrainian"],
  304. name: "x-mac-cyrillic",
  305. },
  306. ],
  307. heading: "Legacy single-byte encodings",
  308. },
  309. {
  310. encodings: [
  311. {
  312. labels: [
  313. "chinese",
  314. "csgb2312",
  315. "csiso58gb231280",
  316. "gb2312",
  317. "gb_2312",
  318. "gb_2312-80",
  319. "gbk",
  320. "iso-ir-58",
  321. "x-gbk",
  322. ],
  323. name: "GBK",
  324. },
  325. {
  326. labels: ["gb18030"],
  327. name: "gb18030",
  328. },
  329. ],
  330. heading: "Legacy multi-byte Chinese (simplified) encodings",
  331. },
  332. {
  333. encodings: [
  334. {
  335. labels: ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"],
  336. name: "Big5",
  337. },
  338. ],
  339. heading: "Legacy multi-byte Chinese (traditional) encodings",
  340. },
  341. {
  342. encodings: [
  343. {
  344. labels: ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"],
  345. name: "EUC-JP",
  346. },
  347. {
  348. labels: ["csiso2022jp", "iso-2022-jp"],
  349. name: "ISO-2022-JP",
  350. },
  351. {
  352. labels: [
  353. "csshiftjis",
  354. "ms932",
  355. "ms_kanji",
  356. "shift-jis",
  357. "shift_jis",
  358. "sjis",
  359. "windows-31j",
  360. "x-sjis",
  361. ],
  362. name: "Shift_JIS",
  363. },
  364. ],
  365. heading: "Legacy multi-byte Japanese encodings",
  366. },
  367. {
  368. encodings: [
  369. {
  370. labels: [
  371. "cseuckr",
  372. "csksc56011987",
  373. "euc-kr",
  374. "iso-ir-149",
  375. "korean",
  376. "ks_c_5601-1987",
  377. "ks_c_5601-1989",
  378. "ksc5601",
  379. "ksc_5601",
  380. "windows-949",
  381. ],
  382. name: "EUC-KR",
  383. },
  384. ],
  385. heading: "Legacy multi-byte Korean encodings",
  386. },
  387. {
  388. encodings: [
  389. {
  390. labels: [
  391. "csiso2022kr",
  392. "hz-gb-2312",
  393. "iso-2022-cn",
  394. "iso-2022-cn-ext",
  395. "iso-2022-kr",
  396. ],
  397. name: "replacement",
  398. },
  399. {
  400. labels: ["utf-16be"],
  401. name: "UTF-16BE",
  402. },
  403. {
  404. labels: ["utf-16", "utf-16le"],
  405. name: "UTF-16LE",
  406. },
  407. {
  408. labels: ["x-user-defined"],
  409. name: "x-user-defined",
  410. },
  411. ],
  412. heading: "Legacy miscellaneous encodings",
  413. },
  414. ];
  415. // Label to encoding registry.
  416. /** @type {Object.<string,{name:string,labels:Array.<string>}>} */
  417. var label_to_encoding = {};
  418. encodings.forEach(function (category) {
  419. category.encodings.forEach(function (encoding) {
  420. encoding.labels.forEach(function (label) {
  421. label_to_encoding[label] = encoding;
  422. });
  423. });
  424. });
  425. export { encodings };
  426. //# sourceMappingURL=encodings.js.map