encodings.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  /**
   * Signals a decoding error: either throws or yields the code point that
   * should be emitted in place of the undecodable input.
   *
   * @param {boolean} fatal If true, decoding errors raise an exception.
   * @param {number=} opt_code_point Override the standard fallback code point.
   *     Every number is honored, including 0 (U+0000); only `undefined` or
   *     `null` selects the default.
   * @return {number} The code point to insert on a decoding error:
   *     `opt_code_point` when supplied, otherwise U+FFFD.
   * @throws {TypeError} If `fatal` is truthy.
   */
  export function decoderError(fatal, opt_code_point = undefined) {
    if (fatal) {
      throw new TypeError("Decoder error");
    }
    // Test for "not supplied" explicitly instead of relying on truthiness:
    // `opt_code_point || 0xfffd` would discard a legitimate override of 0
    // and substitute U+FFFD.
    return opt_code_point == null ? 0xfffd : opt_code_point;
  }
  /**
   * Reports that a code point cannot be represented by raising a TypeError
   * whose message names the offending code point.
   *
   * @param {number} code_point The code point that could not be encoded.
   * @return {number} Nominal only: the function always throws, so no value is
   *     ever returned.
   * @throws {TypeError} Always.
   */
  export function encoderError(code_point) {
    const message = "The code point " + code_point + " could not be encoded.";
    throw new TypeError(message);
  }
  18. // 5.2 Names and labels
  19. // TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
  20. // https://github.com/google/closure-compiler/issues/247
  /**
   * Looks up an encoding by one of its labels.
   *
   * @param {string} label The encoding label.
   * @return {?{name:string,labels:Array.<string>}} The registry entry for the
   *     label, or null when the label is not registered.
   */
  export function getEncoding(label) {
    // 1. Remove leading and trailing whitespace from label and lowercase it
    // so it can be compared with the all-lowercase registry keys.
    // NOTE(review): trim()/toLowerCase() are not limited to ASCII, so this is
    // slightly more lenient than the "ASCII whitespace" / "ASCII
    // case-insensitive" wording of the spec step.
    const keyLabel = String(label).trim().toLowerCase();
    // 2. If label matches any of the registered labels, return the
    // corresponding encoding, and failure (null) otherwise. Only own
    // properties count: the `in` operator also walks the prototype chain, so
    // labels such as "constructor" or "__proto__" would otherwise return
    // Object.prototype members instead of null.
    if (Object.prototype.hasOwnProperty.call(label_to_encoding, keyLabel)) {
      return label_to_encoding[keyLabel];
    }
    return null;
  }
  /**
   * Encodings table: https://encoding.spec.whatwg.org/encodings.json
   *
   * Each element is a category made of a `heading` and the `encodings` listed
   * under it; each encoding pairs a `name` with the lowercase `labels` that
   * select it.
   * @const
   * @type {!Array.<{
   *   heading: string,
   *   encodings: Array.<{name:string,labels:Array.<string>}>
   * }>}
   */
  const encodings = (() => {
    // Small factories keep the table compact. Property order matches the
    // hand-written literals (labels before name, encodings before heading).
    const entry = (name, labels) => ({ labels, name });
    const category = (heading, members) => ({ encodings: members, heading });

    return [
      category("The Encoding", [
        entry("UTF-8", ["unicode-1-1-utf-8", "utf-8", "utf8"]),
      ]),
      category("Legacy single-byte encodings", [
        entry("IBM866", ["866", "cp866", "csibm866", "ibm866"]),
        entry("ISO-8859-2", [
          "csisolatin2",
          "iso-8859-2",
          "iso-ir-101",
          "iso8859-2",
          "iso88592",
          "iso_8859-2",
          "iso_8859-2:1987",
          "l2",
          "latin2",
        ]),
        entry("ISO-8859-3", [
          "csisolatin3",
          "iso-8859-3",
          "iso-ir-109",
          "iso8859-3",
          "iso88593",
          "iso_8859-3",
          "iso_8859-3:1988",
          "l3",
          "latin3",
        ]),
        entry("ISO-8859-4", [
          "csisolatin4",
          "iso-8859-4",
          "iso-ir-110",
          "iso8859-4",
          "iso88594",
          "iso_8859-4",
          "iso_8859-4:1988",
          "l4",
          "latin4",
        ]),
        entry("ISO-8859-5", [
          "csisolatincyrillic",
          "cyrillic",
          "iso-8859-5",
          "iso-ir-144",
          "iso8859-5",
          "iso88595",
          "iso_8859-5",
          "iso_8859-5:1988",
        ]),
        entry("ISO-8859-6", [
          "arabic",
          "asmo-708",
          "csiso88596e",
          "csiso88596i",
          "csisolatinarabic",
          "ecma-114",
          "iso-8859-6",
          "iso-8859-6-e",
          "iso-8859-6-i",
          "iso-ir-127",
          "iso8859-6",
          "iso88596",
          "iso_8859-6",
          "iso_8859-6:1987",
        ]),
        entry("ISO-8859-7", [
          "csisolatingreek",
          "ecma-118",
          "elot_928",
          "greek",
          "greek8",
          "iso-8859-7",
          "iso-ir-126",
          "iso8859-7",
          "iso88597",
          "iso_8859-7",
          "iso_8859-7:1987",
          "sun_eu_greek",
        ]),
        entry("ISO-8859-8", [
          "csiso88598e",
          "csisolatinhebrew",
          "hebrew",
          "iso-8859-8",
          "iso-8859-8-e",
          "iso-ir-138",
          "iso8859-8",
          "iso88598",
          "iso_8859-8",
          "iso_8859-8:1988",
          "visual",
        ]),
        entry("ISO-8859-8-I", ["csiso88598i", "iso-8859-8-i", "logical"]),
        entry("ISO-8859-10", [
          "csisolatin6",
          "iso-8859-10",
          "iso-ir-157",
          "iso8859-10",
          "iso885910",
          "l6",
          "latin6",
        ]),
        entry("ISO-8859-13", ["iso-8859-13", "iso8859-13", "iso885913"]),
        entry("ISO-8859-14", ["iso-8859-14", "iso8859-14", "iso885914"]),
        entry("ISO-8859-15", [
          "csisolatin9",
          "iso-8859-15",
          "iso8859-15",
          "iso885915",
          "iso_8859-15",
          "l9",
        ]),
        entry("ISO-8859-16", ["iso-8859-16"]),
        entry("KOI8-R", ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"]),
        entry("KOI8-U", ["koi8-ru", "koi8-u"]),
        entry("macintosh", ["csmacintosh", "mac", "macintosh", "x-mac-roman"]),
        entry("windows-874", [
          "dos-874",
          "iso-8859-11",
          "iso8859-11",
          "iso885911",
          "tis-620",
          "windows-874",
        ]),
        entry("windows-1250", ["cp1250", "windows-1250", "x-cp1250"]),
        entry("windows-1251", ["cp1251", "windows-1251", "x-cp1251"]),
        entry("windows-1252", [
          "ansi_x3.4-1968",
          "cp1252",
          "cp819",
          "ibm819",
          "iso-ir-100",
          "windows-1252",
          "x-cp1252",
        ]),
        entry("iso-8859-1", [
          "ascii",
          "us-ascii",
          "iso-8859-1",
          "iso8859-1",
          "iso88591",
          "iso_8859-1",
          "iso_8859-1:1987",
          "l1",
          "latin1",
          "csisolatin1",
        ]),
        entry("windows-1253", ["cp1253", "windows-1253", "x-cp1253"]),
        entry("windows-1254", [
          "cp1254",
          "csisolatin5",
          "iso-8859-9",
          "iso-ir-148",
          "iso8859-9",
          "iso88599",
          "iso_8859-9",
          "iso_8859-9:1989",
          "l5",
          "latin5",
          "windows-1254",
          "x-cp1254",
        ]),
        entry("windows-1255", ["cp1255", "windows-1255", "x-cp1255"]),
        entry("windows-1256", ["cp1256", "windows-1256", "x-cp1256"]),
        entry("windows-1257", ["cp1257", "windows-1257", "x-cp1257"]),
        entry("windows-1258", ["cp1258", "windows-1258", "x-cp1258"]),
        entry("x-mac-cyrillic", ["x-mac-cyrillic", "x-mac-ukrainian"]),
      ]),
      category("Legacy multi-byte Chinese (simplified) encodings", [
        entry("GBK", [
          "chinese",
          "csgb2312",
          "csiso58gb231280",
          "gb2312",
          "gb_2312",
          "gb_2312-80",
          "gbk",
          "iso-ir-58",
          "x-gbk",
        ]),
        entry("gb18030", ["gb18030"]),
      ]),
      category("Legacy multi-byte Chinese (traditional) encodings", [
        entry("Big5", ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"]),
      ]),
      category("Legacy multi-byte Japanese encodings", [
        entry("EUC-JP", ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"]),
        entry("ISO-2022-JP", ["csiso2022jp", "iso-2022-jp"]),
        entry("Shift_JIS", [
          "csshiftjis",
          "ms932",
          "ms_kanji",
          "shift-jis",
          "shift_jis",
          "sjis",
          "windows-31j",
          "x-sjis",
        ]),
      ]),
      category("Legacy multi-byte Korean encodings", [
        entry("EUC-KR", [
          "cseuckr",
          "csksc56011987",
          "euc-kr",
          "iso-ir-149",
          "korean",
          "ks_c_5601-1987",
          "ks_c_5601-1989",
          "ksc5601",
          "ksc_5601",
          "windows-949",
        ]),
      ]),
      category("Legacy miscellaneous encodings", [
        entry("replacement", [
          "csiso2022kr",
          "hz-gb-2312",
          "iso-2022-cn",
          "iso-2022-cn-ext",
          "iso-2022-kr",
        ]),
        entry("UTF-16BE", ["utf-16be"]),
        entry("UTF-16LE", ["utf-16", "utf-16le"]),
        entry("x-user-defined", ["x-user-defined"]),
      ]),
    ];
  })();
  // Label to encoding registry.
  /**
   * Maps every label in the encodings table to the encoding record that lists
   * it. Created without a prototype so that membership tests (`in`) and
   * bracket lookups only ever see registered labels, never inherited names
   * such as "constructor" or "__proto__".
   * @type {Object.<string,{name:string,labels:Array.<string>}>}
   */
  const label_to_encoding = Object.create(null);
  for (const category of encodings) {
    for (const encoding of category.encodings) {
      for (const label of encoding.labels) {
        label_to_encoding[label] = encoding;
      }
    }
  }
  424. export { encodings };
  425. //# sourceMappingURL=encodings.js.map