TextDecoder.js 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. import { DEFAULT_ENCODING } from "../encoding/defaultEncoding";
  2. import { decoders } from "../encoding/encoding-factory";
  3. import { getEncoding } from "../encoding/encodings";
  4. import { finished } from "../encoding/finished";
  5. import { end_of_stream } from "../encoding/terminology";
  6. import { codePointsToString, includes, ToDictionary } from "../encoding/utilities";
  7. import { Stream } from "./Stream";
  /**
   * Decodes bytes into a string using the decoder registered for an encoding.
   *
   * Each instance tracks its encoding, the active decoder, an ignore-BOM flag,
   * a BOM-seen flag, an error mode ('replacement' or 'fatal') and a
   * do-not-flush flag that keeps decoder state alive between streaming calls.
   *
   * @constructor
   * @param {string=} label The label of the encoding; defaults to
   *     DEFAULT_ENCODING when undefined.
   * @param {Object=} options Optional settings; the `fatal` and `ignoreBOM`
   *     members are read.
   * @throws {RangeError} If the label resolves to no encoding or to the
   *     'replacement' encoding.
   * @throws {Error} If no decoder is registered for the resolved encoding.
   */
  var TextDecoder = /** @class */ (function () {
      function TextDecoder(label, options) {
          label = label !== undefined ? String(label) : DEFAULT_ENCODING;
          var optionsMap = ToDictionary(options);
          // Initial state: no encoding/decoder yet, flags unset, error mode
          // 'replacement'.
          /** @private */
          this._encoding = null;
          /** @private @type {?Decoder} */
          this._decoder = null;
          /** @private @type {boolean} */
          this._ignoreBOM = false;
          /** @private @type {boolean} */
          this._BOMseen = false;
          /** @private @type {string} */
          this._error_mode = 'replacement';
          /** @private @type {boolean} */
          this._do_not_flush = false;
          // 1. Let encoding be the result of getting an encoding from label.
          var encoding = getEncoding(label);
          // 2. If encoding is failure or replacement, throw a RangeError.
          if (encoding === null || encoding.name === 'replacement') {
              throw new RangeError('Unknown encoding: ' + label);
          }
          if (!decoders[encoding.name]) {
              throw new Error('Decoder not present.' +
                  ' Did you forget to include encoding-indexes.js first?');
          }
          // 3-4. `this` is the new decoder object; record its encoding.
          this._encoding = encoding;
          // 5. If options's fatal member is true, set the error mode to fatal.
          if (Boolean(optionsMap['fatal'])) {
              this._error_mode = 'fatal';
          }
          // 6. If options's ignoreBOM member is true, set the ignore BOM flag.
          if (Boolean(optionsMap['ignoreBOM'])) {
              this._ignoreBOM = true;
          }
      }
      /**
       * Appends one handler result (a single code point or an array of code
       * points) to the output list.
       * @param {!Array.<number>} output
       * @param {(number|!Array.<number>)} result
       */
      function appendResult(output, result) {
          if (Array.isArray(result)) {
              output.push.apply(output, /**@type {!Array.<number>}*/ (result));
          }
          else {
              output.push(result);
          }
      }
      Object.defineProperty(TextDecoder.prototype, "encoding", {
          // The encoding attribute's getter returns the encoding's name,
          // lower-cased.
          get: function () {
              return this._encoding.name.toLowerCase();
          },
          enumerable: true,
          configurable: true
      });
      Object.defineProperty(TextDecoder.prototype, "fatal", {
          // The fatal attribute's getter returns true if the error mode is
          // fatal, and false otherwise.
          get: function () {
              return this._error_mode === 'fatal';
          },
          enumerable: true,
          configurable: true
      });
      Object.defineProperty(TextDecoder.prototype, "ignoreBOM", {
          // The ignoreBOM attribute's getter returns true if the ignore BOM
          // flag is set, and false otherwise.
          get: function () {
              return this._ignoreBOM;
          },
          enumerable: true,
          configurable: true
      });
      /**
       * Decodes a chunk of bytes.
       *
       * When `options.stream` is truthy the decoder is kept so that the next
       * call continues from the current state; otherwise the decoder is
       * flushed with end-of-stream and discarded after this call.
       *
       * @param {BufferSource=} input The buffer of bytes to decode.
       * @param {Object=} options The `stream` member is read.
       * @return {string} The decoded string.
       */
      TextDecoder.prototype.decode = function (input, options) {
          var bytes = getBytesFromInput(input);
          var optionsMap = ToDictionary(options);
          // 1. If the do not flush flag is unset, set decoder to a new
          // encoding's decoder and unset the BOM seen flag.
          if (!this._do_not_flush) {
              this._decoder = decoders[this._encoding.name]({
                  fatal: this._error_mode === 'fatal'
              });
              this._BOMseen = false;
          }
          // 2. If options's stream is true, set the do not flush flag, and
          // unset the do not flush flag otherwise.
          this._do_not_flush = Boolean(optionsMap['stream']);
          // 3. If input is given, push a copy of input to stream.
          // TODO: Align with spec algorithm - maintain stream on instance.
          var input_stream = new Stream(bytes);
          // 4. Let output be a new stream.
          var output = [];
          /** @type {?(number|!Array.<number>)} */
          var result;
          // 5. Feed every available token to the decoder.
          while (true) {
              var token = input_stream.read();
              // TODO: Align with spec algorithm.
              if (token === end_of_stream) {
                  break;
              }
              result = this._decoder.handler(input_stream, token);
              if (result === finished) {
                  break;
              }
              // Errors are thrown inside the handler; null means "no output
              // for this token".
              if (result !== null) {
                  appendResult(output, result);
              }
          }
          // TODO: Align with spec algorithm.
          if (!this._do_not_flush) {
              // Not streaming: signal end-of-stream so the decoder can emit
              // whatever it still holds, then drop it.
              do {
                  result = this._decoder.handler(input_stream, input_stream.read());
                  if (result === finished) {
                      break;
                  }
                  // Skip only absent results. A plain falsy test would also
                  // discard a decoded code point of 0 (U+0000).
                  if (result === null || result === undefined) {
                      continue;
                  }
                  appendResult(output, result);
              } while (!input_stream.endOfStream());
              this._decoder = null;
          }
          return this.serializeStream(output);
      };
      /**
       * Converts decoded code points to a string, removing one leading U+FEFF
       * when the encoding is UTF-8/UTF-16LE/UTF-16BE and neither the ignore
       * BOM flag nor the BOM seen flag is set. Mutates `stream` when a BOM is
       * removed and sets the BOM seen flag once any code point has been seen.
       *
       * @param {!Array.<number>} stream
       * @return {string}
       * @this {TextDecoder}
       */
      TextDecoder.prototype.serializeStream = function (stream) {
          var checksForBOM = includes(['UTF-8', 'UTF-16LE', 'UTF-16BE'], this._encoding.name) &&
              !this._ignoreBOM && !this._BOMseen;
          if (checksForBOM && stream.length > 0) {
              // The first code point decides the BOM question for this
              // stream, whether or not it is a BOM.
              this._BOMseen = true;
              if (stream[0] === 0xFEFF) {
                  stream.shift();
              }
          }
          return codePointsToString(stream);
      };
      return TextDecoder;
  }());
  212. export { TextDecoder };
  /**
   * Reports whether `input` is a raw byte buffer that a Uint8Array can wrap:
   * an ArrayBuffer, or a SharedArrayBuffer when the runtime defines one.
   *
   * Accepting SharedArrayBuffer keeps shared-memory input (and views over it)
   * from being treated as "no input" by the caller.
   *
   * @param {*} input Value to test.
   * @return {boolean} True for ArrayBuffer / SharedArrayBuffer instances;
   *     false otherwise, including when the check itself throws.
   */
  function isBufferInstance(input) {
      try {
          if (input instanceof ArrayBuffer) {
              return true;
          }
          // SharedArrayBuffer is not defined in every runtime, so probe for
          // it before using it as an instanceof operand.
          return typeof SharedArrayBuffer !== 'undefined' &&
              input instanceof SharedArrayBuffer;
      }
      catch (e) {
          // Best effort: log the failure and report "not a buffer".
          console.error(e);
          return false;
      }
  }
  /**
   * Normalizes the value handed to `decode` into a Uint8Array.
   *
   * - A raw buffer (as judged by isBufferInstance) is wrapped whole.
   * - An object exposing a `buffer` that is a raw buffer (typed arrays,
   *   DataView) is wrapped using its own byteOffset/byteLength, so only the
   *   viewed window is read.
   * - Anything else, including null and undefined, yields an empty array.
   *
   * @param {*} input Value supplied by the caller.
   * @return {!Uint8Array} Byte view over the input, or an empty array.
   */
  function getBytesFromInput(input) {
      // `typeof null` is 'object', so null has to be rejected explicitly;
      // otherwise the `in` test below throws a TypeError.
      if (input === null || typeof input !== 'object') {
          return new Uint8Array(0);
      }
      if (isBufferInstance(input)) {
          return new Uint8Array(input);
      }
      if ('buffer' in input && isBufferInstance(input.buffer)) {
          return new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
      }
      return new Uint8Array(0);
  }
  233. //# sourceMappingURL=TextDecoder.js.map