// Listing metadata: 655 lines, 19 KiB, JavaScript.
(function () { 'use strict';

// This is free and unencumbered software released into the public domain.
// See LICENSE.md for more information.

//
// Utilities
//

/**
 * Tests whether a number lies inside a closed interval.
 *
 * @param {number} a The number to test.
 * @param {number} min The minimum value in the range, inclusive.
 * @param {number} max The maximum value in the range, inclusive.
 * @return {boolean} True if a >= min and a <= max.
 */
function inRange(a, min, max) {
  var atOrAboveMin = a >= min;
  var atOrBelowMax = a <= max;
  return atOrAboveMin && atOrBelowMax;
}

/**
 * Converts an optional "options" argument into an object that can be
 * indexed for its members, following the Web IDL dictionary conversion
 * rule that both undefined and null stand for "no members supplied".
 *
 * @param {*} o The value supplied by the caller.
 * @return {Object} A fresh empty object when o is undefined or null,
 *     otherwise o itself (objects and functions are returned as-is).
 * @throws {TypeError} If o is a primitive other than undefined/null.
 */
function ToDictionary(o) {
  // null must behave like undefined here; Object(null) creates a new
  // object, so the identity test below would otherwise reject it.
  if (o === undefined || o === null) return {};
  // Object(o) returns o unchanged only when o is already an object.
  if (o === Object(o)) return o;
  throw new TypeError('Could not convert argument to dictionary');
}

/**
 * Converts a string of UTF-16 code units into an array of Unicode code
 * points, replacing every unpaired surrogate with U+FFFD.
 *
 * Follows the Web IDL "obtain Unicode" algorithm:
 * https://heycam.github.io/webidl/#dfn-obtain-unicode
 *
 * @param {string} string Input string of UTF-16 code units. Non-string
 *     values are coerced with String() first.
 * @return {!Array.<number>} Code points.
 */
function stringToCodePoints(string) {
  // All reads go through the coerced copy, never the raw argument, so
  // that non-string input is handled consistently.
  var s = String(string);
  var n = s.length;
  var u = [];

  for (var i = 0; i < n; i += 1) {
    var c = s.charCodeAt(i);

    // Not a surrogate: the code unit is the code point.
    if (c < 0xD800 || c > 0xDFFF) {
      u.push(c);
      continue;
    }

    // A trail surrogate with no preceding lead, or a lead surrogate that
    // is the final code unit, cannot form a pair.
    if (c >= 0xDC00 || i === n - 1) {
      u.push(0xFFFD);
      continue;
    }

    // c is a lead surrogate and there is a following code unit.
    var d = s.charCodeAt(i + 1);
    if (0xDC00 <= d && d <= 0xDFFF) {
      // Combine the pair: 2^16 + 2^10 * (c & 0x3FF) + (d & 0x3FF).
      u.push(0x10000 + ((c & 0x3FF) << 10) + (d & 0x3FF));
      // Skip the trail surrogate that was just consumed.
      i += 1;
    } else {
      // Lead surrogate followed by a non-trail unit; the following unit
      // is examined again on the next iteration.
      u.push(0xFFFD);
    }
  }

  return u;
}

/**
 * Builds a string of UTF-16 code units from a list of code points.
 * Values up to 0xFFFF become a single code unit; anything else is
 * written as a lead/trail surrogate pair.
 *
 * @param {!Array.<number>} code_points Array of code points.
 * @return {string} string String of UTF-16 code units.
 */
function codePointsToString(code_points) {
  var pieces = [];
  for (var idx = 0; idx < code_points.length; idx += 1) {
    var point = code_points[idx];
    if (!(point <= 0xFFFF)) {
      // Supplementary plane: split the 20-bit offset into two halves.
      var offset = point - 0x10000;
      pieces.push(String.fromCharCode(0xD800 + (offset >> 10),
                                      0xDC00 + (offset & 0x3FF)));
      continue;
    }
    pieces.push(String.fromCharCode(point));
  }
  return pieces.join('');
}


//
// Implementation of Encoding specification
// https://encoding.spec.whatwg.org/
//

//
// 3. Terminology
//

/**
 * Special token signifying that a stream holds no more tokens. It is
 * what Stream#read hands back once the stream is empty.
 * @const
 */
var end_of_stream = -1;

/**
 * A stream represents an ordered sequence of tokens.
 *
 * The tokens are copied from the argument and stored in REVERSE order,
 * so the next token to read is always the last array element. Reading
 * and prepending then work on the end of the array (pop/push) instead of
 * the front (shift/unshift). Use the methods below rather than indexing
 * `tokens` directly.
 *
 * @constructor
 * @param {!(Array.<number>|Uint8Array)} tokens Array of tokens that provide the
 *     stream.
 */
function Stream(tokens) {
  /** @type {!Array.<number>} */
  this.tokens = [].slice.call(tokens);
  // Reversed so that the head of the stream sits at the end of the array.
  this.tokens.reverse();
}

Stream.prototype = {
  /**
   * @return {boolean} True if end-of-stream has been hit.
   */
  endOfStream: function() {
    return !this.tokens.length;
  },

  /**
   * When a token is read from a stream, the first token in the
   * stream must be returned and subsequently removed, and
   * end-of-stream must be returned otherwise.
   *
   * @return {number} Get the next token from the stream, or
   *     end_of_stream.
   */
  read: function() {
    if (!this.tokens.length)
      return end_of_stream;
    return this.tokens.pop();
  },

  /**
   * When one or more tokens are prepended to a stream, those tokens
   * must be inserted, in given order, before the first token in the
   * stream. The argument array is left unmodified.
   *
   * @param {(number|!Array.<number>)} token The token(s) to prepend to the stream.
   */
  prepend: function(token) {
    if (Array.isArray(token)) {
      var tokens = /**@type {!Array.<number>}*/(token);
      // Walk backwards so that tokens[0] ends up last in the reversed
      // storage, i.e. is the next token read.
      for (var i = tokens.length - 1; i >= 0; i -= 1)
        this.tokens.push(tokens[i]);
    } else {
      this.tokens.push(token);
    }
  },

  /**
   * When one or more tokens are pushed to a stream, those tokens
   * must be inserted, in given order, after the last token in the
   * stream. The argument array is left unmodified.
   *
   * @param {(number|!Array.<number>)} token The token(s) to append to the stream.
   */
  push: function(token) {
    if (Array.isArray(token)) {
      var tokens = /**@type {!Array.<number>}*/(token);
      // The tail of the stream is the front of the reversed storage.
      for (var i = 0; i < tokens.length; i += 1)
        this.tokens.unshift(tokens[i]);
    } else {
      this.tokens.unshift(token);
    }
  }
};


//
// 4. Encodings
//

// 4.1 Encoders and decoders

/**
 * Value an encoder or decoder handler returns to report that it has
 * consumed end-of-stream and has nothing further to emit.
 * @const
 */
var finished = -1;

/**
 * Reports a decoding error: either raises, or yields the code point
 * that should be emitted in place of the malformed input.
 *
 * @param {boolean} fatal If true, decoding errors raise an exception.
 * @param {number=} opt_code_point Override the standard fallback code point.
 *     Any number is honoured, including 0.
 * @return {number} The code point to insert on a decoding error
 *     (U+FFFD unless overridden).
 * @throws {TypeError} If fatal is truthy.
 */
function decoderError(fatal, opt_code_point) {
  if (fatal)
    throw new TypeError('Decoder error');
  // Only a missing override falls back; a truthiness test would wrongly
  // discard an explicit override of 0.
  if (opt_code_point === undefined || opt_code_point === null)
    return 0xFFFD;
  return opt_code_point;
}


//
// 7. API
//

/**
 * Encoding name assumed when a TextDecoder/TextEncoder is constructed
 * without an encoding argument.
 * @const
 */
var DEFAULT_ENCODING = 'utf-8';

// 7.1 Interface TextDecoder

/**
 * UTF-8-only implementation of the TextDecoder interface. May be
 * invoked with or without `new`.
 *
 * Exposes three read-only properties on the instance: `encoding`
 * (always 'utf-8'), `fatal` and `ignoreBOM`.
 *
 * @constructor
 * @param {string=} encoding The label of the encoding;
 *     defaults to 'utf-8'. Every label the Encoding Standard maps to
 *     UTF-8 is accepted, compared case-insensitively after stripping
 *     leading/trailing ASCII whitespace.
 * @param {Object=} options Members read: `fatal` (raise on malformed
 *     input) and `ignoreBOM` (keep a leading U+FEFF in the output).
 * @throws {Error} If the label does not identify UTF-8.
 * @throws {TypeError} From ToDictionary, if options cannot be treated
 *     as a dictionary.
 */
function TextDecoder$1(encoding, options) {
  if (!(this instanceof TextDecoder$1)) {
    return new TextDecoder$1(encoding, options);
  }

  // Normalise the label the way the Encoding Standard's "get an
  // encoding" step does, so that e.g. 'utf8' and ' UTF-8 ' are accepted.
  var label = encoding !== undefined ? String(encoding) : DEFAULT_ENCODING;
  label = label.replace(/^[\t\n\f\r ]+|[\t\n\f\r ]+$/g, '').toLowerCase();
  var utf8_labels = ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8',
                     'utf-8', 'utf8', 'x-unicode20utf8'];
  if (utf8_labels.indexOf(label) === -1) {
    throw new Error('Encoding not supported. Only utf-8 is supported');
  }
  options = ToDictionary(options);

  /** @private @type {boolean} */
  this._streaming = false;
  /** @private @type {boolean} */
  this._BOMseen = false;
  /** @private @type {?Decoder} */
  this._decoder = null;
  /** @private @type {boolean} */
  this._fatal = Boolean(options['fatal']);
  /** @private @type {boolean} */
  this._ignoreBOM = Boolean(options['ignoreBOM']);

  Object.defineProperty(this, 'encoding', {value: 'utf-8'});
  Object.defineProperty(this, 'fatal', {value: this._fatal});
  Object.defineProperty(this, 'ignoreBOM', {value: this._ignoreBOM});
}

TextDecoder$1.prototype = {
  /**
   * Decodes a chunk of bytes. Unless the previous call asked for
   * streaming, a fresh UTF8Decoder is created and the BOM-seen flag is
   * cleared first. Unless this call asks for streaming, the decoder is
   * flushed with end-of-stream and discarded afterwards.
   *
   * @param {ArrayBufferView=} input The buffer of bytes to decode. An
   *     ArrayBuffer, or any object whose `buffer` is an ArrayBuffer, is
   *     read; other non-null values decode as empty input.
   * @param {Object=} options Member read: `stream` (more input follows).
   * @return {string} The decoded string.
   */
  decode: function decode(input, options) {
    var bytes;
    if (typeof input === 'object' && input instanceof ArrayBuffer) {
      bytes = new Uint8Array(input);
    } else if (typeof input === 'object' && 'buffer' in input &&
               input.buffer instanceof ArrayBuffer) {
      bytes = new Uint8Array(input.buffer,
                             input.byteOffset,
                             input.byteLength);
    } else {
      bytes = new Uint8Array(0);
    }

    options = ToDictionary(options);

    if (!this._streaming) {
      this._decoder = new UTF8Decoder({fatal: this._fatal});
      this._BOMseen = false;
    }
    this._streaming = Boolean(options['stream']);

    var input_stream = new Stream(bytes);
    var code_points = [];
    var decoder = this._decoder;

    // Feeds one token (a byte, or end-of-stream once the stream is
    // empty) to the decoder and collects whatever it emits. Returns
    // false once the decoder reports `finished`.
    function step() {
      /** @type {?(number|!Array.<number>)} */
      var result = decoder.handler(input_stream, input_stream.read());
      if (result === finished)
        return false;
      if (result === null)
        return true;
      if (Array.isArray(result))
        code_points.push.apply(code_points, /**@type {!Array.<number>}*/(result));
      else
        code_points.push(result);
      return true;
    }

    while (!input_stream.endOfStream()) {
      if (!step())
        break;
    }

    if (!this._streaming) {
      // Flush: the stream is empty here, so the first step delivers
      // end-of-stream. The loop continues only if the decoder prepended
      // bytes back onto the stream.
      do {
        if (!step())
          break;
      } while (!input_stream.endOfStream());
      this._decoder = null;
    }

    // BOM handling applies to the first code point produced since the
    // decoder was (re)created: a U+FEFF there is dropped unless ignoreBOM
    // was requested. Only utf-8 is supported, so no encoding check is
    // needed before running these steps.
    if (code_points.length && !this._ignoreBOM && !this._BOMseen) {
      if (code_points[0] === 0xFEFF)
        code_points.shift();
      this._BOMseen = true;
    }

    return codePointsToString(code_points);
  }
};


// 7.2 Interface TextEncoder

/**
 * UTF-8-only implementation of the TextEncoder interface. May be
 * invoked with or without `new`. Exposes a read-only `encoding`
 * property that is always 'utf-8'.
 *
 * @constructor
 * @param {string=} encoding The label of the encoding;
 *     defaults to 'utf-8'. Every label the Encoding Standard maps to
 *     UTF-8 is accepted, compared case-insensitively after stripping
 *     leading/trailing ASCII whitespace.
 * @param {Object=} options Member read: `fatal`, which is forwarded to
 *     the UTF8Encoder.
 * @throws {Error} If the label does not identify UTF-8.
 * @throws {TypeError} From ToDictionary, if options cannot be treated
 *     as a dictionary.
 */
function TextEncoder$1(encoding, options) {
  if (!(this instanceof TextEncoder$1))
    return new TextEncoder$1(encoding, options);

  // Normalise the label so that e.g. 'utf8' and ' UTF-8 ' are accepted.
  var label = encoding !== undefined ? String(encoding) : DEFAULT_ENCODING;
  label = label.replace(/^[\t\n\f\r ]+|[\t\n\f\r ]+$/g, '').toLowerCase();
  var utf8_labels = ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8',
                     'utf-8', 'utf8', 'x-unicode20utf8'];
  if (utf8_labels.indexOf(label) === -1) {
    throw new Error('Encoding not supported. Only utf-8 is supported');
  }
  options = ToDictionary(options);

  /** @private @type {boolean} */
  this._streaming = false;
  /** @private @type {?Encoder} */
  this._encoder = null;
  /** @private @type {{fatal: boolean}} */
  this._options = {fatal: Boolean(options['fatal'])};

  Object.defineProperty(this, 'encoding', {value: 'utf-8'});
}

TextEncoder$1.prototype = {
  /**
   * Encodes a string as UTF-8.
   *
   * @param {string=} opt_string The string to encode. Only an omitted
   *     (undefined) argument means the empty string; every other value,
   *     including 0, false and null, is converted with String().
   * @param {Object=} options Member read: `stream` (nonstandard).
   * @return {Uint8Array} Encoded bytes, as a Uint8Array.
   */
  encode: function encode(opt_string, options) {
    // A truthiness test here would silently encode 0 / false / null as
    // nothing at all.
    opt_string = opt_string === undefined ? '' : String(opt_string);
    options = ToDictionary(options);

    // NOTE: This option is nonstandard. None of the encodings
    // permitted for encoding (i.e. UTF-8, UTF-16) are stateful,
    // so streaming is not necessary.
    if (!this._streaming)
      this._encoder = new UTF8Encoder(this._options);
    this._streaming = Boolean(options['stream']);

    var bytes = [];
    var input_stream = new Stream(stringToCodePoints(opt_string));
    var encoder = this._encoder;

    // Feeds one token (a code point, or end-of-stream once the stream
    // is empty) to the encoder and collects the bytes it emits. Returns
    // false once the encoder reports `finished`.
    function step() {
      /** @type {?(number|!Array.<number>)} */
      var result = encoder.handler(input_stream, input_stream.read());
      if (result === finished)
        return false;
      if (Array.isArray(result))
        bytes.push.apply(bytes, /**@type {!Array.<number>}*/(result));
      else
        bytes.push(result);
      return true;
    }

    while (!input_stream.endOfStream()) {
      if (!step())
        break;
    }

    if (!this._streaming) {
      // Flush: keep delivering end-of-stream until the encoder is done.
      while (step()) {
        // step() has already collected the output.
      }
      this._encoder = null;
    }

    return new Uint8Array(bytes);
  }
};


//
// 8. The encoding
//

// 8.1 utf-8

/**
 * Incremental UTF-8 decoder. Bytes are supplied one at a time through
 * `handler`; the partially assembled code point is kept between calls.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is passed to decoderError
 *     whenever malformed input is met.
 */
function UTF8Decoder(options) {
  var fatal = options.fatal;

  // Decoder state: the code point being assembled, how many
  // continuation bytes have been seen / are needed (all initially 0),
  // and the inclusive bounds the next continuation byte must satisfy
  // (initially 0x80..0xBF).
  var /** @type {number} */ pending = 0,
      /** @type {number} */ seen = 0,
      /** @type {number} */ needed = 0,
      /** @type {number} */ lower = 0x80,
      /** @type {number} */ upper = 0xBF;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // End of input: an unfinished sequence is an error, otherwise the
    // decoder is done.
    if (bite === end_of_stream) {
      if (needed === 0)
        return finished;
      needed = 0;
      return decoderError(fatal);
    }

    // No sequence in progress: classify the byte as ASCII, a lead byte,
    // or an error.
    if (needed === 0) {
      if (inRange(bite, 0x00, 0x7F))
        return bite;

      // Amount subtracted from the lead byte to leave its payload bits.
      var lead_base;
      if (inRange(bite, 0xC2, 0xDF)) {
        needed = 1;
        lead_base = 0xC0;
      } else if (inRange(bite, 0xE0, 0xEF)) {
        // 0xE0 and 0xED narrow the range allowed for the next byte.
        if (bite === 0xE0)
          lower = 0xA0;
        if (bite === 0xED)
          upper = 0x9F;
        needed = 2;
        lead_base = 0xE0;
      } else if (inRange(bite, 0xF0, 0xF4)) {
        // 0xF0 and 0xF4 narrow the range allowed for the next byte.
        if (bite === 0xF0)
          lower = 0x90;
        if (bite === 0xF4)
          upper = 0x8F;
        needed = 3;
        lead_base = 0xF0;
      } else {
        return decoderError(fatal);
      }

      // Place the lead byte's payload above the continuation bits.
      pending = (bite - lead_base) << (6 * needed);
      return null;
    }

    // A continuation byte is expected. Anything outside the current
    // bounds aborts the sequence; the byte is handed back to the stream
    // so that it is examined again from the initial state.
    if (!inRange(bite, lower, upper)) {
      pending = needed = seen = 0;
      lower = 0x80;
      upper = 0xBF;
      stream.prepend(bite);
      return decoderError(fatal);
    }

    // The narrowed bounds apply to the first continuation byte only.
    lower = 0x80;
    upper = 0xBF;

    // Merge this byte's six payload bits into position.
    seen += 1;
    pending += (bite - 0x80) << (6 * (needed - seen));

    if (seen !== needed)
      return null;

    // Sequence complete: emit the code point and reset for the next one.
    var code_point = pending;
    pending = needed = seen = 0;
    return code_point;
  };
}

/**
 * UTF-8 encoder. Each call to `handler` turns one code point into the
 * byte or bytes that represent it.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function UTF8Encoder(options) {
  // Read from the options object as in the decoder; nothing in the
  // handler below consults it.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // End of input: nothing more to emit.
    if (code_point === end_of_stream)
      return finished;

    // U+0000 to U+007F encode as themselves.
    if (inRange(code_point, 0x0000, 0x007f))
      return code_point;

    // Number of continuation bytes, and the marker added to the payload
    // bits of the first byte.
    var trailing, lead_marker;
    if (inRange(code_point, 0x0080, 0x07FF)) {
      // U+0080 to U+07FF: 1 and 0xC0
      trailing = 1;
      lead_marker = 0xC0;
    } else if (inRange(code_point, 0x0800, 0xFFFF)) {
      // U+0800 to U+FFFF: 2 and 0xE0
      trailing = 2;
      lead_marker = 0xE0;
    } else if (inRange(code_point, 0x10000, 0x10FFFF)) {
      // U+10000 to U+10FFFF: 3 and 0xF0
      trailing = 3;
      lead_marker = 0xF0;
    }

    // First byte: the bits above all continuation payloads, plus marker.
    var encoded = [(code_point >> (6 * trailing)) + lead_marker];

    // Continuation bytes, most significant six-bit group first.
    for (var remaining = trailing; remaining > 0; remaining -= 1) {
      var group = code_point >> (6 * (remaining - 1));
      encoded.push(0x80 | (group & 0x3F));
    }

    return encoded;
  };
}

/**
 * Locates the host's global object so the polyfills can be attached to
 * it. `globalThis` is tried first because it is the one name defined to
 * mean the global object in every kind of host; the older per-host names
 * are kept as fallbacks. Each lookup goes through `typeof` so that a
 * missing name never raises a ReferenceError.
 *
 * @return {!Object} The global object.
 * @throws {Error} If none of the known names is defined.
 */
function getGlobal() {
  if (typeof globalThis !== 'undefined') return globalThis;
  if (typeof self !== 'undefined') return self;
  if (typeof window !== 'undefined') return window;
  if (typeof global !== 'undefined') return global;
  throw new Error('No global found');
}

// Install each polyfill on the global object only when the host does not
// already provide a callable implementation under that name.
var nativeDecoderMissing = typeof TextDecoder !== 'function';
if (nativeDecoderMissing) {
  getGlobal().TextDecoder = TextDecoder$1;
}

var nativeEncoderMissing = typeof TextEncoder !== 'function';
if (nativeEncoderMissing) {
  getGlobal().TextEncoder = TextEncoder$1;
}

})();