dotfiles/vscode/.vscode/extensions/randomfractalsinc.vscode-data-preview-2.3.0/node_modules/text-encoding-utf-8/lib/encoding.lib.mjs
Errol Sancaktar ff17c17e23 vscode
2024-06-14 09:31:58 -06:00

666 lines
18 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use strict';
// This is free and unencumbered software released into the public domain.
// See LICENSE.md for more information.
//
// Utilities
//
/**
* @param {number} a The number to test.
* @param {number} min The minimum value in the range, inclusive.
* @param {number} max The maximum value in the range, inclusive.
* @return {boolean} True if a >= min and a <= max.
*/
function inRange(a, min, max) {
return min <= a && a <= max;
}
/**
* @param {*} o
* @return {Object}
*/
function ToDictionary(o) {
if (o === undefined) return {};
if (o === Object(o)) return o;
throw TypeError('Could not convert argument to dictionary');
}
/**
* @param {string} string Input string of UTF-16 code units.
* @return {!Array.<number>} Code points.
*/
function stringToCodePoints(string) {
// https://heycam.github.io/webidl/#dfn-obtain-unicode
// 1. Let S be the DOMString value.
var s = String(string);
// 2. Let n be the length of S.
var n = s.length;
// 3. Initialize i to 0.
var i = 0;
// 4. Initialize U to be an empty sequence of Unicode characters.
var u = [];
// 5. While i < n:
while (i < n) {
// 1. Let c be the code unit in S at index i.
var c = s.charCodeAt(i);
// 2. Depending on the value of c:
// c < 0xD800 or c > 0xDFFF
if (c < 0xD800 || c > 0xDFFF) {
// Append to U the Unicode character with code point c.
u.push(c);
}
// 0xDC00 ≤ c ≤ 0xDFFF
else if (0xDC00 <= c && c <= 0xDFFF) {
// Append to U a U+FFFD REPLACEMENT CHARACTER.
u.push(0xFFFD);
}
// 0xD800 ≤ c ≤ 0xDBFF
else if (0xD800 <= c && c <= 0xDBFF) {
// 1. If i = n1, then append to U a U+FFFD REPLACEMENT
// CHARACTER.
if (i === n - 1) {
u.push(0xFFFD);
}
// 2. Otherwise, i < n1:
else {
// 1. Let d be the code unit in S at index i+1.
var d = string.charCodeAt(i + 1);
// 2. If 0xDC00 ≤ d ≤ 0xDFFF, then:
if (0xDC00 <= d && d <= 0xDFFF) {
// 1. Let a be c & 0x3FF.
var a = c & 0x3FF;
// 2. Let b be d & 0x3FF.
var b = d & 0x3FF;
// 3. Append to U the Unicode character with code point
// 2^16+2^10*a+b.
u.push(0x10000 + (a << 10) + b);
// 4. Set i to i+1.
i += 1;
}
// 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a
// U+FFFD REPLACEMENT CHARACTER.
else {
u.push(0xFFFD);
}
}
}
// 3. Set i to i+1.
i += 1;
}
// 6. Return U.
return u;
}
/**
* @param {!Array.<number>} code_points Array of code points.
* @return {string} string String of UTF-16 code units.
*/
function codePointsToString(code_points) {
var s = '';
for (var i = 0; i < code_points.length; ++i) {
var cp = code_points[i];
if (cp <= 0xFFFF) {
s += String.fromCharCode(cp);
} else {
cp -= 0x10000;
s += String.fromCharCode((cp >> 10) + 0xD800,
(cp & 0x3FF) + 0xDC00);
}
}
return s;
}
//
// Implementation of Encoding specification
// https://encoding.spec.whatwg.org/
//
//
// 3. Terminology
//
/**
* End-of-stream is a special token that signifies no more tokens
* are in the stream.
* @const
*/ var end_of_stream = -1;
/**
* A stream represents an ordered sequence of tokens.
*
* @constructor
* @param {!(Array.<number>|Uint8Array)} tokens Array of tokens that provide the
* stream.
*/
function Stream(tokens) {
/** @type {!Array.<number>} */
this.tokens = [].slice.call(tokens);
}
Stream.prototype = {
/**
* @return {boolean} True if end-of-stream has been hit.
*/
endOfStream: function() {
return !this.tokens.length;
},
/**
* When a token is read from a stream, the first token in the
* stream must be returned and subsequently removed, and
* end-of-stream must be returned otherwise.
*
* @return {number} Get the next token from the stream, or
* end_of_stream.
*/
read: function() {
if (!this.tokens.length)
return end_of_stream;
return this.tokens.shift();
},
/**
* When one or more tokens are prepended to a stream, those tokens
* must be inserted, in given order, before the first token in the
* stream.
*
* @param {(number|!Array.<number>)} token The token(s) to prepend to the stream.
*/
prepend: function(token) {
if (Array.isArray(token)) {
var tokens = /**@type {!Array.<number>}*/(token);
while (tokens.length)
this.tokens.unshift(tokens.pop());
} else {
this.tokens.unshift(token);
}
},
/**
* When one or more tokens are pushed to a stream, those tokens
* must be inserted, in given order, after the last token in the
* stream.
*
* @param {(number|!Array.<number>)} token The tokens(s) to prepend to the stream.
*/
push: function(token) {
if (Array.isArray(token)) {
var tokens = /**@type {!Array.<number>}*/(token);
while (tokens.length)
this.tokens.push(tokens.shift());
} else {
this.tokens.push(token);
}
}
};
//
// 4. Encodings
//
// 4.1 Encoders and decoders
/** @const */
var finished = -1;
/**
* @param {boolean} fatal If true, decoding errors raise an exception.
* @param {number=} opt_code_point Override the standard fallback code point.
* @return {number} The code point to insert on a decoding error.
*/
function decoderError(fatal, opt_code_point) {
if (fatal)
throw TypeError('Decoder error');
return opt_code_point || 0xFFFD;
}
/** @interface */
function Decoder() {}
Decoder.prototype = {
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
* @return {?(number|!Array.<number>)} The next code point(s)
* decoded, or null if not enough data exists in the input
* stream to decode a complete code point, or |finished|.
*/
handler: function(stream, bite) {}
};
/** @interface */
function Encoder() {}
Encoder.prototype = {
/**
* @param {Stream} stream The stream of code points being encoded.
* @param {number} code_point Next code point read from the stream.
* @return {(number|!Array.<number>)} Byte(s) to emit, or |finished|.
*/
handler: function(stream, code_point) {}
};
//
// 7. API
//
/** @const */ var DEFAULT_ENCODING = 'utf-8';
// 7.1 Interface TextDecoder
/**
* @constructor
* @param {string=} encoding The label of the encoding;
* defaults to 'utf-8'.
* @param {Object=} options
*/
function TextDecoder(encoding, options) {
if (!(this instanceof TextDecoder)) {
return new TextDecoder(encoding, options);
}
encoding = encoding !== undefined ? String(encoding).toLowerCase() : DEFAULT_ENCODING;
if (encoding !== DEFAULT_ENCODING) {
throw new Error('Encoding not supported. Only utf-8 is supported');
}
options = ToDictionary(options);
/** @private @type {boolean} */
this._streaming = false;
/** @private @type {boolean} */
this._BOMseen = false;
/** @private @type {?Decoder} */
this._decoder = null;
/** @private @type {boolean} */
this._fatal = Boolean(options['fatal']);
/** @private @type {boolean} */
this._ignoreBOM = Boolean(options['ignoreBOM']);
Object.defineProperty(this, 'encoding', {value: 'utf-8'});
Object.defineProperty(this, 'fatal', {value: this._fatal});
Object.defineProperty(this, 'ignoreBOM', {value: this._ignoreBOM});
}
TextDecoder.prototype = {
/**
* @param {ArrayBufferView=} input The buffer of bytes to decode.
* @param {Object=} options
* @return {string} The decoded string.
*/
decode: function decode(input, options) {
var bytes;
if (typeof input === 'object' && input instanceof ArrayBuffer) {
bytes = new Uint8Array(input);
} else if (typeof input === 'object' && 'buffer' in input &&
input.buffer instanceof ArrayBuffer) {
bytes = new Uint8Array(input.buffer,
input.byteOffset,
input.byteLength);
} else {
bytes = new Uint8Array(0);
}
options = ToDictionary(options);
if (!this._streaming) {
this._decoder = new UTF8Decoder({fatal: this._fatal});
this._BOMseen = false;
}
this._streaming = Boolean(options['stream']);
var input_stream = new Stream(bytes);
var code_points = [];
/** @type {?(number|!Array.<number>)} */
var result;
while (!input_stream.endOfStream()) {
result = this._decoder.handler(input_stream, input_stream.read());
if (result === finished)
break;
if (result === null)
continue;
if (Array.isArray(result))
code_points.push.apply(code_points, /**@type {!Array.<number>}*/(result));
else
code_points.push(result);
}
if (!this._streaming) {
do {
result = this._decoder.handler(input_stream, input_stream.read());
if (result === finished)
break;
if (result === null)
continue;
if (Array.isArray(result))
code_points.push.apply(code_points, /**@type {!Array.<number>}*/(result));
else
code_points.push(result);
} while (!input_stream.endOfStream());
this._decoder = null;
}
if (code_points.length) {
// If encoding is one of utf-8, utf-16be, and utf-16le, and
// ignore BOM flag and BOM seen flag are unset, run these
// subsubsteps:
if (['utf-8'].indexOf(this.encoding) !== -1 &&
!this._ignoreBOM && !this._BOMseen) {
// If token is U+FEFF, set BOM seen flag.
if (code_points[0] === 0xFEFF) {
this._BOMseen = true;
code_points.shift();
} else {
// Otherwise, if token is not end-of-stream, set BOM seen
// flag and append token to output.
this._BOMseen = true;
}
}
}
return codePointsToString(code_points);
}
};
// 7.2 Interface TextEncoder
/**
* @constructor
* @param {string=} encoding The label of the encoding;
* defaults to 'utf-8'.
* @param {Object=} options
*/
function TextEncoder(encoding, options) {
if (!(this instanceof TextEncoder))
return new TextEncoder(encoding, options);
encoding = encoding !== undefined ? String(encoding).toLowerCase() : DEFAULT_ENCODING;
if (encoding !== DEFAULT_ENCODING) {
throw new Error('Encoding not supported. Only utf-8 is supported');
}
options = ToDictionary(options);
/** @private @type {boolean} */
this._streaming = false;
/** @private @type {?Encoder} */
this._encoder = null;
/** @private @type {{fatal: boolean}} */
this._options = {fatal: Boolean(options['fatal'])};
Object.defineProperty(this, 'encoding', {value: 'utf-8'});
}
TextEncoder.prototype = {
/**
* @param {string=} opt_string The string to encode.
* @param {Object=} options
* @return {Uint8Array} Encoded bytes, as a Uint8Array.
*/
encode: function encode(opt_string, options) {
opt_string = opt_string ? String(opt_string) : '';
options = ToDictionary(options);
// NOTE: This option is nonstandard. None of the encodings
// permitted for encoding (i.e. UTF-8, UTF-16) are stateful,
// so streaming is not necessary.
if (!this._streaming)
this._encoder = new UTF8Encoder(this._options);
this._streaming = Boolean(options['stream']);
var bytes = [];
var input_stream = new Stream(stringToCodePoints(opt_string));
/** @type {?(number|!Array.<number>)} */
var result;
while (!input_stream.endOfStream()) {
result = this._encoder.handler(input_stream, input_stream.read());
if (result === finished)
break;
if (Array.isArray(result))
bytes.push.apply(bytes, /**@type {!Array.<number>}*/(result));
else
bytes.push(result);
}
if (!this._streaming) {
while (true) {
result = this._encoder.handler(input_stream, input_stream.read());
if (result === finished)
break;
if (Array.isArray(result))
bytes.push.apply(bytes, /**@type {!Array.<number>}*/(result));
else
bytes.push(result);
}
this._encoder = null;
}
return new Uint8Array(bytes);
}
};
//
// 8. The encoding
//
// 8.1 utf-8
/**
* @constructor
* @implements {Decoder}
* @param {{fatal: boolean}} options
*/
function UTF8Decoder(options) {
var fatal = options.fatal;
// utf-8's decoder's has an associated utf-8 code point, utf-8
// bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
// lower boundary (initially 0x80), and a utf-8 upper boundary
// (initially 0xBF).
var /** @type {number} */ utf8_code_point = 0,
/** @type {number} */ utf8_bytes_seen = 0,
/** @type {number} */ utf8_bytes_needed = 0,
/** @type {number} */ utf8_lower_boundary = 0x80,
/** @type {number} */ utf8_upper_boundary = 0xBF;
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
* @return {?(number|!Array.<number>)} The next code point(s)
* decoded, or null if not enough data exists in the input
* stream to decode a complete code point.
*/
this.handler = function(stream, bite) {
// 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
// set utf-8 bytes needed to 0 and return error.
if (bite === end_of_stream && utf8_bytes_needed !== 0) {
utf8_bytes_needed = 0;
return decoderError(fatal);
}
// 2. If byte is end-of-stream, return finished.
if (bite === end_of_stream)
return finished;
// 3. If utf-8 bytes needed is 0, based on byte:
if (utf8_bytes_needed === 0) {
// 0x00 to 0x7F
if (inRange(bite, 0x00, 0x7F)) {
// Return a code point whose value is byte.
return bite;
}
// 0xC2 to 0xDF
if (inRange(bite, 0xC2, 0xDF)) {
// Set utf-8 bytes needed to 1 and utf-8 code point to byte
// 0xC0.
utf8_bytes_needed = 1;
utf8_code_point = bite - 0xC0;
}
// 0xE0 to 0xEF
else if (inRange(bite, 0xE0, 0xEF)) {
// 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0.
if (bite === 0xE0)
utf8_lower_boundary = 0xA0;
// 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
if (bite === 0xED)
utf8_upper_boundary = 0x9F;
// 3. Set utf-8 bytes needed to 2 and utf-8 code point to
// byte 0xE0.
utf8_bytes_needed = 2;
utf8_code_point = bite - 0xE0;
}
// 0xF0 to 0xF4
else if (inRange(bite, 0xF0, 0xF4)) {
// 1. If byte is 0xF0, set utf-8 lower boundary to 0x90.
if (bite === 0xF0)
utf8_lower_boundary = 0x90;
// 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
if (bite === 0xF4)
utf8_upper_boundary = 0x8F;
// 3. Set utf-8 bytes needed to 3 and utf-8 code point to
// byte 0xF0.
utf8_bytes_needed = 3;
utf8_code_point = bite - 0xF0;
}
// Otherwise
else {
// Return error.
return decoderError(fatal);
}
// Then (byte is in the range 0xC2 to 0xF4) set utf-8 code
// point to utf-8 code point << (6 × utf-8 bytes needed) and
// return continue.
utf8_code_point = utf8_code_point << (6 * utf8_bytes_needed);
return null;
}
// 4. If byte is not in the range utf-8 lower boundary to utf-8
// upper boundary, run these substeps:
if (!inRange(bite, utf8_lower_boundary, utf8_upper_boundary)) {
// 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
// bytes seen to 0, set utf-8 lower boundary to 0x80, and set
// utf-8 upper boundary to 0xBF.
utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
utf8_lower_boundary = 0x80;
utf8_upper_boundary = 0xBF;
// 2. Prepend byte to stream.
stream.prepend(bite);
// 3. Return error.
return decoderError(fatal);
}
// 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
// to 0xBF.
utf8_lower_boundary = 0x80;
utf8_upper_boundary = 0xBF;
// 6. Increase utf-8 bytes seen by one and set utf-8 code point
// to utf-8 code point + (byte 0x80) << (6 × (utf-8 bytes
// needed utf-8 bytes seen)).
utf8_bytes_seen += 1;
utf8_code_point += (bite - 0x80) << (6 * (utf8_bytes_needed - utf8_bytes_seen));
// 7. If utf-8 bytes seen is not equal to utf-8 bytes needed,
// continue.
if (utf8_bytes_seen !== utf8_bytes_needed)
return null;
// 8. Let code point be utf-8 code point.
var code_point = utf8_code_point;
// 9. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
// seen to 0.
utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
// 10. Return a code point whose value is code point.
return code_point;
};
}
/**
* @constructor
* @implements {Encoder}
* @param {{fatal: boolean}} options
*/
function UTF8Encoder(options) {
var fatal = options.fatal;
/**
* @param {Stream} stream Input stream.
* @param {number} code_point Next code point read from the stream.
* @return {(number|!Array.<number>)} Byte(s) to emit.
*/
this.handler = function(stream, code_point) {
// 1. If code point is end-of-stream, return finished.
if (code_point === end_of_stream)
return finished;
// 2. If code point is in the range U+0000 to U+007F, return a
// byte whose value is code point.
if (inRange(code_point, 0x0000, 0x007f))
return code_point;
// 3. Set count and offset based on the range code point is in:
var count, offset;
// U+0080 to U+07FF: 1 and 0xC0
if (inRange(code_point, 0x0080, 0x07FF)) {
count = 1;
offset = 0xC0;
}
// U+0800 to U+FFFF: 2 and 0xE0
else if (inRange(code_point, 0x0800, 0xFFFF)) {
count = 2;
offset = 0xE0;
}
// U+10000 to U+10FFFF: 3 and 0xF0
else if (inRange(code_point, 0x10000, 0x10FFFF)) {
count = 3;
offset = 0xF0;
}
// 4.Let bytes be a byte sequence whose first byte is (code
// point >> (6 × count)) + offset.
var bytes = [(code_point >> (6 * count)) + offset];
// 5. Run these substeps while count is greater than 0:
while (count > 0) {
// 1. Set temp to code point >> (6 × (count 1)).
var temp = code_point >> (6 * (count - 1));
// 2. Append to bytes 0x80 | (temp & 0x3F).
bytes.push(0x80 | (temp & 0x3F));
// 3. Decrease count by one.
count -= 1;
}
// 6. Return bytes bytes, in order.
return bytes;
};
}
export {TextEncoder, TextDecoder};