dotfiles/vscode/.vscode/extensions/randomfractalsinc.vscode-data-preview-2.3.0/node_modules/avsc/lib/specs.js
/* jshint node: true */
// TODO: Add minimal templating.
// TODO: Add option to prefix nested type declarations with the outer types'
// names.
'use strict';
/** IDL to protocol (services) and schema (types) parsing logic. */
var files = require('./files'),
utils = require('./utils'),
path = require('path'),
util = require('util');
var f = util.format;
// Default type references defined by Avro.
var TYPE_REFS = {
date: {type: 'int', logicalType: 'date'},
decimal: {type: 'bytes', logicalType: 'decimal'},
time_ms: {type: 'long', logicalType: 'time-millis'},
timestamp_ms: {type: 'long', logicalType: 'timestamp-millis'}
};
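// For instance, a `timestamp_ms` reference in an IDL declaration resolves to
// `{type: 'long', logicalType: 'timestamp-millis'}`. These defaults can be
// overridden by passing a `typeRefs` map in the reader options (see the
// `Reader` constructor below), e.g. `new Reader(str, {typeRefs: {...}})`.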
/** Assemble an IDL file into a decoded protocol. */
function assembleProtocol(fpath, opts, cb) {
if (!cb && typeof opts == 'function') {
cb = opts;
opts = undefined;
}
opts = opts || {};
if (!opts.importHook) {
opts.importHook = files.createImportHook();
}
importFile(fpath, function (err, protocol) {
if (err) {
cb(err);
return;
}
if (!protocol) {
cb(new Error('empty root import'));
return;
}
var schemas = protocol.types;
if (schemas) {
// Strip redundant namespaces from types before returning the protocol.
// Note that we keep empty (`''`) nested namespaces when the outer one is
// non-empty. This allows figuring out whether unqualified imported names
// should be qualified by the protocol's namespace: they should if their
// namespace is `undefined` and should not if it is empty.
var namespace = protocolNamespace(protocol) || '';
schemas.forEach(function (schema) {
if (schema.namespace === namespace) {
delete schema.namespace;
}
});
}
cb(null, protocol);
});
function importFile(fpath, cb) {
opts.importHook(fpath, 'idl', function (err, str) {
if (err) {
cb(err);
return;
}
if (str === undefined) {
// This signals a file that was already imported by the default import
// hook. Implementors who wish to disallow duplicate imports should provide
// a custom hook which throws an error when a duplicate is detected.
cb();
return;
}
try {
var reader = new Reader(str, opts);
var obj = reader._readProtocol();
} catch (err) {
err.path = fpath; // To help debug which file caused the error.
cb(err);
return;
}
fetchImports(obj.protocol, obj.imports, path.dirname(fpath), cb);
});
}
function fetchImports(protocol, imports, dpath, cb) {
var importedProtocols = [];
next();
function next() {
var info = imports.shift();
if (!info) {
// We are done with this file. We prepend all imported types to this
// file's own types and can return the final result.
importedProtocols.reverse();
try {
importedProtocols.forEach(function (imported) {
mergeImport(protocol, imported);
});
} catch (err) {
cb(err);
return;
}
cb(null, protocol);
return;
}
// Resolve the import relative to the importing file's directory.
var importPath = path.join(dpath, info.name);
if (info.kind === 'idl') {
importFile(importPath, function (err, imported) {
if (err) {
cb(err);
return;
}
if (imported) {
importedProtocols.push(imported);
}
next();
});
} else {
// We are importing a protocol or schema file.
opts.importHook(importPath, info.kind, function (err, str) {
if (err) {
cb(err);
return;
}
switch (info.kind) {
case 'protocol':
case 'schema':
if (str === undefined) {
// Skip duplicate import (see related comment above).
next();
return;
}
try {
var obj = JSON.parse(str);
} catch (err) {
err.path = importPath;
cb(err);
return;
}
var imported = info.kind === 'schema' ? {types: [obj]} : obj;
importedProtocols.push(imported);
next();
return;
default:
cb(new Error(f('invalid import kind: %s', info.kind)));
}
});
}
}
}
function mergeImport(protocol, imported) {
// First merge the types (where we don't need to check for duplicates since
// instantiating the service will take care of it), then the messages (where
// we do need to, since duplicates would silently overwrite each other).
var schemas = imported.types || [];
schemas.reverse();
schemas.forEach(function (schema) {
if (!protocol.types) {
protocol.types = [];
}
// Ensure the imported protocol's namespace is inherited correctly (it
// might be different from the current one).
if (schema.namespace === undefined) {
schema.namespace = protocolNamespace(imported) || '';
}
protocol.types.unshift(schema);
});
Object.keys(imported.messages || {}).forEach(function (name) {
if (!protocol.messages) {
protocol.messages = {};
}
if (protocol.messages[name]) {
throw new Error(f('duplicate message: %s', name));
}
protocol.messages[name] = imported.messages[name];
});
}
}
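// Example (illustrative; `./Ping.avdl` is a hypothetical path):
//
//   assembleProtocol('./Ping.avdl', function (err, protocol) {
//     if (err) { throw err; }
//     // `protocol` is a plain object (e.g. `{protocol: 'Ping', ...}`) with
//     // any imported types prepended to its `types` array.
//   });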
// Parsing functions.
/**
* Convenience function to parse multiple inputs into protocols and schemas.
*
* It should cover most basic use-cases but has a few limitations:
*
* + It doesn't allow passing options to the parsing step.
* + The protocol/type inference logic can be deceived.
*
* The parsing logic is as follows:
*
* + If `str` contains `path.sep` (on Windows `\`, otherwise `/`) and is a path
* to an existing file, it will first be read as JSON, then as an IDL
* specification if JSON parsing failed. If either succeeds, the result is
* returned, otherwise the next steps are run using the file's content
* instead of the input path.
* + If `str` is a valid JSON string, it is parsed then returned.
* + If `str` is a valid IDL protocol specification, it is parsed and returned,
* provided no imports are present (an error is thrown if there are any).
* + If `str` is a valid IDL type specification, it is parsed and returned.
* + If none of the above cases apply, `str` is returned.
*/
function read(str) {
var schema;
if (typeof str == 'string' && ~str.indexOf(path.sep) && files.existsSync(str)) {
// Try interpreting `str` as a path to a file containing a JSON schema or an IDL
// protocol. Note that we add the second check to skip primitive references
// (e.g. `"int"`, the most common use-case for `avro.parse`).
var contents = files.readFileSync(str, {encoding: 'utf8'});
try {
return JSON.parse(contents);
} catch (err) {
var opts = {importHook: files.createSyncImportHook()};
assembleProtocol(str, opts, function (err, protocolSchema) {
schema = err ? contents : protocolSchema;
});
}
} else {
schema = str;
}
if (typeof schema != 'string' || schema === 'null') {
// This last predicate is to allow `read('null')` to work similarly to
// `read('int')` and other primitives (null needs to be handled separately
// since it is also a valid JSON literal).
return schema;
}
try {
return JSON.parse(schema);
} catch (err) {
try {
return Reader.readProtocol(schema);
} catch (err) {
try {
return Reader.readSchema(schema);
} catch (err) {
return schema;
}
}
}
}
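// A few illustrative calls (outputs inferred from the logic above):
//
//   read('array<int>');  // => {type: 'array', items: 'int'}
//   read('"bytes"');     // => 'bytes' (valid JSON, parsed directly)
//   read('int');         // => 'int' (not JSON and not a protocol; falls
//                        //    through to the IDL type reader)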
function Reader(str, opts) {
opts = opts || {};
this._tk = new Tokenizer(str);
this._ackVoidMessages = !!opts.ackVoidMessages;
this._implicitTags = !opts.delimitedCollections;
this._typeRefs = opts.typeRefs || TYPE_REFS;
}
Reader.readProtocol = function (str, opts) {
var reader = new Reader(str, opts);
var protocol = reader._readProtocol();
if (protocol.imports.length) {
// Imports can only be resolved when the IDL file is provided via its
// path, so we fail rather than silently ignoring them.
throw new Error('unresolvable import');
}
return protocol.protocol;
};
Reader.readSchema = function (str, opts) {
var reader = new Reader(str, opts);
var doc = reader._readJavadoc();
var schema = reader._readType(doc === undefined ? {} : {doc: doc}, true);
reader._tk.next({id: '(eof)'}); // Check that we have read everything.
return schema;
};
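// For example (illustrative):
//
//   Reader.readSchema('map<string>');
//   // => {type: 'map', values: 'string'}
//   Reader.readProtocol('protocol Ping { string ping(); }');
//   // => {protocol: 'Ping', messages: {ping: {request: [], response: 'string'}}}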
Reader.prototype._readProtocol = function () {
var tk = this._tk;
var imports = [];
var types = [];
var messages = {};
var pos;
// Outer declarations (outside of the protocol block).
this._readImports(imports);
var protocolSchema = {};
var protocolJavadoc = this._readJavadoc();
if (protocolJavadoc !== undefined) {
protocolSchema.doc = protocolJavadoc;
}
this._readAnnotations(protocolSchema);
tk.next({val: 'protocol'});
if (!tk.next({val: '{', silent: true})) {
// Named protocol.
protocolSchema.protocol = tk.next({id: 'name'}).val;
tk.next({val: '{'});
}
// Inner declarations.
while (!tk.next({val: '}', silent: true})) {
if (!this._readImports(imports)) {
var javadoc = this._readJavadoc();
var typeSchema = this._readType({}, true);
var numImports = this._readImports(imports, true);
var message = undefined;
// We mark our position and try to parse a message from here.
pos = tk.pos;
if (!numImports && (message = this._readMessage(typeSchema))) {
// Note that if any imports were found, we cannot be parsing a message.
if (javadoc !== undefined && message.schema.doc === undefined) {
message.schema.doc = javadoc;
}
var oneWay = false;
if (
message.schema.response === 'void' ||
message.schema.response.type === 'void'
) {
oneWay = !this._ackVoidMessages && !message.schema.errors;
if (message.schema.response === 'void') {
message.schema.response = 'null';
} else {
message.schema.response.type = 'null';
}
}
if (oneWay) {
message.schema['one-way'] = true;
}
if (messages[message.name]) {
// We have to do this check here otherwise the duplicate will be
// overwritten (and service instantiation won't be able to catch it).
throw new Error(f('duplicate message: %s', message.name));
}
messages[message.name] = message.schema;
} else {
// This was a standalone type definition.
if (javadoc) {
if (typeof typeSchema == 'string') {
typeSchema = {doc: javadoc, type: typeSchema};
} else if (typeSchema.doc === undefined) {
typeSchema.doc = javadoc;
}
}
types.push(typeSchema);
// We backtrack to the position marked above and swallow any trailing
// semicolon (to make type declarations more consistent).
tk.pos = pos;
tk.next({val: ';', silent: true});
}
javadoc = undefined;
}
}
tk.next({id: '(eof)'});
if (types.length) {
protocolSchema.types = types;
}
if (Object.keys(messages).length) {
protocolSchema.messages = messages;
}
return {protocol: protocolSchema, imports: imports};
};
Reader.prototype._readAnnotations = function (schema) {
var tk = this._tk;
while (tk.next({val: '@', silent: true})) {
// Annotations are allowed to have names which aren't valid Avro names, so
// we must advance until we hit the first left parenthesis.
var parts = [];
while (!tk.next({val: '(', silent: true})) {
parts.push(tk.next().val);
}
schema[parts.join('')] = tk.next({id: 'json'}).val;
tk.next({val: ')'});
}
};
Reader.prototype._readMessage = function (responseSchema) {
var tk = this._tk;
var schema = {request: [], response: responseSchema};
this._readAnnotations(schema);
var name = tk.next().val;
if (tk.next().val !== '(') {
// This isn't a message.
return;
}
if (!tk.next({val: ')', silent: true})) {
do {
schema.request.push(this._readField());
} while (!tk.next({val: ')', silent: true}) && tk.next({val: ','}));
}
var token = tk.next();
switch (token.val) {
case 'throws':
// The IDL spec doesn't seem to be explicit about which syntax to use for
// multiple errors. We will assume a comma-separated list.
schema.errors = [];
do {
schema.errors.push(this._readType());
} while (!tk.next({val: ';', silent: true}) && tk.next({val: ','}));
break;
case 'oneway':
schema['one-way'] = true;
tk.next({val: ';'});
break;
case ';':
break;
default:
throw tk.error('invalid message suffix', token);
}
return {name: name, schema: schema};
};
Reader.prototype._readJavadoc = function () {
var token = this._tk.next({id: 'javadoc', emitJavadoc: true, silent: true});
if (token) {
return token.val;
}
};
Reader.prototype._readField = function () {
var tk = this._tk;
var javadoc = this._readJavadoc();
var schema = {type: this._readType()};
if (javadoc !== undefined && schema.doc === undefined) {
schema.doc = javadoc;
}
this._readAnnotations(schema);
schema.name = tk.next({id: 'name'}).val;
if (tk.next({val: '=', silent: true})) {
schema['default'] = tk.next({id: 'json'}).val;
}
return schema;
};
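// For instance, the field declaration `int count = 0;` produces
// `{type: 'int', name: 'count', default: 0}` (the trailing semicolon is
// consumed by the caller).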
Reader.prototype._readType = function (schema, top) {
schema = schema || {};
this._readAnnotations(schema);
schema.type = this._tk.next({id: 'name'}).val;
switch (schema.type) {
case 'record':
case 'error':
return this._readRecord(schema);
case 'fixed':
return this._readFixed(schema);
case 'enum':
return this._readEnum(schema, top);
case 'map':
return this._readMap(schema);
case 'array':
return this._readArray(schema);
case 'union':
if (Object.keys(schema).length > 1) {
throw new Error('union annotations are not supported');
}
return this._readUnion();
default:
// Reference.
var ref = this._typeRefs[schema.type];
if (ref) {
delete schema.type; // Always overwrite the type.
utils.copyOwnProperties(ref, schema);
}
return Object.keys(schema).length > 1 ? schema : schema.type;
}
};
Reader.prototype._readFixed = function (schema) {
var tk = this._tk;
if (!tk.next({val: '(', silent: true})) {
schema.name = tk.next({id: 'name'}).val;
tk.next({val: '('});
}
schema.size = parseInt(tk.next({id: 'number'}).val);
tk.next({val: ')'});
return schema;
};
Reader.prototype._readMap = function (schema) {
var tk = this._tk;
// Angle brackets are unwieldy when declaring inline types. We allow them to
// be omitted (but, for consistency, if the opening bracket is present the
// closing one must be as well). Note that this is non-standard.
var silent = this._implicitTags;
var implicitTags = tk.next({val: '<', silent: silent}) === undefined;
schema.values = this._readType();
tk.next({val: '>', silent: implicitTags});
return schema;
};
Reader.prototype._readArray = function (schema) {
var tk = this._tk;
var silent = this._implicitTags;
var implicitTags = tk.next({val: '<', silent: silent}) === undefined;
schema.items = this._readType();
tk.next({val: '>', silent: implicitTags});
return schema;
};
Reader.prototype._readEnum = function (schema, top) {
var tk = this._tk;
if (!tk.next({val: '{', silent: true})) {
schema.name = tk.next({id: 'name'}).val;
tk.next({val: '{'});
}
schema.symbols = [];
do {
schema.symbols.push(tk.next().val);
} while (!tk.next({val: '}', silent: true}) && tk.next({val: ','}));
// To avoid confusing syntax, reader enums (i.e. enums with a default value)
// can only be defined at the top level.
if (top && tk.next({val: '=', silent: true})) {
schema.default = tk.next().val;
tk.next({val: ';'});
}
return schema;
};
Reader.prototype._readUnion = function () {
var tk = this._tk;
var arr = [];
tk.next({val: '{'});
do {
arr.push(this._readType());
} while (!tk.next({val: '}', silent: true}) && tk.next({val: ','}));
return arr;
};
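// For instance, `union { null, string }` yields `['null', 'string']` (the
// leading `union` name is consumed by `_readType` before this method runs).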
Reader.prototype._readRecord = function (schema) {
var tk = this._tk;
if (!tk.next({val: '{', silent: true})) {
schema.name = tk.next({id: 'name'}).val;
tk.next({val: '{'});
}
schema.fields = [];
while (!tk.next({val: '}', silent: true})) {
schema.fields.push(this._readField());
tk.next({val: ';'});
}
return schema;
};
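// For instance, reading `record Pair { int left; int right; }` (e.g. via
// `Reader.readSchema`) yields:
//
//   {
//     type: 'record',
//     name: 'Pair',
//     fields: [{type: 'int', name: 'left'}, {type: 'int', name: 'right'}]
//   }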
Reader.prototype._readImports = function (imports, maybeMessage) {
var tk = this._tk;
var numImports = 0;
var pos = tk.pos;
while (tk.next({val: 'import', silent: true})) {
if (!numImports && maybeMessage && tk.next({val: '(', silent: true})) {
// This will happen if a message is named `import`.
tk.pos = pos;
return;
}
var kind = tk.next({id: 'name'}).val;
var fname = JSON.parse(tk.next({id: 'string'}).val);
tk.next({val: ';'});
imports.push({kind: kind, name: fname});
numImports++;
}
return numImports;
};
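// For instance, the statement `import idl "common.avdl";` pushes
// `{kind: 'idl', name: 'common.avdl'}` onto `imports` (the file name here is
// hypothetical).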
// Helpers.
/**
* Simple class to split an input string into tokens.
*
* There are different types of tokens, characterized by their `id`:
*
* + `number`, numbers.
* + `name`, references.
* + `string`, double-quoted strings.
* + `operator`, anything else (always a single character).
* + `javadoc`, only emitted when `next` is called with `emitJavadoc` set.
* + `json`, only emitted when `next` is called with `'json'` as `id` (the
* tokenizer doesn't have enough context to predict these).
*/
function Tokenizer(str) {
this._str = str;
this.pos = 0;
}
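// A short illustration of the token stream:
//
//   var tk = new Tokenizer('fixed MD5(16);');
//   tk.next();             // => {pos: 0, id: 'name', val: 'fixed'}
//   tk.next({id: 'name'}); // => {pos: 6, id: 'name', val: 'MD5'}
//   tk.next({val: '('});   // => {pos: 9, id: 'operator', val: '('}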
Tokenizer.prototype.next = function (opts) {
var token = {pos: this.pos, id: undefined, val: undefined};
var javadoc = this._skip(opts && opts.emitJavadoc);
if (javadoc) {
token.id = 'javadoc';
token.val = javadoc;
} else {
var pos = this.pos;
var str = this._str;
var c = str.charAt(pos);
if (!c) {
token.id = '(eof)';
} else {
if (opts && opts.id === 'json') {
token.id = 'json';
this.pos = this._endOfJson();
} else if (c === '"') {
token.id = 'string';
this.pos = this._endOfString();
} else if (/[0-9]/.test(c)) {
token.id = 'number';
this.pos = this._endOf(/[0-9]/);
} else if (/[`A-Za-z_.]/.test(c)) {
token.id = 'name';
this.pos = this._endOf(/[`A-Za-z0-9_.]/);
} else {
token.id = 'operator';
this.pos = pos + 1;
}
token.val = str.slice(pos, this.pos);
if (token.id === 'json') {
// Let's be nice and give a more helpful error message when this occurs
// (JSON parsing errors wouldn't let us find the location otherwise).
try {
token.val = JSON.parse(token.val);
} catch (err) {
throw this.error('invalid JSON', token);
}
} else if (token.id === 'name') {
// Unescape names (our parser doesn't need the backticks).
token.val = token.val.replace(/`/g, '');
}
}
}
var err;
if (opts && opts.id && opts.id !== token.id) {
err = this.error(f('expected ID %s', opts.id), token);
} else if (opts && opts.val && opts.val !== token.val) {
err = this.error(f('expected value %s', opts.val), token);
}
if (!err) {
return token;
} else if (opts && opts.silent) {
this.pos = token.pos; // Backtrack to start of token.
return undefined;
} else {
throw err;
}
};
Tokenizer.prototype.error = function (reason, context) {
// Context must be either a token or a position.
var isToken = typeof context != 'number';
var pos = isToken ? context.pos : context;
var str = this._str;
var lineNum = 1;
var lineStart = 0;
var i;
for (i = 0; i < pos; i++) {
if (str.charAt(i) === '\n') {
lineNum++;
lineStart = i;
}
}
var msg = isToken ? f('invalid token %j: %s', context, reason) : reason;
var err = new Error(msg);
err.token = isToken ? context : undefined;
err.lineNum = lineNum;
err.colNum = pos - lineStart;
return err;
};
/** Skip whitespace and comments. */
Tokenizer.prototype._skip = function (emitJavadoc) {
var str = this._str;
var isJavadoc = false;
var pos, c;
while ((c = str.charAt(this.pos)) && /\s/.test(c)) {
this.pos++;
}
pos = this.pos;
if (c === '/') {
switch (str.charAt(this.pos + 1)) {
case '/':
this.pos += 2;
while ((c = str.charAt(this.pos)) && c !== '\n') {
this.pos++;
}
return this._skip(emitJavadoc);
case '*':
this.pos += 2;
if (str.charAt(this.pos) === '*') {
isJavadoc = true;
}
while ((c = str.charAt(this.pos++))) {
if (c === '*' && str.charAt(this.pos) === '/') {
this.pos++;
if (isJavadoc && emitJavadoc) {
return extractJavadoc(str.slice(pos + 3, this.pos - 2));
}
return this._skip(emitJavadoc);
}
}
throw this.error('unterminated comment', pos);
}
}
};
/** Generic helper to find the end of a run of characters matching `pat`. */
Tokenizer.prototype._endOf = function (pat) {
var pos = this.pos;
var str = this._str;
while (pat.test(str.charAt(pos))) {
pos++;
}
return pos;
};
/** Find end of a string. */
Tokenizer.prototype._endOfString = function () {
var pos = this.pos + 1; // Skip first double quote.
var str = this._str;
var c;
while ((c = str.charAt(pos))) {
if (c === '"') {
// The spec doesn't explicitly say so, but IDLs likely only
// allow double quotes for strings (C- and Java-style).
return pos + 1;
}
if (c === '\\') {
pos += 2;
} else {
pos++;
}
}
throw this.error('unterminated string', pos - 1);
};
/** Find end of JSON object, throwing an error if the end is reached first. */
Tokenizer.prototype._endOfJson = function () {
var pos = utils.jsonEnd(this._str, this.pos);
if (pos < 0) {
throw this.error('invalid JSON', pos);
}
return pos;
};
/**
* Extract Javadoc contents from the comment.
*
* The parsing is intentionally simple: it only removes the line prefixes and
* leading and trailing empty lines. It's better to be conservative with
* formatting than to risk losing information.
*/
function extractJavadoc(str) {
var lines = str
.replace(/^[ \t]+|[ \t]+$/g, '') // Trim whitespace.
.split('\n').map(function (line, i) {
return i ? line.replace(/^\s*\*\s?/, '') : line;
});
// Guard on `lines.length` so a fully empty comment (e.g. `/**/`) can't loop
// forever.
while (lines.length && !lines[0]) {
lines.shift();
}
while (lines.length && !lines[lines.length - 1]) {
lines.pop();
}
return lines.join('\n');
}
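// For instance, `extractJavadoc(' A short description.\n * Second line. ')`
// returns `'A short description.\nSecond line.'`.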
/** Returns the namespace generated by a protocol. */
function protocolNamespace(protocol) {
if (protocol.namespace) {
return protocol.namespace;
}
var match = /^(.*)\.[^.]+$/.exec(protocol.protocol);
return match ? match[1] : undefined;
}
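// For instance, `protocolNamespace({protocol: 'org.example.Ping'})` returns
// `'org.example'`, while `protocolNamespace({protocol: 'Ping'})` returns
// `undefined`.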
module.exports = {
Tokenizer: Tokenizer,
assembleProtocol: assembleProtocol,
read: read,
readProtocol: Reader.readProtocol,
readSchema: Reader.readSchema
};