dotfiles/vscode/.vscode/extensions/randomfractalsinc.vscode-data-preview-2.3.0/node_modules/parquets/lib/schema.js
Errol Sancaktar ff17c17e23 vscode
2024-06-14 09:31:58 -06:00

152 lines
4.6 KiB
JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const codec_1 = require("./codec");
const compression_1 = require("./compression");
const shred_1 = require("./shred");
const types_1 = require("./types");
/**
 * A parquet file schema.
 *
 * Wraps a JSON schema definition and exposes the derived field tree
 * (`fields`) plus a flattened list of every field (`fieldList`).
 */
class ParquetSchema {
    /**
     * Create a new schema from a JSON schema definition.
     *
     * @param {Object} schema - field definitions keyed by field name; kept
     *   by reference on `this.schema`.
     */
    constructor(schema) {
        this.schema = schema;
        this.fields = buildFields(schema, 0, 0, []);
        this.fieldList = listFields(this.fields);
    }
    /**
     * Look up a single field by its path.
     *
     * @param {string|string[]} path - comma-separated string or array of
     *   field names leading from the root to the wanted field. The argument
     *   is never modified.
     * @returns {Object|undefined} the field at the end of the path, or
     *   `undefined` when the last segment does not exist.
     * @throws {TypeError} when an intermediate segment does not exist.
     */
    findField(path) {
        const parts = Array.isArray(path) ? path : path.split(',');
        const lastIndex = parts.length - 1;
        let node = this.fields;
        // Descend through every segment except the last one.
        for (let i = 0; i < lastIndex; i++) {
            node = node[parts[i]].fields;
        }
        return node[parts[lastIndex]];
    }
    /**
     * Collect every field along a path, from the outermost to the innermost.
     *
     * @param {string|string[]} path - comma-separated string or array of
     *   field names. The argument is never modified (iteration is by index
     *   rather than by shifting elements off the caller's array).
     * @returns {Object[]} one entry per path segment, in path order.
     * @throws {TypeError} when an intermediate segment does not exist.
     */
    findFieldBranch(path) {
        const parts = Array.isArray(path) ? path : path.split(',');
        const lastIndex = parts.length - 1;
        const branch = [];
        let node = this.fields;
        for (let i = 0; i < parts.length; i++) {
            const field = node[parts[i]];
            branch.push(field);
            // Only descend while there are further segments to resolve.
            if (i < lastIndex) {
                node = field.fields;
            }
        }
        return branch;
    }
    /**
     * Shred a record into a buffer; delegates to `shred_1.shredRecord`.
     *
     * @param {Object} record - the record to shred.
     * @param {Object} buffer - the buffer handed through to the shredder.
     */
    shredRecord(record, buffer) {
        shred_1.shredRecord(this, record, buffer);
    }
    /**
     * Rebuild records from a buffer; delegates to
     * `shred_1.materializeRecords`.
     *
     * @param {Object} buffer - the buffer handed through to the shredder.
     * @returns whatever `shred_1.materializeRecords` returns.
     */
    materializeRecords(buffer) {
        return shred_1.materializeRecords(this, buffer);
    }
    /**
     * Set the compression method on every non-nested field, both in the
     * original schema definition and in the derived field tree.
     * The value is stored as given; it is not validated here.
     *
     * @param {string} type - compression method name.
     * @returns {ParquetSchema} this schema, to allow chaining.
     */
    compress(type) {
        setCompress(this.schema, type);
        setCompress(this.fields, type);
        return this;
    }
    /**
     * Create a buffer for this schema; delegates to `shred_1.shredBuffer`.
     *
     * @returns whatever `shred_1.shredBuffer` returns.
     */
    buffer() {
        return shred_1.shredBuffer(this);
    }
}
exports.ParquetSchema = ParquetSchema;
/**
 * Walk a tree of field definitions and stamp `compression` on every entry
 * that has no `fields` property; entries with `fields` are descended into
 * and are not themselves modified.
 *
 * @param {Object} schema - field definitions keyed by field name (mutated).
 * @param {string} type - value assigned to each leaf's `compression`.
 */
function setCompress(schema, type) {
    for (const key in schema) {
        const entry = schema[key];
        if (!entry.fields) {
            entry.compression = type;
            continue;
        }
        setCompress(entry.fields, type);
    }
}
/**
 * Recursively turn a JSON schema definition into a tree of field
 * descriptors keyed by field name.
 *
 * For each field the repetition type and the maximum repetition/definition
 * levels are derived from its `optional` / `repeated` flags:
 *   - optional: definition level + 1
 *   - repeated: repetition level + 1, and definition level + 1 (once, even
 *     when the field is also optional)
 * A field flagged both optional and repeated is reported as 'REPEATED'.
 *
 * NOTE: for non-nested fields the defaults for `encoding` ('PLAIN') and
 * `compression` ('UNCOMPRESSED') are written back onto the caller's schema
 * entry, so the input definition is mutated.
 *
 * @param {Object} schema - field definitions keyed by field name.
 * @param {number} rLevelParentMax - max repetition level of the parent.
 * @param {number} dLevelParentMax - max definition level of the parent.
 * @param {string[]} path - names of the enclosing fields (not modified).
 * @returns {Object} field descriptors keyed by field name.
 * @throws {Error} on an unknown type, encoding or compression method.
 */
function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
    // Own-property check: a bare lookup or the `in` operator would also
    // accept names inherited from Object.prototype such as 'constructor'
    // or 'toString'.
    const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
    const fieldList = {};
    for (const name in schema) {
        const opts = schema[name];
        /* field repetition type */
        const required = !opts.optional;
        const repeated = !!opts.repeated;
        let rLevelMax = rLevelParentMax;
        let dLevelMax = dLevelParentMax;
        let repetitionType = 'REQUIRED';
        if (!required) {
            repetitionType = 'OPTIONAL';
            dLevelMax++;
        }
        if (repeated) {
            repetitionType = 'REPEATED';
            rLevelMax++;
            // An optional field already had its definition level bumped above.
            if (required) {
                dLevelMax++;
            }
        }
        const cpath = path.concat([name]);
        /* nested field */
        if (opts.fields) {
            fieldList[name] = {
                name,
                path: cpath,
                key: cpath.join(),
                repetitionType,
                rLevelMax,
                dLevelMax,
                isNested: true,
                fieldCount: Object.keys(opts.fields).length,
                fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)
            };
            continue;
        }
        const typeDef = hasOwn(types_1.PARQUET_LOGICAL_TYPES, opts.type)
            ? types_1.PARQUET_LOGICAL_TYPES[opts.type]
            : undefined;
        if (!typeDef) {
            throw new Error(`invalid parquet type: ${opts.type}`);
        }
        opts.encoding = opts.encoding || 'PLAIN';
        if (!hasOwn(codec_1.PARQUET_CODEC, opts.encoding)) {
            throw new Error(`unsupported parquet encoding: ${opts.encoding}`);
        }
        opts.compression = opts.compression || 'UNCOMPRESSED';
        if (!hasOwn(compression_1.PARQUET_COMPRESSION_METHODS, opts.compression)) {
            throw new Error(`unsupported compression method: ${opts.compression}`);
        }
        /* add to schema */
        fieldList[name] = {
            name,
            primitiveType: typeDef.primitiveType,
            originalType: typeDef.originalType,
            path: cpath,
            key: cpath.join(),
            repetitionType,
            encoding: opts.encoding,
            compression: opts.compression,
            typeLength: opts.typeLength || typeDef.typeLength,
            rLevelMax,
            dLevelMax
        };
    }
    return fieldList;
}
/**
 * Flatten a field tree into a single array, depth-first: each field is
 * emitted before the fields nested inside it.
 *
 * @param {Object} fields - field descriptors keyed by field name.
 * @returns {Object[]} a new array holding every field in the tree.
 */
function listFields(fields) {
    const collected = [];
    for (const key in fields) {
        const field = fields[key];
        collected.push(field);
        if (!field.isNested) {
            continue;
        }
        for (const descendant of listFields(field.fields)) {
            collected.push(descendant);
        }
    }
    return collected;
}
//# sourceMappingURL=schema.js.map