"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const codec_1 = require("./codec");
const compression_1 = require("./compression");
const shred_1 = require("./shred");
const types_1 = require("./types");

/**
 * A parquet file schema.
 *
 * Instances expose:
 *  - `schema`:    the definition object passed to the constructor
 *  - `fields`:    a tree of field descriptors keyed by field name
 *  - `fieldList`: the same descriptors flattened into an array, each parent
 *                 immediately followed by its descendants
 */
class ParquetSchema {
    /**
     * Create a new schema from a JSON schema definition.
     *
     * Note: building the field tree writes the default `encoding` and
     * `compression` back into the leaf entries of `schema`.
     *
     * @param {Object} schema - map of field name to field options
     * @throws {Error} on an unknown type, encoding or compression method
     */
    constructor(schema) {
        this.schema = schema;
        this.fields = buildFields(schema, 0, 0, []);
        this.fieldList = listFields(this.fields);
    }

    /**
     * Look up the descriptor of a single (possibly nested) field.
     *
     * @param {string|string[]} path - field names from the root down, either
     *   as an array or as one comma-separated string (the format of `key`)
     * @returns {Object|undefined} the descriptor at the end of the path, or
     *   `undefined` when the last segment does not exist
     * @throws {TypeError} when an intermediate segment does not exist
     */
    findField(path) {
        const segments = toPathSegments(path);
        const last = segments.length - 1;
        let level = this.fields;
        for (let i = 0; i < last; i++) {
            level = level[segments[i]].fields;
        }
        return level[segments[last]];
    }

    /**
     * Look up every descriptor along a path, from the outermost field down to
     * the field named by the final segment.
     *
     * The `path` argument is never modified; callers can safely pass a
     * descriptor's own `path` array.
     *
     * @param {string|string[]} path - field names from the root down, either
     *   as an array or as one comma-separated string
     * @returns {Object[]} one descriptor per path segment, outermost first
     * @throws {TypeError} when an intermediate segment does not exist
     */
    findFieldBranch(path) {
        const segments = toPathSegments(path);
        const last = segments.length - 1;
        const branch = [];
        let level = this.fields;
        for (let i = 0; i <= last; i++) {
            const node = level[segments[i]];
            branch.push(node);
            // Only descend while there are further segments to resolve.
            if (i < last) {
                level = node.fields;
            }
        }
        return branch;
    }

    /**
     * Shred `record` into `buffer`; delegates to `shredRecord` from ./shred.
     */
    shredRecord(record, buffer) {
        shred_1.shredRecord(this, record, buffer);
    }

    /**
     * Rebuild records from `buffer`; delegates to `materializeRecords` from
     * ./shred and returns its result.
     */
    materializeRecords(buffer) {
        return shred_1.materializeRecords(this, buffer);
    }

    /**
     * Set the compression method of every leaf field, both in the original
     * schema definition and in the built field tree.
     *
     * The value is not validated here.
     *
     * @param {string} type - compression method name
     * @returns {ParquetSchema} this schema, for chaining
     */
    compress(type) {
        setCompress(this.schema, type);
        setCompress(this.fields, type);
        return this;
    }

    /**
     * Create a buffer for this schema; delegates to `shredBuffer` from
     * ./shred and returns its result.
     */
    buffer() {
        return shred_1.shredBuffer(this);
    }
}
exports.ParquetSchema = ParquetSchema;

/**
 * Normalise a field path to an array of segments without touching the input.
 * Arrays are returned as-is (callers must not mutate the result); anything
 * else is expected to be a comma-separated string.
 */
function toPathSegments(path) {
    return Array.isArray(path) ? path : path.split(',');
}

/**
 * Recursively assign `type` as the `compression` of every node that has no
 * `fields` property. Works on both raw schema definitions and built field
 * trees, since both mark nesting with `fields`.
 */
function setCompress(schema, type) {
    for (const name in schema) {
        const node = schema[name];
        if (node.fields) {
            setCompress(node.fields, type);
        }
        else {
            node.compression = type;
        }
    }
}

/**
 * Build the field-descriptor tree for one level of a schema definition.
 *
 * @param {Object} schema - map of field name to field options at this level
 * @param {number} rLevelParentMax - `rLevelMax` of the enclosing field (0 at the root)
 * @param {number} dLevelParentMax - `dLevelMax` of the enclosing field (0 at the root)
 * @param {string[]} path - names of the enclosing fields, outermost first
 * @returns {Object} map of field name to descriptor
 * @throws {Error} on an unknown type, encoding or compression method
 */
function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
    const fieldList = {};
    for (const name in schema) {
        const opts = schema[name];

        /* field repetition type */
        const required = !opts.optional;
        const repeated = !!opts.repeated;
        let rLevelMax = rLevelParentMax;
        let dLevelMax = dLevelParentMax;
        let repetitionType = 'REQUIRED';
        if (!required) {
            repetitionType = 'OPTIONAL';
            dLevelMax++;
        }
        if (repeated) {
            // `repeated` takes precedence over `optional` for the type label.
            repetitionType = 'REPEATED';
            rLevelMax++;
            // An optional field already bumped dLevelMax above, so a repeated
            // field raises it exactly once either way.
            if (required) {
                dLevelMax++;
            }
        }

        const cpath = path.concat([name]);

        /* nested field */
        if (opts.fields) {
            fieldList[name] = {
                name,
                path: cpath,
                key: cpath.join(),
                repetitionType,
                rLevelMax,
                dLevelMax,
                isNested: true,
                fieldCount: Object.keys(opts.fields).length,
                fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)
            };
            continue;
        }

        /* leaf field: validate type, encoding and compression */
        const typeDef = types_1.PARQUET_LOGICAL_TYPES[opts.type];
        if (!typeDef) {
            throw new Error(`invalid parquet type: ${opts.type}`);
        }
        // Defaults are deliberately written back into the caller's schema.
        opts.encoding = opts.encoding || 'PLAIN';
        if (!(opts.encoding in codec_1.PARQUET_CODEC)) {
            throw new Error(`unsupported parquet encoding: ${opts.encoding}`);
        }
        opts.compression = opts.compression || 'UNCOMPRESSED';
        if (!(opts.compression in compression_1.PARQUET_COMPRESSION_METHODS)) {
            throw new Error(`unsupported compression method: ${opts.compression}`);
        }

        /* add to schema */
        fieldList[name] = {
            name,
            primitiveType: typeDef.primitiveType,
            originalType: typeDef.originalType,
            path: cpath,
            key: cpath.join(),
            repetitionType,
            encoding: opts.encoding,
            compression: opts.compression,
            typeLength: opts.typeLength || typeDef.typeLength,
            rLevelMax,
            dLevelMax
        };
    }
    return fieldList;
}

/**
 * Flatten a field-descriptor tree into an array, depth-first, with each
 * nested field listed before its children.
 */
function listFields(fields) {
    const list = [];
    for (const k in fields) {
        const field = fields[k];
        list.push(field);
        if (field.isNested) {
            list.push(...listFields(field.fields));
        }
    }
    return list;
}
//# sourceMappingURL=schema.js.map