dotfiles/vscode/.vscode/extensions/randomfractalsinc.vscode-data-preview-2.3.0/node_modules/parquets/lib/shred.js
Errol Sancaktar ff17c17e23 vscode
2024-06-14 09:31:58 -06:00

187 lines
6.6 KiB
JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const Types = require("./types");
/**
 * Create an empty shred buffer for the given schema: one entry per column
 * (keyed by field key) holding empty level/value arrays and a zero count,
 * plus an overall row count of zero.
 */
function shredBuffer(schema) {
  const columnData = {};
  schema.fieldList.forEach((field) => {
    columnData[field.key] = { dlevels: [], rlevels: [], values: [], count: 0 };
  });
  return { rowCount: 0, columnData };
}
exports.shredBuffer = shredBuffer;
/**
 * 'Shred' a record into a list of <value, repetition_level, definition_level>
 * tuples per column using the Google Dremel Algorithm.
 *
 * The buffer argument must point to an object into which the shredded record
 * will be returned. You may re-use the buffer for repeated calls to this function
 * to append to an existing buffer, as long as the schema is unchanged.
 *
 * The format in which the shredded records will be stored in the buffer is as
 * follows:
 *
 * buffer = {
 * columnData: {
 * 'my_col': {
 * dlevels: [d1, d2, .. dN],
 * rlevels: [r1, r2, .. rN],
 * values: [v1, v2, .. vN],
 * }, ...
 * },
 * rowCount: X,
 * }
 */
/**
 * 'Shred' a single record into per-column <value, rlevel, dlevel> tuples and
 * append them to `buffer`. The record is shredded into a scratch buffer first,
 * so a validation error during shredding leaves `buffer` untouched.
 */
function shredRecord(schema, record, buffer) {
  // Shred into a fresh scratch buffer; this may throw on invalid records.
  const data = shredBuffer(schema).columnData;
  shredRecordFields(schema.fields, record, data, 0, 0);
  // First record ever: adopt the scratch buffer wholesale.
  if (!('columnData' in buffer) || !('rowCount' in buffer)) {
    buffer.rowCount = 1;
    buffer.columnData = data;
    return;
  }
  // Otherwise append each column's levels, values and count to the buffer.
  buffer.rowCount += 1;
  for (const field of schema.fieldList) {
    const src = data[field.key];
    const dst = buffer.columnData[field.key];
    dst.rlevels.push(...src.rlevels);
    dst.dlevels.push(...src.dlevels);
    dst.values.push(...src.values);
    dst.count += src.count;
  }
}
exports.shredRecord = shredRecord;
/**
 * Recursively shred one nesting level of a record into `data`.
 *
 * For every field in `fields`, collects the field's value(s) from `record`,
 * validates repetition constraints, and pushes one
 * <value, repetition_level, definition_level> tuple per value — or a single
 * null-marker tuple (levels only, no value) when the field is absent — into
 * `data[field.key]`. Nested fields recurse with raised levels.
 *
 * @param fields map of field name -> field descriptor for this level
 * @param record the (sub-)record being shredded, or null for a missing branch
 * @param data   column buffers keyed by field.key; mutated in place
 * @param rLevel repetition level inherited from the parent
 * @param dLevel definition level inherited from the parent
 * @throws Error when a REQUIRED field is missing, or a non-REPEATED field
 *         carries more than one value
 */
function shredRecordFields(fields, record, data, rLevel, dLevel) {
  for (const name in fields) {
    const field = fields[name];
    // Fetch values, normalized to an array (empty when absent or null).
    let values = [];
    if (record && (field.name in record) && record[field.name] != null) {
      // Array.isArray is robust across realms and Array subclasses, unlike
      // the previous `constructor === Array` check.
      if (Array.isArray(record[field.name])) {
        values = record[field.name];
      } else {
        values.push(record[field.name]);
      }
    }
    // Validate repetition constraints. A missing branch (record === null)
    // never triggers the REQUIRED check — the whole subtree is absent.
    if (values.length === 0 && !!record && field.repetitionType === 'REQUIRED') {
      throw new Error(`missing required field: ${field.name}`);
    }
    if (values.length > 1 && field.repetitionType !== 'REPEATED') {
      throw new Error(`too many values for field: ${field.name}`);
    }
    // Push a null marker: record the inherited levels so the reader can
    // tell how deep this record was actually defined.
    if (values.length === 0) {
      if (field.isNested) {
        shredRecordFields(field.fields, null, data, rLevel, dLevel);
      } else {
        data[field.key].count += 1;
        data[field.key].rlevels.push(rLevel);
        data[field.key].dlevels.push(dLevel);
      }
      continue;
    }
    // Push values: the first value keeps the inherited rLevel, repeats use
    // the field's own maximum repetition level.
    for (let i = 0; i < values.length; i++) {
      const rlvl = i === 0 ? rLevel : field.rLevelMax;
      if (field.isNested) {
        shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
      } else {
        data[field.key].count += 1;
        data[field.key].rlevels.push(rlvl);
        data[field.key].dlevels.push(field.dLevelMax);
        data[field.key].values.push(Types.toPrimitive(field.originalType || field.primitiveType, values[i]));
      }
    }
  }
}
/**
 * 'Materialize' a list of <value, repetition_level, definition_level>
 * tuples back to nested records (objects/arrays) using the Google Dremel
 * Algorithm.
 *
 * The buffer argument must point to an object with the following structure (i.e.
 * the same structure that is returned by shredRecords):
 *
 * buffer = {
 * columnData: {
 * 'my_col': {
 * dlevels: [d1, d2, .. dN],
 * rlevels: [r1, r2, .. rN],
 * values: [v1, v2, .. vN],
 * }, ...
 * },
 * rowCount: X,
 * }
 */
/**
 * Re-assemble shredded column data back into a list of nested record
 * objects, one per row in the buffer.
 */
function materializeRecords(schema, buffer) {
  // Pre-allocate one empty record object per row.
  const records = Array.from({ length: buffer.rowCount }, () => ({}));
  // Fill each column's values into the records in place.
  for (const key in buffer.columnData) {
    materializeColumn(schema, buffer, key, records);
  }
  return records;
}
exports.materializeRecords = materializeRecords;
/**
 * Materialize one column's shredded <value, rlevel, dlevel> tuples into the
 * pre-allocated `records` array, walking the field's schema branch to
 * rebuild nesting (Dremel record assembly).
 *
 * @param schema  parquet schema; findField/findFieldBranch resolve `key`
 * @param buffer  shredded buffer ({ columnData, rowCount })
 * @param key     column key into buffer.columnData
 * @param records one object per row; mutated in place
 */
function materializeColumn(schema, buffer, key, records) {
const data = buffer.columnData[key];
// Nothing stored for this column — leave the records untouched.
if (!data.count)
return;
const field = schema.findField(key);
const branch = schema.findFieldBranch(key);
// rLevels[r] counts entries seen at repetition level r; rLevels[0] - 1 is
// the current row index, deeper slots index into repeated lists.
// tslint:disable-next-line:prefer-array-literal
const rLevels = new Array(field.rLevelMax + 1).fill(0);
// vIndex walks data.values; it advances only for defined (non-null) leaves.
let vIndex = 0;
for (let i = 0; i < data.count; i++) {
const dLevel = data.dlevels[i];
const rLevel = data.rlevels[i];
// A tuple at repetition level r starts a new entry at that level and
// resets the counters for all deeper levels.
rLevels[rLevel]++;
rLevels.fill(0, rLevel + 1);
let rIndex = 0;
let record = records[rLevels[rIndex++] - 1];
// Internal nodes: walk the branch down to (but not including) the leaf,
// creating intermediate objects/array slots as needed. Stop early when
// dLevel shows the record was not defined this deep (null ancestor).
for (const step of branch) {
if (step === field)
break;
if (dLevel < step.dLevelMax)
break;
if (step.repetitionType === 'REPEATED') {
if (!(step.name in record))
record[step.name] = [];
// rIndex advances only at REPEATED steps, mirroring rLevelMax counting.
const ix = rLevels[rIndex++];
while (record[step.name].length <= ix)
record[step.name].push({});
record = record[step.name][ix];
}
else {
record[step.name] = record[step.name] || {};
record = record[step.name];
}
}
// Leaf node: only fully-defined tuples carry an entry in data.values.
if (dLevel === field.dLevelMax) {
const value = Types.fromPrimitive(field.originalType || field.primitiveType, data.values[vIndex]);
vIndex++;
if (field.repetitionType === 'REPEATED') {
if (!(field.name in record))
record[field.name] = [];
const ix = rLevels[rIndex];
// Pad with nulls so the value lands at its repetition index.
while (record[field.name].length <= ix)
record[field.name].push(null);
record[field.name][ix] = value;
}
else {
record[field.name] = value;
}
}
}
}
//# sourceMappingURL=shred.js.map