Skip to content

Instantly share code, notes, and snippets.

@primiano
Created June 9, 2020 21:10
Show Gist options
  • Save primiano/8a0daabbbc5f8e28defad7c6e25579bd to your computer and use it in GitHub Desktop.
Save primiano/8a0daabbbc5f8e28defad7c6e25579bd to your computer and use it in GitHub Desktop.
// Shared UTF-8 codec instances: `encoder` writes string cells in encode(),
// `decoder` reads the string area back in decodeAndIterate().
const encoder = new TextEncoder('utf-8');
const decoder = new TextDecoder('utf-8');
// NOTE(review): TAG_MASK is not referenced by any code visible in this file.
const TAG_MASK = 0x3;
// Per-cell type tags. encode() writes one tag byte per cell; decodeAndIterate()
// reads them back to decide where each cell's value lives.
const TAG_INT32 = 0; // value occupies a 4-byte slot in the numeric payload
const TAG_FLOAT64 = 1; // value occupies an 8-byte-aligned, 8-byte slot in the numeric payload
const TAG_STRING = 2; // value is a NUL-terminated UTF-8 string in the chunk's string area
/**
 * Serializes a flat list of cells into a chunked binary buffer.
 *
 * Chunk layout (all offsets relative to the chunk start, which is 8-byte
 * aligned):
 *   [0, 4)                        uint32 chunk length in bytes
 *   [4, 8)                        uint32 number of cells in the chunk
 *   [8, BITMAP_SIZE)              one tag byte per cell (TAG_INT32/FLOAT64/STRING)
 *   [BITMAP_SIZE, +INT_PAYLOAD_SIZE)  numeric payload (4-byte ints, aligned 8-byte floats)
 *   [.., chunk length)            NUL-terminated UTF-8 strings, padded to 8 bytes
 *
 * A chunk is closed as soon as its tag area or numeric payload is (nearly)
 * full; a final chunk is always emitted, so the result is never empty.
 *
 * NOTE(review): strings containing U+0000 cannot be represented because NUL
 * is the string terminator.
 *
 * @param {Array<number|string>} cells - Values to encode, in order.
 * @returns {ArrayBuffer} A buffer holding exactly the encoded chunks.
 * @throws {TypeError} If a cell is neither number-like nor a string.
 */
function encode(cells) {
  const CHUNK_HEADER_SIZE = 8; // two uint32 fields: chunk length, cell count
  const alignUp8 = (n) => Math.ceil(n / 8) * 8;

  // Initial guess only. Typed-array writes past the end are silently dropped,
  // so ensureCapacity() must be called before writing beyond the current size.
  let buf = new ArrayBuffer(
    alignUp8(Math.max(cells.length * 40, BITMAP_SIZE + INT_PAYLOAD_SIZE, CHUNK_HEADER_SIZE)),
  );
  let b8 = new Uint8Array(buf);
  let b32 = new Uint32Array(buf);
  let f64 = new Float64Array(buf);

  // Grows the backing buffer (at least doubling) and re-creates the views.
  function ensureCapacity(minBytes) {
    if (minBytes <= buf.byteLength) {
      return;
    }
    // Keep the size a multiple of 8 so the Float64Array view stays valid.
    const grown = new ArrayBuffer(alignUp8(Math.max(minBytes, buf.byteLength * 2)));
    new Uint8Array(grown).set(b8);
    buf = grown;
    b8 = new Uint8Array(buf);
    b32 = new Uint32Array(buf);
    f64 = new Float64Array(buf);
  }

  // [num cells] [cell bitmap]
  // [2048 payload]
  // [strings]
  let chunkStart = 0;
  let bOff = 8; // next tag byte
  let pStart = 0; // start of the numeric payload
  let pOff = 0; // next free byte in the numeric payload
  let sOff = 0; // next free byte in the string area

  // Writes the header of the current chunk, then positions all offsets at the
  // start of the following chunk. Returns the end offset of the closed chunk.
  function finalizeChunk() {
    const nextChunkStart = alignUp8(sOff);
    // The closed chunk must physically exist up to its end, even when its
    // payload/string areas were left (partly) unused.
    ensureCapacity(nextChunkStart);
    const chunkLen = nextChunkStart - chunkStart;
    const numCells = bOff - chunkStart - CHUNK_HEADER_SIZE;
    console.log('new chunk', chunkLen, numCells);
    b32[chunkStart / 4] = chunkLen;
    b32[chunkStart / 4 + 1] = numCells;
    chunkStart += chunkLen;
    bOff = chunkStart + CHUNK_HEADER_SIZE;
    pStart = chunkStart + BITMAP_SIZE;
    pOff = pStart;
    sOff = pOff + INT_PAYLOAD_SIZE;
    return nextChunkStart;
  }

  // With all offsets at 0 this emits a zero-length chunk, i.e. it only
  // initializes the offsets for the first real chunk.
  finalizeChunk();

  for (const cell of cells) {
    // `0 * cell === 0` is the original coercing check: besides finite numbers
    // it also accepts numeric strings, '', null and booleans, which are
    // encoded as numbers. Non-finite numbers are accepted explicitly so they
    // are stored as float64 instead of falling into the string branch.
    // NOTE(review): confirm that coercing '' and numeric strings is intended.
    if (typeof cell === 'number' || 0 * cell === 0) {
      // sOff is always at or past the end of this chunk's tag and payload
      // areas, so this covers both writes below.
      ensureCapacity(sOff);
      const num = Number(cell);
      // Only non-negative integers fit the 4-byte slot losslessly for readers
      // that view it as unsigned; fractions and negatives go to float64.
      if (Number.isInteger(num) && num >= 0 && num <= 0x7FFFFFFF) {
        b8[bOff++] = TAG_INT32;
        b32[pOff / 4] = num;
        pOff += 4;
      } else {
        b8[bOff++] = TAG_FLOAT64;
        const idx = Math.ceil(pOff / 8); // align the slot to 8 bytes
        f64[idx] = num;
        pOff = idx * 8 + 8;
      }
    } else {
      if (typeof cell !== 'string') {
        throw new TypeError(`Unsupported cell type: ${typeof cell}`);
      }
      // Worst case for UTF-8 is 3 bytes per UTF-16 code unit, plus the NUL.
      ensureCapacity(sOff + cell.length * 3 + 1);
      b8[bOff++] = TAG_STRING;
      sOff += encoder.encodeInto(cell, b8.subarray(sOff)).written;
      b8[sOff++] = 0;
    }
    if (bOff - chunkStart >= BITMAP_SIZE || pOff - pStart >= INT_PAYLOAD_SIZE - 8) {
      finalizeChunk();
    }
  }

  // Evaluate finalizeChunk() first: it may replace `buf` when growing.
  const end = finalizeChunk();
  return buf.slice(0, end);
}
/**
 * Walks every chunk of an encoded buffer, feeding each cell into a Hasher.
 *
 * For each chunk the header supplies the chunk length and cell count; the tag
 * byte of every cell then selects where its value is read from:
 *   - TAG_INT32:   next 4-byte slot of the numeric payload (signed),
 *   - TAG_FLOAT64: next 8-byte-aligned slot of the numeric payload,
 *   - TAG_STRING:  next NUL-separated entry of the chunk's string area
 *                  (only its length is hashed).
 * Cells with any other tag are counted but not hashed.
 *
 * NOTE(review): `Hasher` is not defined in the visible part of this file;
 * this function only relies on `update(value)` and a `digest` property.
 *
 * @param {ArrayBuffer} buf - Encoded buffer; byte length must be a multiple of 8.
 * @returns {[number, *]} Total number of cells seen and the hasher's digest.
 * @throws {RangeError} If a chunk header declares an impossible chunk size.
 */
function decodeAndIterate(buf) {
  const CHUNK_HEADER_SIZE = 8; // two uint32 fields: chunk length, cell count
  const b8 = new Uint8Array(buf);
  const u32 = new Uint32Array(buf);
  // Signed view: TAG_INT32 slots hold two's-complement values.
  const i32 = new Int32Array(buf);
  const f64 = new Float64Array(buf);
  const hasher = new Hasher();
  let numCells = 0;
  let chunkStart = 0;

  // Stop when no complete header is left; this also handles an empty buffer.
  while (chunkStart + CHUNK_HEADER_SIZE <= buf.byteLength) {
    const chunkSize = u32[chunkStart / 4];
    const cellsInChunk = u32[chunkStart / 4 + 1];
    // A zero or misaligned size would stall or derail the walk; fail loudly.
    if (chunkSize < CHUNK_HEADER_SIZE || chunkSize % 8 !== 0) {
      throw new RangeError(`Corrupt chunk at byte ${chunkStart}: size ${chunkSize}`);
    }

    let pOff = chunkStart + BITMAP_SIZE;
    const sStart = pOff + INT_PAYLOAD_SIZE;
    const sEnd = chunkStart + chunkSize;
    const strings = decoder.decode(b8.subarray(sStart, sEnd)).split('\0');
    let strIdx = 0;

    const tagsEnd = chunkStart + CHUNK_HEADER_SIZE + cellsInChunk;
    for (let bOff = chunkStart + CHUNK_HEADER_SIZE; bOff < tagsEnd; bOff++) {
      const tag = b8[bOff];
      if (tag === TAG_INT32) {
        hasher.update(i32[pOff / 4]);
        pOff += 4;
      } else if (tag === TAG_FLOAT64) {
        const idx = Math.ceil(pOff / 8); // float slots are 8-byte aligned
        hasher.update(f64[idx]);
        pOff = idx * 8 + 8;
      } else if (tag === TAG_STRING) {
        hasher.update(strings[strIdx++].length);
      }
      numCells++;
    }

    chunkStart += chunkSize;
  }
  return [numCells, hasher.digest];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment