Skip to content

Instantly share code, notes, and snippets.

@dunhamsteve
Last active May 1, 2021 03:50
Show Gist options
  • Save dunhamsteve/78ec0162dc959e0678d449bb2713f387 to your computer and use it in GitHub Desktop.
This script exports your Craft.app databases as json files.
#!/usr/bin/env node
// Placed in the public domain
// Dump craft database as json
// This is intended as a sample of how to extract craft raw data for
// playing around with. It has no dependencies aside from node.
// The top section reads the Craft DB into memory (takes about 380ms for 50k blocks)
// To determine the realm file format I consulted the source code. This works with
// Realm 5, which Craft currently uses. It will need to be tweaked if Craft switches
// to Realm 6.
// The code at the end does a little cleanup (expanding nested json) and writes to realmData.js
// This can be replaced by whatever you want to do with the data.
// NB - you'll find the cached images, etc in Caches/com.luki.datacache
// the filenames are sha1(url).
let {readdirSync, readFileSync, writeFileSync} = require("fs");
// Minimal assert helper so the script stays dependency-free (node stdlib only).
let assert = (cond, msg) => { if (!cond) throw Error(msg || "Assert"); };
// Read a Craft Realm-5 database file into a plain object keyed by model name
// (the "class_" prefix stripped). When translateRefs is true, link-list values
// are rewritten from row indices to the target rows' primary keys.
// All nested helpers below close over `buf`, the whole file in memory.
function read_database(rfile2, translateRefs = true) {
// Read a blob (raw byte) node: 8-byte header followed by `size` payload bytes.
function readBlob(ref) {
// Refs are byte offsets into the file and must be even.
assert(!(ref & 1), `bad ref ${ref}`);
// Bytes 5-7 of the header are a big-endian 24-bit element count.
let size = (buf[ref + 5] << 16) + (buf[ref + 6] << 8) + buf[ref + 7];
let h = buf[ref + 4];
let width = 1 << (h & 7) >> 1;
// wtype 2 with width 1 — presumably Realm's raw-bytes payload kind; the
// asserts reject any other layout rather than mis-decode it.
let wtype = (h & 24) >> 3;
assert(wtype === 2);
assert(width === 1);
return buf.slice(ref+8,ref+8+size);
}
// Decode a node of short, fixed-width inline strings. Each slot is `width`
// bytes; the final byte of a slot stores (width - strlen - 1), i.e. the
// padding count, which lets us recover the true string length.
function readStringArray(ref) {
  assert(!(ref & 1), `bad ref ${ref}`);
  const count = (buf[ref + 5] << 16) + (buf[ref + 6] << 8) + buf[ref + 7];
  const header = buf[ref + 4];
  const width = 1 << (header & 7) >> 1;
  assert(((header & 24) >> 3) === 1);
  const strings = [];
  for (let i = 0; i < count; i++) {
    const start = ref + 8 + width * i;
    const pad = buf[start + width - 1];
    strings.push(buf.toString("utf8", start, start + width - pad - 1));
  }
  return strings;
}
// The top 3 bits of a node's header byte are its flag bits.
const getFlags = (ref) => buf[ref + 4] >> 5;
// Decode an integer-array node at byte offset `ref` in `buf`.
// Layout: 8-byte header (byte 4 = flags/width/type, bytes 5-7 = big-endian
// 24-bit element count) followed by the bit-packed payload.
function readArray(ref) {
  assert(!(ref & 1), `bad ref ${ref}`);
  let size = (buf[ref + 5] << 16) + (buf[ref + 6] << 8) + buf[ref + 7];
  let h = buf[ref + 4];
  // Element width in bits: 0, 1, 2, 4, 8, 16, 32 (64 is not handled below).
  let width = 1 << (h & 7) >> 1;
  let wtype = (h & 24) >> 3;
  assert(wtype === 0); // integer payload only
  let rval = [];
  for (let i = 0; i < size; i++) {
    if (width === 0)
      rval.push(0); // zero-width array: every element is implicitly 0
    else if (width === 1)
      rval.push(1 & buf[ref + 8 + i / 8 | 0] >> i % 8); // 8 bits per byte
    else if (width === 2)
      // 4 two-bit values per byte, so the shift cycles with period 4.
      // BUG FIX: was `2 * (i % 3)`, which mis-decoded every element i >= 3.
      rval.push(3 & buf[ref + 8 + i / 4 | 0] >> 2 * (i % 4));
    else if (width === 4)
      rval.push(15 & buf[ref + 8 + i / 2 | 0] >> 4 * (i % 2)); // 2 per byte
    else if (width === 8)
      rval.push(buf[ref + 8 + i]);
    else if (width === 16)
      rval.push(buf.readUInt16LE(ref + 8 + i * 2));
    else if (width === 32)
      rval.push(buf.readUInt32LE(ref + 8 + i * 4));
    else
      assert(false, `width ${width} ints not handled`);
  }
  return rval;
}
// Column readers keyed by column-type name (see colTypes). Each takes an
// accumulator and a leaf-node ref and appends that leaf's decoded values.
const readers = {
// Plain integers.
int(acc, ref) {
assert(getFlags(ref) == 0);
acc.push(...readArray(ref));
},
// Booleans are stored as 0/1 integers.
bool(acc, ref) {
assert(getFlags(ref) == 0);
for (let x of readArray(ref))
acc.push(x == 1);
},
// Link lists: each entry is 0 (empty list) or a ref to an array of row indices.
linklist(acc, ref) {
assert(getFlags(ref) == 2);
readArray(ref).forEach((v) => acc.push(v == 0 ? [] : readArray(v)));
},
// Timestamps: slot 0 points at a nested B+-tree; each of its leaves is an
// array whose first element is skipped (header-like) and the rest are values.
timestamp(acc, ref) {
assert(getFlags(ref) == 2);
scanBPTree(acc, readArray(ref)[0], (acc2, ref2) => readArray(ref2).slice(1).forEach((x) => acc2.push(x)));
},
// Strings come in three layouts, distinguished by the node's flag bits.
string(acc, ref) {
let flags = getFlags(ref);
if (flags == 2) {
// Medium strings: [ends ref, blob ref]; ends[i] is one past string i's
// trailing NUL inside the shared blob, so slice to (e - 1).
let arr = readArray(ref);
let ends = readArray(arr[0]);
let blob = readBlob(arr[1]);
let s = 0;
ends.forEach((e) => { acc.push(blob.toString('utf8', s, e - 1)); s = e });
} else if (flags === 3) {
// Big strings: an array of per-string blob refs; ref 0 means null.
// Each blob is NUL-terminated, hence length - 1.
for (let r of readArray(ref)) {
if (r) {
let blob = readBlob(r)
acc.push(blob.toString('utf8',0,blob.length-1))
} else {
acc.push(null)
}
}
} else if (flags === 0) {
// Short strings packed inline in a fixed-width string array.
acc.push(...readStringArray(ref));
}
}
};
// Walk a B+-tree, calling `reader(acc, leafRef)` on every leaf node.
// Flag bit 2 (value 4) marks an inner node whose middle entries are children.
function scanBPTree(acc, ref, reader) {
  // Leaf: hand it straight to the column reader.
  if (!(getFlags(ref) & 4)) {
    reader(acc, ref);
    return;
  }
  // Inner node: first and last array entries are bookkeeping; the
  // entries in between are refs to child nodes.
  const node = readArray(ref);
  for (let i = 1; i < node.length - 1; i++) {
    scanBPTree(acc, node[i], reader);
  }
}
// Realm column-type codes mapped to reader names (only the types this script handles).
const colTypes = {0: "int", 1: "bool", 2: "string", 8: "timestamp", 13: "linklist"};
// Decode one table: read its spec (column types/names/attrs), scan each
// column's B+-tree into a value array, then transpose columns into row objects.
// Returns {name, rows, refs} where refs maps linklist column name -> index of
// the target table (used later to translate row indices to primary keys).
function readTable(name, ref) {
let arr = readArray(ref);
let spec = readArray(arr[0]);
let types = readArray(spec[0]);
let names = readStringArray(spec[1]);
let attrs = readArray(spec[2]);
// Present only when the table has link columns; entries are tagged values,
// hence the >> 1 below to recover the target table index.
let subspecs = spec.length > 3 && readArray(spec[3]);
let crefs = readArray(arr[1]);
let cix = 0;
let six = 0;
let cols = [];
let refs = {};
for (let i = 0; i < names.length; i++) {
let cname = names[i];
let ct = colTypes[types[i]];
if (subspecs && ct == "linklist") {
refs[cname] = subspecs[six++] >> 1;
}
let col = [];
scanBPTree(col, crefs[cix], readers[ct]);
cols.push(col);
cix++;
// attr bit 0 makes the column occupy an extra slot in the column-ref
// array (presumably a search index) — skip it. TODO confirm vs Realm 5.
if (attrs[i] & 1)
cix++;
}
// Transpose: cols[j][i] becomes rows[i][names[j]].
let rows = [];
for (let i = 0; i < cols[0].length; i++) {
let row = {};
for (let j = 0; j < names.length; j++) {
row[names[j]] = cols[j][i];
}
rows.push(row);
}
return {name, rows, refs};
}
// The top-level node holds [ref to table-name array, ref to table-ref array];
// decode every table in name order.
function readTop(ref) {
  const [namesRef, tablesRef] = readArray(ref);
  const tableNames = readStringArray(namesRef);
  const tableRefs = readArray(tablesRef);
  const tables = [];
  tableNames.forEach((tableName, i) => {
    tables.push(readTable(tableName, tableRefs[i]));
  });
  return tables;
}
// Validate the Realm file magic ("T-DB" at offset 16) and return the active
// top ref. Bit 0 of byte 23 selects which of the two 8-byte header slots
// (offsets 0 and 8) holds the live top ref.
function get_topref(buf2) {
  const magic = buf2.readInt32LE(16);
  assert(magic === 0x42442d54, "bad magic");
  const slot = buf2.readUInt8(23) & 1;
  return buf2.readInt32LE(slot * 8);
}
// Load the whole file into memory, then walk it from the active top ref.
let buf = readFileSync(rfile2);
let topref = get_topref(buf);
let tables = readTop(topref);
let pks = {};
let db2 = {};
for (let table of tables) {
if (table.name == "pk") {
// The "pk" metatable maps each class name to its primary-key property.
for (let {pk_table, pk_property} of table.rows)
pks[pk_table] = pk_property;
} else if (table.name.startsWith("class_")) {
// Realm prefixes object tables with "class_"; strip it for the output keys.
db2[table.name.slice(6)] = table.rows;
}
}
// Optionally rewrite link lists in place: each row index in a linklist value
// is replaced by the target row's primary-key value.
if (translateRefs)
for (let table of tables) {
for (let k in table.refs) {
let dest = tables[table.refs[k]];
let dpk = pks[dest.name.slice(6)];
console.log(table.name, k, "->", dest.name, dpk);
for (let item of table.rows) {
let value = item[k];
if (value && value.length) {
for (let i = 0; i < value.length; i++)
value[i] = dest.rows[value[i]][dpk];
}
}
}
}
return db2;
}
// Columns whose values are JSON serialized as strings in the database;
// decodeNestedJson parses these in place after export.
let jsonProps = {
BlockDataModel: ['offSchemaProperties', 'rawProperties','style', 'pageStyleData'],
FolderDataModel: ['properties'],
DocumentShareDataModel: ['_sharedBlocks'],
SnapshotDataModel:['_userIds'],
SpaceConfigDataModel: ['value'],
}
// Parse the stringified-JSON columns listed in jsonProps in place, including
// two known cases of JSON nested inside JSON. Mutates `db` and returns nothing.
function decodeNestedJson(db) {
  // null/empty stays null; "{}" short-circuits; anything else is parsed.
  function decode(value) {
    if (!value) return null
    if (value === "{}") return {}
    return JSON.parse(value)
  }
  for (let tname in jsonProps) {
    // A table may be absent from this database (schema varies across Craft
    // versions) — skip it instead of crashing on `for...of undefined`.
    for (let block of db[tname] ?? []) {
      for (let k of jsonProps[tname]) {
        block[k] = decode(block[k])
      }
      if (tname === 'FolderDataModel') {
        // more json inside of json
        let props = block.properties
        if (props && props.icon) {
          props.icon = decode(props.icon)
        }
      }
      if (tname === 'DocumentShareDataModel' && Array.isArray(block._sharedBlocks)) {
        // _sharedBlocks was just decoded above; it may be null, so only
        // descend into it when it actually decoded to an array.
        block._sharedBlocks.forEach(sb => {sb.settings = decode(sb.settings)})
      }
    }
  }
}
// Craft keeps one realm file per space inside its macOS app container.
let base = `${process.env.HOME}/Library/Containers/com.lukilabs.lukiapp/Data/Library/Application Support/com.lukilabs.lukiapp`;
for (let fn of readdirSync(base)) {
  if (fn.startsWith("LukiMain") && fn.endsWith(".realm")) {
    // Filenames look like "LukiMain_<...>||<realm_id>.realm". Fall back to the
    // full name when there is no "_" so an unexpected file can't crash the run
    // (the original `fn.split('_')[1].split(...)` threw on undefined).
    let realm_id = (fn.split('_')[1] ?? fn).split('||').pop()
    console.log('Read', realm_id, fn)
    let rfile = `${base}/${fn}`
    // pass in false here if you want some of the xrefs to be array indices instead of uuids.
    let db = read_database(rfile, true);
    // Some values are strings containing json, we decode them here for convenience.
    decodeNestedJson(db)
    let json = JSON.stringify(db, null, ' ')
    let outfn = `realm_${realm_id}.json`
    writeFileSync(outfn, json)
    console.log('wrote', json.length, 'bytes to', outfn)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment