Skip to content

Instantly share code, notes, and snippets.

@jsundram
Last active November 10, 2023 21:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jsundram/f362c5a62e0dc9cb1aab9c815d9e5b5c to your computer and use it in GitHub Desktop.
Save jsundram/f362c5a62e0dc9cb1aab9c815d9e5b5c to your computer and use it in GitHub Desktop.

MDB Excavation

MDB is a Microsoft Access Database file. We'd like to look inside of it, but we have a Mac and no MS Office license.

export.py is a program that can do each of the following:

  1. Put each table into its own .csv file to look at.
  2. Put each table in the data into an excel / google sheets doc for easy viewing/browsing
  3. Create a sqlite db which is similarly portable but accessible via a variety of common tools

Prerequisites

  • mdbtools, a set of tools for working with mdb files from the command line. brew install mdbtools
  • pip install -r requirements.txt in order to write Excel files.

Usage:

export.py [-h] mdb_file format

Convert MDB files to CSV, XLSX, or SQLite

positional arguments:
  mdb_file    Path to the MDB file
  format      Export format: csv, xlsx, or sqlite

options:
  -h, --help  show this help message and exit

Credits

#!/usr/bin/env python3
import argparse
import csv
import io
import os
import shutil
import sqlite3
import subprocess
import sys
# pip install xlsxwriter
from xlsxwriter.workbook import Workbook
"""
NB: This need mdbtools (`brew install mdbtools`) in order to work
Wraps mdbtools to rescue data from an .mdb file and make it accessible via:
1. csv
2. xlsx
3. sqlite
Doesn't rely on pandas for ease of debugging / installation etc.
"""
def run_command(cmd):
"""runs the command, returns the utf-8 decoded output as a single string"""
s = subprocess.check_output(cmd)
return s.decode("utf-8")
def list_tables(filename):
delimiter = ", "
u = run_command(["mdb-tables", "-d", delimiter, filename])
tables = u.split(delimiter)
return [stripped for t in tables if (stripped := t.strip()) != ""]
def read(dbname, tablename, readfn=csv.reader):
u = run_command(["mdb-export", dbname, tablename])
f = io.StringIO(u)
r = readfn(f)
return [row for row in r]
def export_csv(dbname, tablename):
u = run_command(["mdb-export", dbname, tablename])
with open(tablename + ".csv", "w") as f:
f.write(u)
print("wrote %d bytes to %s.csv" % (len(u), tablename))
def export_excel(dbname, tables, filename):
workbook = Workbook(filename)
rows, cols = 0, 0
for table in tables:
data = read(dbname, table)
header = data[0]
print("%s has %d columns" % (table, len(header)))
worksheet = workbook.add_worksheet(table)
for i, row in enumerate(data):
worksheet.write_row(i, 0, row)
cols += len(header)
rows += len(data)
print(
"Wrote %d tables with a total of %d cols and %d rows to %s"
% (len(table), cols, rows, filename)
)
workbook.close()
def prompt_overwrite(filename):
if os.path.exists(filename):
response = (
input(f"The file '{filename}' already exists. Overwrite? (Y/n): ")
.strip()
.lower()
)
r = response in ["", "y"]
if r:
os.remove(filename)
else:
print("Operation cancelled.")
return r
return True
def export_sqlite(dbname, tablenames, filename):
print(f"creating {filename}")
con = sqlite3.connect(filename)
cur = con.cursor()
# Populate It.
create = "mdb-schema --indexes --relations --default-values --not-null".split(" ")
for table in tablenames:
print(f"creating table {table}")
table_create = run_command(create + [dbname, "-T", table, "sqlite"])
cur.execute(table_create)
sql = run_command(["mdb-export", "-I", "sqlite", "-S", "1", dbname, table])
for i, ins in enumerate(sql.split(";\n")):
cur.execute(ins)
print(f"inserted {i} records into {table}")
con.commit()
con.close()
def check_env():
tools = ["mdb-schema", "mdb-export", "mdb-tables"]
missing_tools = [tool for tool in tools if not shutil.which(tool)]
if missing_tools:
print(f"Missing required tools: {', '.join(missing_tools)}")
print("Please install mdbtools. On MacOS, use 'brew install mdbtools'.")
sys.exit(1)
def main():
check_env()
parser = argparse.ArgumentParser(description="Convert MDB files to CSV, XLSX, or SQLite")
parser.add_argument("mdb_file", help="Path to the MDB file")
parser.add_argument("format", help="Export format: csv, xlsx, or sqlite")
args = parser.parse_args()
dbfile = args.mdb_file
if not os.path.exists(dbfile):
print(f"Can't find {dbfile}!")
sys.exit(1)
tables = list_tables(dbfile)
match args.format.lower():
case "csv":
for table in tables:
if table:
export_csv(args.mdb_file, table)
case "xlsx":
outfile = dbfile.replace(".mdb", ".xlsx")
if prompt_overwrite(outfile):
export_excel(dbfile, tables, outfile)
case "sqlite":
outfile = dbfile.replace(".mdb", ".sqlite")
if prompt_overwrite(outfile):
export_sqlite(filename, tables, outfile)
case _:
print("Invalid format. Choose 'csv', 'xlsx', or 'sqlite'.")
if __name__ == "__main__":
main()
XlsxWriter==3.1.9
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment