Skip to content

Instantly share code, notes, and snippets.

@hay
Created November 11, 2018 21:03
Show Gist options
  • Save hay/5f0f184cf36dc2e96246b72f01275f0a to your computer and use it in GitHub Desktop.
Save hay/5f0f184cf36dc2e96246b72f01275f0a to your computer and use it in GitHub Desktop.
A pretty horrible Python script to fix the SQL errors in the Rijksmonumenten dump mentioned here: https://github.com/clytras/AccessConverter/issues/5
#!/usr/bin/env python3
from tqdm import tqdm
from sys import argv, exit
from os.path import getsize
# Tokens that consist of nothing but punctuation around a missing value.
_EXACT_TOKEN_FIXES = {
    ",": "'',",
    "(,": "('',",
    "),": "''),",
    ");": "'');",
}

# Substrings marking a missing last value glued to the end of a larger token.
# Order matters: only the first pattern found in a token is rewritten.
_EMBEDDED_TOKEN_FIXES = (
    (",),", ",''),"),
    (",);", ",'');"),
)


def fix_token(token):
    """Return *token* with a missing SQL value replaced by an empty string.

    A token is one space-delimited piece of an INSERT statement. Where a
    value is absent (for example a lone ``,`` or a row ending in ``,),``),
    the empty string literal ``''`` is inserted so the statement parses.
    Tokens that match none of the known patterns are returned unchanged.
    """
    if token in _EXACT_TOKEN_FIXES:
        return _EXACT_TOKEN_FIXES[token]
    for broken, repaired in _EMBEDDED_TOKEN_FIXES:
        if broken in token:
            return token.replace(broken, repaired)
    return token
def iter_tokens(path):
    """Lazily yield the tokens of the text file at *path*.

    Each line is split on single spaces and every piece is yielded with
    surrounding whitespace stripped, so consecutive spaces produce empty
    string tokens. After the tokens of each line a separate ``"\\n"`` token
    is yielded to mark the line break (also for a final line that has no
    trailing newline).
    """
    with open(path) as f:
        for line in f:
            for token in line.split(" "):
                yield token.strip()
            # The newline was stripped from the last token; emit it explicitly.
            yield "\n"
def main():
    """Rewrite the SQL dump named on the command line with empty values fixed.

    Expects exactly one argument, the path of the dump. The result is written
    next to it with ``-fixed.sql`` replacing a trailing ``.sql`` (or appended
    when the path does not end in ``.sql``). Tokens outside INSERT statements
    are copied through; the tokens of each INSERT statement are buffered until
    a token containing ``);`` is seen, passed through ``fix_token`` and written
    out on a line of their own.
    """
    if len(argv) != 2:
        exit("Invalid arguments")

    path = argv[1]

    # Only touch a trailing ".sql". A plain str.replace would rewrite every
    # occurrence in the path and, when there is none, would make the output
    # path equal to the input path, truncating the dump before it is read.
    suffix = ".sql"
    if path.endswith(suffix):
        target = path[:-len(suffix)] + "-fixed.sql"
    else:
        target = path + "-fixed.sql"

    # The total is only an estimate for the progress bar (presumably about
    # one token per ten bytes of input).
    tokens = tqdm(iter_tokens(path), total = getsize(path) / 10)

    with open(target, "w") as f:
        statement = []
        values_open = False

        for token in tokens:
            if token == "INSERT":
                values_open = True

            if token == "CREATE" and values_open:
                # This should not happen, drop the whole current statement
                values_open = False
                statement = []

            if values_open:
                statement.append(token)
            else:
                f.write(token + " ")

            if ");" in token:
                # End of a statement: emit the buffered INSERT (if any) fixed.
                fixed = " ".join(fix_token(part) for part in statement)
                f.write(f"\n{fixed}\n")
                values_open = False
                statement = []
if __name__ == "__main__":
    # Run the fixer only when executed as a script, not when imported.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment