Skip to content

Instantly share code, notes, and snippets.

@minikomi
Created October 17, 2019 06:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save minikomi/3255a27babeb12c60b61837d4b9ccef7 to your computer and use it in GitHub Desktop.
Save minikomi/3255a27babeb12c60b61837d4b9ccef7 to your computer and use it in GitHub Desktop.
Parsing Kindle clippings into JSON
# Break the raw clippings text into entries, each a list of its lines.
# Entries are delimited by a "==========" separator line; when the text ends
# with a separator the final chunk is empty, so blank chunks are dropped to
# keep every entry non-empty for the code that indexes into it.
# NOTE(review): `txt` is defined outside this view - presumably the clippings
# file content with "\n" line endings; confirm against where it is loaded.
quotes = [q.split("\n") for q in txt.split("==========\n") if q.strip()]
import re
from datetime import datetime
import json
# Patterns applied to the metadata line of a clipping (the second line of an
# entry). Raw strings keep "\d" from being treated as a string escape.

# Page number or page range, e.g. "12" or "12-13".
re_page = re.compile(r"- Your Highlight on page ([\d\-]+)")
# Location number or range; "Location" and "location" are both accepted.
re_loc = re.compile(r".*[Ll]ocation ([\d\-]+)")
# Timestamp text following "Added on", up to and including the AM/PM marker.
# (?:AM|PM) is a real alternation; the previous "[AM|PM]" was a character
# class matching any single one of 'A', 'M', '|', 'P'.
re_date = re.compile(r"Added on (.*?(?:AM|PM))")
def parse_date(d_str):
    """Parse a clipping's "Added on" timestamp into a naive ``datetime``.

    Args:
        d_str: Timestamp text such as
            ``"Thursday, October 17, 2019 6:23:00 AM"``.

    Returns:
        The corresponding ``datetime`` with no timezone attached.

    Raises:
        ValueError: If ``d_str`` does not match the expected format.
    """
    # Full weekday name, full month name, day, year, then a 12-hour clock
    # time with seconds and an AM/PM marker.
    return datetime.strptime(d_str, "%A, %B %d, %Y %I:%M:%S %p")
def _parse_clipping(q):
    """Convert one clipping entry (a list of lines) into its parts.

    The first line is the title, optionally followed by a parenthesised
    author; the second line is the metadata (page, location, date); any
    remaining lines are the highlighted text.

    Returns:
        A ``(book, author, data)`` tuple, or ``None`` when the entry has
        fewer than two lines and therefore no metadata line to read.
    """
    if len(q) < 2:
        # e.g. the empty chunk left after the final separator.
        return None

    # A byte-order mark survives decoding unless "utf-8-sig" is used; drop
    # it so it does not become part of the book key.
    title = q[0].lstrip("\ufeff")

    # The last parenthesised group on the title line is taken as the author.
    a_m = re.findall(r"\(([^\(\)]+)\)", title)
    author = a_m[-1] if a_m else None
    book = title.replace(" ({})".format(author), "") if author else title

    meta = q[1]
    page_m = re_page.search(meta)
    loc_m = re_loc.search(meta)
    date_m = re_date.search(meta)

    data = {
        "date": parse_date(date_m[1]).isoformat() if date_m else None,
        "page": page_m[1] if page_m else None,
        "loc": loc_m[1] if loc_m else None,
        "quote": "\n".join(q[2:]).strip(),
    }
    return book, author, data


# Group clippings by book title: {book: {"author": ..., "quotes": [...]}}.
ret = {}
for q in quotes:
    parsed = _parse_clipping(q)
    if parsed is None:
        continue
    book, author, data = parsed
    # The author recorded for a book is the one seen on its first clipping.
    ret.setdefault(book, {"author": author, "quotes": []})["quotes"].append(data)

print(json.dumps(ret, indent=4, sort_keys=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment