Skip to content

Instantly share code, notes, and snippets.

@briantjacobs
Last active October 3, 2017 17:22
Show Gist options
  • Save briantjacobs/27f4890bd172d134197faaecfd893476 to your computer and use it in GitHub Desktop.
Save briantjacobs/27f4890bd172d134197faaecfd893476 to your computer and use it in GitHub Desktop.
Script to process a folder of ephemeris data and break into date ranges
from jsonmerge import merge
import itertools
import os
import sys
import subprocess
import processHorizons
import simplejson as json
import msgpack
import dateutil.parser
from datetime import datetime
# Directory that process() walks recursively looking for input files.
rootdir = 'data/cassini'
# strptime layout used for every "date" string in the records and for the
# "from"/"to" values of the range config.
dformat = "%Y-%m-%d %H:%M:%S"
# main() divides each range's configured interval by this value to get the
# sampling step. Presumably this is the spacing of the raw input records, in
# the same unit as the config's interval values -- TODO confirm.
dataInterval = 10
# Keep only the records dated after the date of interest.
def dateFilter(d):
    """Tell whether a record is dated strictly after 2004-06-29 00:00:00.

    d is a mapping whose "date" entry is a string laid out according to the
    module-level ``dformat``. Returns a bool.
    """
    recordDate = datetime.strptime(d["date"], dformat)
    cutoff = datetime(2004, 6, 29)
    return recordDate > cutoff
# Build a filter that keeps records falling within an arbitrary date range.
def dateRangeFilter(dateFrom, dateTo):
    """Build a predicate selecting records inside a half-open date range.

    dateFrom and dateTo are datetime objects. The returned function takes a
    record (a mapping with a "date" string in the ``dformat`` layout) and
    returns True when dateFrom <= date < dateTo, i.e. the lower bound is
    inclusive and the upper bound is exclusive.
    """
    def myfilter(d):
        stamp = datetime.strptime(d["date"], dformat)
        return dateFrom <= stamp < dateTo

    return myfilter
# extract data from folder of text files
def process():
    """Yield the parsed contents of every file found under ``rootdir``.

    The directory tree is walked recursively with os.walk. For each file the
    path is printed and handed to processHorizons.main; whatever that call
    returns is yielded as-is (the caller chains the results together, so each
    one is presumably an iterable of records -- confirm against
    processHorizons).
    """
    for folder, _subfolders, filenames in os.walk(rootdir):
        for filename in filenames:
            path = os.path.join(folder, filename)
            print("read " + path)
            yield processHorizons.main(path)
def main(size):
    """Combine all ephemeris files, group them by date range, and write them out.

    Steps:
      1. Flatten everything yielded by process() into one list of records.
      2. Load the range config from cassiniSlideRanges.json and, for each
         entry, attach the records falling in [from, to), thinned to every
         Nth record where N = entry interval // dataInterval.
      3. Write the enriched config as both JSON and msgpack.

    size: the string "mobile" selects the "intervalMobile" config key and the
        "_sm" output suffix; any other value selects "interval" and no suffix.

    Raises IndexError if no records were read while the config is non-empty
    (the last range's "to" is taken from the final record), and KeyError if
    the first config entry lacks "from" or an entry lacks the interval key.
    """
    # do different things if mobile or desktop flag is provided
    if size == "mobile":
        print("Process Mobile")
        intervalKey = "intervalMobile"
        suffix = "_sm"
    else:
        print("Process Big")
        intervalKey = "interval"
        suffix = ""

    # turn all input files into a single list of data
    jsonCombined = list(itertools.chain.from_iterable(process()))
    print("Orig size: " + str(len(jsonCombined)))

    # read from config file; the handle is released as soon as it is parsed
    with open('src/ngm-assets/data/cassiniSlideRanges.json', "r") as jsonRanges:
        jsonRangesData = json.loads(jsonRanges.read())

    # restructure data into date range groups, one per config entry
    lastIndex = len(jsonRangesData) - 1
    for i, d in enumerate(jsonRangesData):
        # only the first group carries its own "from"; every later group
        # starts where the previous one ended
        if i != 0:
            d["from"] = jsonRangesData[i - 1]["to"]
        # the last group's "to" is always taken from the final record
        # (assumes jsonCombined is in chronological order -- TODO confirm).
        # NOTE(review): dateRangeFilter excludes its upper bound, so that
        # final record itself is not part of the last group.
        if i == lastIndex:
            d["to"] = jsonCombined[-1]["date"]

        # create a filter function limited to this group's date range
        filterFunc = dateRangeFilter(
            datetime.strptime(d["from"], dformat),
            datetime.strptime(d["to"], dformat),
        )
        filteredData = [record for record in jsonCombined if filterFunc(record)]

        # sample the data according to time interval; floor division keeps
        # the step an integer, and the clamp avoids a zero step (which
        # slicing rejects) when the interval is smaller than dataInterval
        step = max(1, int(d[intervalKey] // dataInterval))
        reducedData = filteredData[::step]
        d["data"] = reducedData

        print(d["from"] + " to " + d["to"] + ":")
        print("Date filtered: " + str(len(filteredData)))
        print("Interval filtered:" + str(len(reducedData)))

    outputBase = 'src/ngm-assets/data/cassini_combined' + suffix

    # write a json
    print("write json")
    with open(outputBase + '.json', 'w') as jsonOutput:
        jsonOutput.write(json.dumps(jsonRangesData))

    # compress to msgpack; packb produces binary data, so the file must be
    # opened in binary mode
    print("write msgpack")
    with open(outputBase + '.pack', 'wb') as msgPackOutput:
        msgPackOutput.write(msgpack.packb(jsonRangesData))
## is this running from commandline?
if __name__ == "__main__":
    # The size flag is optional: main() treats anything other than "mobile"
    # as the desktop build, so fall back to that when no argument is given
    # instead of failing with an IndexError.
    main(sys.argv[1] if len(sys.argv) > 1 else "desktop")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment