Skip to content

Instantly share code, notes, and snippets.

@whitews
Last active December 14, 2015 19:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save whitews/5135793 to your computer and use it in GitHub Desktop.
Save whitews/5135793 to your computer and use it in GitHub Desktop.
Example showing the speedup from using numpy to parse well-structured binary data
# Benchmark three ways of parsing a file of big-endian C ints into a numpy
# array: struct.unpack, np.fromfile and np.memmap.
#
# On Unix, make a 10MB input file using:
#   dd if=/dev/urandom of=10meg.bin bs=1024 count=$((1024*10))
# (/dev/urandom is suggested because /dev/random can block on some systems.)
from timeit import timeit

# Number of timed calls made for each approach.
count = 50

# Source code handed to timeit as its ``setup`` argument. It opens the input
# file, defines the three functions being compared and sanity-checks that they
# all decode the same values. timeit executes it once per timeit() call.
setup = '''
import os
import numpy as np
from struct import calcsize, unpack

# The file stays open for the whole timing run; all three functions share it.
f = open('10meg.bin', 'rb')
byte_count = os.fstat(f.fileno()).st_size

byte_order = '>'
dtype = 'i'
# Measure the item with the same byte-order prefix that is used for decoding,
# so the size matches the format actually passed to unpack().
item_size = calcsize(byte_order + dtype)
# Floor division keeps the item count an integer on Python 3 as well.
num_items = byte_count // item_size
# Format shared by struct and numpy: byte order, repeat count, type code.
fmt_str = '%s%d%s' % (byte_order, num_items, dtype)


def run_struct():
    """Decode the file with struct.unpack and wrap the tuple in an array."""
    f.seek(0)
    # Read only whole items so a trailing partial item cannot break unpack().
    tmp = unpack(fmt_str, f.read(num_items * item_size))
    return np.array(tmp)


def run_np():
    """Decode the file with np.fromfile as one record holding every item."""
    f.seek(0)
    fmt = np.dtype(fmt_str)
    return np.fromfile(f, fmt, count=1)


def run_mm():
    """Map the file read-only with np.memmap as one record holding every item."""
    f.seek(0)
    fmt = np.dtype(fmt_str)
    return np.memmap(f, fmt, mode='r', shape=1)


# Sanity check: all three approaches must yield the same values. The whole
# conjunction is negated, so a mismatch in either comparison is reported.
struct_result = run_struct()
np_result = run_np()
mm_result = run_mm()
if not (np.all(struct_result == np_result) and np.all(np_result == mm_result)):
    print('Something is wrong. Terribly, terribly wrong. What have you done?!!')
'''

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('%d runs' % count)
for label, stmt in (
    ('struct', 'run_struct()'),
    ('np', 'run_np()'),
    ('mm', 'run_mm()'),
):
    print('%s runtime: %s' % (label, timeit(stmt, setup=setup, number=count)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment