Last active
December 14, 2015 19:19
-
-
Save whitews/5135793 to your computer and use it in GitHub Desktop.
Example showing the speedup gained by using NumPy to parse well-structured binary data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark three ways of loading a well-structured binary file into a NumPy
# array: struct.unpack, numpy.fromfile and numpy.memmap.
#
# On Unix, make a 10MB input file first:
#   dd if=/dev/urandom of=10meg.bin bs=1024 count=$((1024*10))
from timeit import timeit

# Number of timed calls made for each approach.
count = 50

# Code handed to timeit as its setup: it opens the input file, defines the
# three loaders, and sanity-checks that they all yield the same values.
setup = '''
import os
import numpy as np
from struct import calcsize, unpack

f = open('10meg.bin', 'rb')
byte_count = os.fstat(f.fileno()).st_size
dtype = 'i'
item_size = calcsize(dtype)
# Floor division keeps the item count an integer on Python 3 as well.
num_items = byte_count // item_size


def run_struct():
    """Parse the file with struct.unpack, then convert the tuple to an array."""
    f.seek(0)
    # Read only whole items so a trailing partial item cannot break unpack.
    raw = f.read(num_items * item_size)
    tmp = unpack('%s%d%s' % ('>', num_items, dtype), raw)
    return np.array(tmp)


def run_np():
    """Parse the file with np.fromfile as one big-endian sub-array record."""
    f.seek(0)
    fmt = np.dtype('%s%d%s' % ('>', num_items, dtype))
    return np.fromfile(f, fmt, count=1)


def run_mm():
    """Map the file read-only with np.memmap using the same record dtype."""
    f.seek(0)
    fmt = np.dtype('%s%d%s' % ('>', num_items, dtype))
    return np.memmap(f, fmt, mode='r', shape=1)


# Parenthesised so the warning fires when ANY pair of results disagrees;
# a bare "not A and B" would parse as "(not A) and B".
if not (np.all(run_struct() == run_np()) and np.all(run_np() == run_mm())):
    print('Something is wrong. Terribly, terribly wrong. What have you done?!!')
'''

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('%d runs' % count)
for label, stmt in (
    ('struct', 'run_struct()'),
    ('np', 'run_np()'),
    ('mm', 'run_mm()'),
):
    print('%s runtime: %s' % (label, timeit(stmt, setup=setup, number=count)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment