Skip to content

Instantly share code, notes, and snippets.

@vecchp
Created July 1, 2018 23:07
Show Gist options
  • Save vecchp/9bd2cfae74374871147d9709aac3dd67 to your computer and use it in GitHub Desktop.
Comparing PyPy and Cython Interfaces
from io import BytesIO
import time
from fastavro import (
acquaint_schema,
writer
)
from fastavro.write import write_data
from fastavro.read import read_data
def write_data_fastavro(schema, records, runs=1):
    """Time repeated encoding of one record with fastavro's ``write_data``.

    Only ``records[0]`` is encoded. Every run writes it into a fresh
    ``BytesIO`` and only the ``write_data`` call itself is timed. The
    accumulated time is printed once all runs have finished.

    Args:
        schema: Schema passed straight through to ``write_data``.
        records: Non-empty sequence of records; only the first one is used.
        runs: Number of timed repetitions.

    Returns:
        The ``BytesIO`` written by the last run, or an empty ``BytesIO`` when
        ``runs`` is less than 1.

    Raises:
        IndexError: If ``records`` is empty.
    """
    record = records[0]
    times = []
    # Bound up front so a call with runs < 1 returns an empty stream instead
    # of failing with UnboundLocalError at the return statement.
    iostream = BytesIO()
    for _ in range(runs):
        iostream = BytesIO()
        # perf_counter() is monotonic and high resolution; time.time() follows
        # the wall clock and is too coarse for timing a single small write.
        start = time.perf_counter()
        write_data(iostream, record, schema)
        end = time.perf_counter()
        times.append(end - start)
    print('... {0} runs took {1} seconds'.format(runs, sum(times)))
    return iostream
def read_data_fastavro(iostream, schema=None, runs=1, extra_schema=None):
    """Time repeated decoding of one record with fastavro's ``read_data``.

    The schema is passed to ``acquaint_schema`` once, before any timing.
    Each run rewinds ``iostream`` to offset 0 and decodes a single datum;
    the rewind happens outside the timed section. The accumulated time is
    printed once all runs have finished.

    Args:
        iostream: Seekable binary stream holding the encoded record.
        schema: Schema handed to ``acquaint_schema`` and ``read_data``.
        runs: Number of timed repetitions.
        extra_schema: Accepted but not used by this function.

    Returns:
        A one-element list holding the record decoded by the last run, or an
        empty list when ``runs`` is less than 1.
    """
    acquaint_schema(schema)
    times = []
    # Bound up front so a call with runs < 1 returns an empty list instead of
    # failing with UnboundLocalError at the return statement.
    records = []
    for _ in range(runs):
        iostream.seek(0)
        # perf_counter() is monotonic and high resolution; time.time() follows
        # the wall clock and is too coarse for timing a single small read.
        start = time.perf_counter()
        records = [read_data(iostream, schema)]
        end = time.perf_counter()
        times.append(end - start)
    print('... {0} runs took {1} seconds'.format(runs, sum(times)))
    return records
# Record schema ``Test`` (namespace ``test``) used by the benchmark. Its fields
# cover null, boolean, string, bytes, int, long, fixed, union, enum, array,
# map and a nested record.
schema = {
    "type": "record",
    "name": "Test",
    "namespace": "test",
    "fields": [
        {"name": "null", "type": "null"},
        {"name": "boolean", "type": "boolean"},
        {"name": "string", "type": "string"},
        {"name": "bytes", "type": "bytes"},
        {"name": "int", "type": "int"},
        {"name": "long", "type": "long"},
        {
            "name": "fixed",
            "type": {"type": "fixed", "name": "fixed_field", "size": 5},
        },
        {
            "name": "union",
            # Three branches: null, int, or a one-field record.
            "type": [
                "null",
                "int",
                {
                    "type": "record",
                    "name": "union_record",
                    "fields": [
                        {"name": "union_record_field", "type": "string"},
                    ],
                },
            ],
        },
        {
            "name": "enum",
            "type": {
                "type": "enum",
                "name": "enum_name",
                "symbols": ["FOO", "BAR"],
            },
        },
        {"name": "array", "type": {"type": "array", "items": "string"}},
        {"name": "map", "type": {"type": "map", "values": "int"}},
        {
            "name": "record",
            "type": {
                "type": "record",
                "name": "subrecord",
                "fields": [{"name": "sub_int", "type": "int"}],
            },
        },
    ],
}
# Sample record used by the benchmark: one value per field of the schema.
record = {
    'null': None,
    'boolean': True,
    'string': 'foo',
    'bytes': b'\xe2\x99\xa5',
    'int': 1,
    'long': 1 << 33,  # 8589934592, does not fit in 32 bits
    'fixed': b'\x61\x61\x61\x61\x61',  # five bytes
    'union': None,  # uses the null branch of the union
    'enum': 'BAR',
    'array': ['a', 'b'],
    'map': {'c': 1, 'd': 2},
    'record': {'sub_int': 123},
}
# Each benchmark configuration is a tuple of
#   (schema, single_record, num_records, num_runs)
#
# Larger batches are left disabled:
#   (schema, record, 100, 1000)
#   (schema, record, 10000, 10)
# The writer and reader in this file only round-trip records[0], so the final
# equality check in the driver loop would fail for num_records > 1.
configurations = [(schema, record, 1, 100000)]
# Each entry is (label printed in the report header, writer, reader).
# The label previously read 'fastavo_data', a misspelling of the library name.
libraries = [
    ('fastavro_data', write_data_fastavro, read_data_fastavro),
]
# Run every library against every configuration and verify the round trip.
for schema, single_record, num_records, num_runs in configurations:
    for library, writer_, reader_ in libraries:
        print('')
        print('### {} ###'.format(library))

        # The same record object repeated num_records times.
        original_records = [single_record] * num_records

        print('Writing {0} records to one file...'.format(num_records))
        bytesio = writer_(schema, original_records, runs=num_runs)

        print('Reading {0} records from one file...'.format(num_records))
        records = reader_(bytesio, schema=schema, runs=num_runs)

        # On a mismatch, show the first decoded and first original record
        # before the assertion aborts the script.
        round_trip_ok = records == original_records
        if not round_trip_ok:
            print(records[0])
            print(original_records[0])
        assert round_trip_ok
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment