Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mmalone/351429 to your computer and use it in GitHub Desktop.
Save mmalone/351429 to your computer and use it in GitHub Desktop.
import random
import time
from eventlet import patcher, greenpool, pools
# Monkey patching is applied before ``simplegeo`` is imported below.
# NOTE(review): presumably this is so the client's blocking network calls
# become cooperative under eventlet -- confirm against the eventlet docs.
patcher.monkey_patch()
import simplegeo
# Placeholder configuration: replace these values before running the script.
# LAYER is handed to ``RandomDataInserter`` as the layer records are added to.
LAYER = 'your.layer.identifier.here'
# OAuth credentials passed to ``simplegeo.Client`` by ``SimpleGeoPool.create``.
OAUTH_KEY = 'YOUR OAUTH KEY HERE'
OAUTH_SECRET = 'YOUR OAUTH SECRET HERE'
class SimpleGeoPool(pools.Pool):
    """
    Pool of ``simplegeo.Client`` instances that cooperating green threads
    check out in order to talk to the SimpleGeo API.
    """

    def create(self):
        """
        Returns a new ``simplegeo.Client`` built from the module-level OAuth
        key and secret.
        """
        credentials = {'key': OAUTH_KEY, 'secret': OAUTH_SECRET}
        return simplegeo.Client(**credentials)
class RandomDataInserter(object):
    """
    Inserts randomly placed records into a SimpleGeo layer, issuing the API
    calls concurrently from a pool of green threads.

    Record ids are sequential integers (sent as strings) starting at 0, and
    coordinates are drawn uniformly from the configured bounding box.
    """

    def __init__(self, layer, pool_size=500, lat_min=37.740262,
                 lat_max=37.80975, lon_min=-122.461274, lon_max=-122.40599):
        """
        Parameters:
        ``layer``:
            The layer identifier that records are created in and added to.
        ``pool_size``:
            Size of the green thread pool and maximum size of the client
            pool.
        ``lat_min``, ``lat_max``:
            Bounds for the randomly generated latitudes.
        ``lon_min``, ``lon_max``:
            Bounds for the randomly generated longitudes.
        """
        self.layer = layer
        self.pool_size = pool_size
        self.lat_min = lat_min
        self.lat_max = lat_max
        self.lon_min = lon_min
        self.lon_max = lon_max
        # Next unused record id.
        self.current_id = 0
        # Number of completed ``add_records`` API calls.
        self.requests_made = 0
        self.pool = greenpool.GreenPool(pool_size)
        self.clients = SimpleGeoPool(max_size=pool_size)

    def insert_random_data(self, batch_size):
        """
        Builds ``batch_size`` random records and sends them to the SimpleGeo
        API in a single ``add_records`` call.

        Parameters:
        ``batch_size``:
            The number of records to create and send. A value of zero or
            less is a no-op: nothing is sent and no ids are consumed.
        """
        if batch_size <= 0:
            return

        # Claim the whole id range before the API call below. Nothing in
        # this method performs I/O before that point, so the range is taken
        # in one step and the ids stay contiguous per batch.
        starting_id = self.current_id
        self.current_id += batch_size

        # First build the list of records
        records = [
            simplegeo.Record(
                layer=self.layer,
                id=str(record_id),
                lat=random.uniform(self.lat_min, self.lat_max),
                lon=random.uniform(self.lon_min, self.lon_max)
            )
            for record_id in range(starting_id, starting_id + batch_size)
        ]

        # Now insert that via the SimpleGeo API
        with self.clients.item() as client:
            client.add_records(self.layer, records)
        self.requests_made += 1

        # The last id in the batch is inclusive, hence the ``- 1``.
        print('Inserted record ids %d through %d' % (
            starting_id,
            starting_id + batch_size - 1,
        ))

    def destroy_cassandra(self, batch_size, num):
        """
        Kicks off the insertion process and blocks until every batch has
        been handled by the green thread pool.

        Exactly ``num`` records are requested in total: when ``num`` is not
        a multiple of ``batch_size`` the final batch is smaller.

        Parameters:
        ``batch_size``:
            The number of records to send per API call. Must be positive.
        ``num``:
            The total number of records to insert.

        Raises ``ValueError`` if ``batch_size`` is not positive, since the
        scheduling loop could otherwise never finish.
        """
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')

        created = 0
        while created < num:
            # Trim the last batch so the total never exceeds ``num``.
            size = min(batch_size, num - created)
            self.pool.spawn_n(self.insert_random_data, size)
            created += size
        self.pool.waitall()
if __name__ == '__main__':
    # Benchmark run: the clock starts before the inserter is constructed, so
    # its setup is included in the reported duration.
    record_total = 100000
    clock_start = time.time()
    data_inserter = RandomDataInserter(LAYER)
    data_inserter.destroy_cassandra(100, record_total)
    elapsed = time.time() - clock_start

    summary = 'Took %s seconds to insert %d records (%s per second)' % (
        elapsed,
        record_total,
        record_total / elapsed,
    )
    print(summary)
    print('Used %s API requests' % (data_inserter.requests_made,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment