Skip to content

Instantly share code, notes, and snippets.

@j0hnsmith
Created April 19, 2013 10:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save j0hnsmith/5419576 to your computer and use it in GitHub Desktop.
Save j0hnsmith/5419576 to your computer and use it in GitHub Desktop.
Delete all files from an S3 bucket
#!/usr/bin/env python
from gevent import monkey
monkey.patch_all()
import sys
import optparse
from datetime import datetime
import gevent
from gevent.pool import Pool
from gevent.queue import Queue
from boto.s3.connection import S3Connection
from boto.s3.bucket import Bucket
def delete_key(conn_pool, bucket_name, keys):
    """Delete one batch of keys from a bucket using a pooled S3 connection.

    Args:
        conn_pool: Queue of ready-to-use S3 connections. One is borrowed for
            the duration of the request and is always handed back.
        bucket_name: Name of the bucket the keys belong to.
        keys: Batch of keys passed to ``Bucket.delete_keys`` in one call.
    """
    conn = conn_pool.get()
    try:
        bucket = Bucket(conn, bucket_name)
        result = bucket.delete_keys(keys)
    finally:
        # Hand the connection back even when the request fails; otherwise
        # every error shrinks the pool until the remaining workers block in
        # get() forever and the script never finishes.
        conn_pool.put(conn)
    if result.errors:
        # The multi-delete result lists keys S3 refused to delete; surface
        # them instead of silently counting them as deleted.
        sys.stderr.write(
            '\n%d keys could not be deleted\n' % len(result.errors)
        )
    # Progress marker: one dot per finished batch.
    sys.stdout.write('. ')
    sys.stdout.flush()
def delete_keys(access_key, access_secret, bucket_name, pool_size):
    """Delete every key in a bucket, fanning batched requests out to greenlets.

    Keys are listed over one dedicated connection, grouped into batches and
    handed to ``delete_key`` workers that share a fixed pool of connections.

    Args:
        access_key: AWS access key ID handed to ``S3Connection``.
        access_secret: AWS secret access key handed to ``S3Connection``.
        bucket_name: Name of the bucket to empty.
        pool_size: Number of pooled S3 connections, which is also the maximum
            number of delete requests in flight at once.

    Raises:
        ValueError: If ``pool_size`` is less than 1.
    """
    # Imported locally so the module's import block (which must start with
    # the gevent monkey patch) does not need to change.
    from itertools import islice

    if pool_size < 1:
        # With no pooled connections every worker would wait forever.
        raise ValueError('pool_size must be at least 1, got %r' % (pool_size,))

    # 1000 is the most keys a single S3 multi-object delete request accepts.
    batch_size = 1000

    start = datetime.now()
    pool = Pool(pool_size)  # control concurrency
    conn_pool = Queue(pool_size)
    print('creating %s connections to S3' % pool_size)
    for _ in range(pool_size):
        conn_pool.put(S3Connection(access_key, access_secret))

    # Separate connection used only for listing the bucket contents.
    conn = S3Connection(access_key, access_secret)
    bucket = Bucket(conn, bucket_name)
    print('getting keys')
    keys = iter(bucket.list())
    print('deleting...')
    total_count = 0
    while True:
        batch = list(islice(keys, batch_size))
        if not batch:
            # Listing exhausted; never spawn a worker for an empty batch.
            break
        pool.spawn(delete_key, conn_pool, bucket_name, batch)
        total_count += len(batch)

    # Wait for every outstanding worker instead of polling the free count.
    pool.join()
    print(
        '\nfinished, %s keys deleted in %s'
        % (total_count, datetime.now() - start)
    )
def main(argv):
    """Parse command-line options and empty the named bucket.

    Prints the help text and exits with status 1 unless exactly one
    positional argument (the bucket name) is given. Invalid option values,
    including a concurrency below 1, are reported through the option parser.

    Args:
        argv: Full argument vector with the program name at index 0; the
            script entry point passes ``sys.argv``.
    """
    parser = optparse.OptionParser()
    parser.set_usage('%prog [options] <bucket name>')
    parser.add_option(
        '-a', '--access_key',
        dest='access_key', type='string',
        help='Your AWS Access Key ID'
    )
    parser.add_option(
        '-s', '--secret_key',
        dest='secret_key', type='string',
        help='Your AWS Secret Access Key'
    )
    parser.add_option(
        '-c', '--concurrency',
        dest='concurrency', type='int',
        help='Number of simultaneous S3 connections (default 50 if not specified)',
        default=50
    )
    # Parse the vector we were given rather than implicitly reading sys.argv,
    # so the argv parameter is actually honoured.
    options, args = parser.parse_args(argv[1:])
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    if options.concurrency < 1:
        # Zero connections would leave every worker waiting forever.
        parser.error('concurrency must be at least 1')
    delete_keys(
        options.access_key,
        options.secret_key,
        args[0],
        options.concurrency
    )
# Script entry point: run the deletion only when executed directly, not when
# this module is imported.
if __name__ == "__main__":
    main(sys.argv)
@j0hnsmith
Copy link
Author

maybe it would benefit from gevent.sleep(0) after line 54?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment