Created
April 19, 2013 10:43
-
-
Save j0hnsmith/5419576 to your computer and use it in GitHub Desktop.
Delete all files from an S3 bucket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Delete all files from an S3 bucket, using gevent for concurrency."""
# The gevent monkey patch is applied before anything else is imported so that
# the modules imported below pick up the patched (cooperative) standard
# library rather than the blocking one.
from gevent import monkey
monkey.patch_all()

import sys
import optparse
from datetime import datetime

import gevent
from gevent.pool import Pool
from gevent.queue import Queue
from boto.s3.connection import S3Connection
from boto.s3.bucket import Bucket
def delete_key(conn_pool, bucket_name, keys):
    """Delete one batch of keys, borrowing a connection from ``conn_pool``.

    Args:
        conn_pool: queue of ready-to-use S3 connections. One is taken for the
            duration of the call and always put back afterwards.
        bucket_name: name of the bucket that holds ``keys``.
        keys: the batch of keys passed to ``Bucket.delete_keys``.

    Raises:
        Whatever ``Bucket.delete_keys`` raises; the borrowed connection is
        returned to the pool before the exception propagates.
    """
    conn = conn_pool.get()
    try:
        Bucket(conn, bucket_name).delete_keys(keys)
    finally:
        # Return the connection even when the request fails. Otherwise every
        # failed batch permanently removes one connection from the pool, and
        # once none are left the remaining workers block in conn_pool.get().
        conn_pool.put(conn)
    # Progress marker, one per successfully submitted batch. Written through
    # sys.stdout.write so the line parses whether ``print`` is a statement or
    # a function; '. ' matches the dot-plus-separator output of ``print '.',``.
    sys.stdout.write('. ')
    sys.stdout.flush()
def delete_keys(access_key, access_secret, bucket_name, pool_size):
    """Delete every key in ``bucket_name`` using ``pool_size`` workers.

    Opens ``pool_size`` S3 connections for the workers plus one more for
    listing, walks the bucket listing, and hands the keys to ``delete_key``
    in batches of up to 1000. Progress and a final summary are printed to
    stdout.

    Args:
        access_key: AWS access key id passed to ``S3Connection``.
        access_secret: AWS secret access key passed to ``S3Connection``.
        bucket_name: name of the bucket to empty.
        pool_size: number of concurrent workers, and of pooled connections.
    """
    batch_limit = 1000  # keys handed to a single delete_key call

    start = datetime.now()
    pool = Pool(pool_size)  # control concurrency
    conn_pool = Queue(pool_size)

    print('creating %s connections to S3' % pool_size)
    for _ in range(conn_pool.maxsize):
        conn_pool.put(S3Connection(access_key, access_secret))

    # The listing gets its own connection, so it never competes with the
    # workers for a pooled one.
    listing_conn = S3Connection(access_key, access_secret)
    bucket = Bucket(listing_conn, bucket_name)

    print('getting keys')
    keys = bucket.list()

    print('deleting...')
    total_count = 0  # keys handed to workers (not confirmed deletions)
    batch = []
    for key in keys:
        batch.append(key)
        if len(batch) == batch_limit:
            pool.spawn(delete_key, conn_pool, bucket_name, batch)
            total_count += len(batch)
            batch = []
    # Submit the trailing partial batch, if any. An empty batch (empty bucket,
    # or a key count that is an exact multiple of the batch size) is skipped
    # rather than spawned as a no-op worker.
    if batch:
        pool.spawn(delete_key, conn_pool, bucket_name, batch)
        total_count += len(batch)

    # Wait for every outstanding worker instead of polling the free count.
    pool.join()
    print('\nfinished, %s keys deleted in %s' % (total_count, datetime.now() - start))
def main(argv):
    """Parse the command line and delete every key in the named bucket.

    Args:
        argv: full argument vector including the program name (as in
            ``sys.argv``). ``None`` falls back to optparse's default of
            reading ``sys.argv[1:]``.

    Exits with status 1, after printing the help text, unless exactly one
    positional argument (the bucket name) is given.
    """
    parser = optparse.OptionParser()
    parser.set_usage('%prog [options] <bucket name>')
    parser.add_option(
        '-a', '--access_key',
        dest='access_key', type='string',
        help='Your AWS Access Key ID'
    )
    parser.add_option(
        '-s', '--secret_key',
        dest='secret_key', type='string',
        help='Your AWS Secret Access Key'
    )
    parser.add_option(
        '-c', '--concurrency',
        dest='concurrency', type='int',
        help='Number of simultaneous S3 connections (default 50 if not specified)',
        default=50
    )

    # Honour the argv that was passed in rather than always re-reading
    # sys.argv; element 0 is the program name and is not an option.
    cli_args = None if argv is None else list(argv[1:])
    options, args = parser.parse_args(cli_args)

    if len(args) != 1:
        parser.print_help()
        sys.exit(1)

    delete_keys(
        options.access_key,
        options.secret_key,
        args[0],
        options.concurrency
    )
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Maybe it would benefit from a `gevent.sleep(0)` after line 54?