Skip to content

Commit 3739734

Browse files
michaelzhiluo authored and suquark committed
Fast removal of S3 Storage buckets with 10k-1 million objects (#893)
1 parent 5381c92 commit 3739734

File tree

3 files changed

+24
-12
lines changed

3 files changed

+24
-12
lines changed

sky/cli.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2010,7 +2010,7 @@ def storage_delete(all: bool, name: str): # pylint: disable=redefined-builtin
20102010
sky storage delete -a
20112011
"""
20122012
if all:
2013-
click.echo('Deleting all storage objects...')
2013+
click.echo('Deleting all storage objects.')
20142014
storages = global_user_state.get_storage()
20152015
for row in storages:
20162016
store_object = data.Storage(name=row['name'],
@@ -2023,7 +2023,7 @@ def storage_delete(all: bool, name: str): # pylint: disable=redefined-builtin
20232023
if handle is None:
20242024
click.echo(f'Storage name {n} not found.')
20252025
else:
2026-
click.echo(f'Deleting storage object {n}...')
2026+
click.echo(f'Deleting storage object {n}.')
20272027
store_object = data.Storage(name=handle.storage_name,
20282028
source=handle.source,
20292029
sync_on_reconstruction=False)

sky/data/storage.py

+17-10
Original file line numberDiff line numberDiff line change
@@ -737,8 +737,8 @@ def upload(self):
737737
f'Upload failed for store {self.name}') from e
738738

739739
def delete(self) -> None:
740-
logger.info(f'Deleting S3 Bucket {self.name}')
741-
return self._delete_s3_bucket(self.name)
740+
self._delete_s3_bucket(self.name)
741+
logger.info(f'Deleted S3 bucket {self.name}.')
742742

743743
def get_handle(self) -> StorageHandle:
744744
return aws.resource('s3').Bucket(self.name)
@@ -941,15 +941,22 @@ def _delete_s3_bucket(self, bucket_name: str) -> None:
941941
Args:
942942
bucket_name: str; Name of bucket
943943
"""
944+
# Deleting objects is very slow programmatically
945+
# (i.e. bucket.objects.all().delete() is slow).
946+
# In addition, standard delete operations (i.e. via `aws s3 rm`)
947+
# are slow, since AWS puts deletion markers.
948+
# https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
949+
# The fastest way to delete is to run `aws s3 rb --force`,
950+
# which removes the bucket by force.
951+
remove_command = f'aws s3 rb s3://{bucket_name} --force'
944952
try:
945-
s3 = aws.resource('s3')
946-
bucket = s3.Bucket(bucket_name)
947-
bucket.objects.all().delete()
948-
bucket.delete()
949-
except aws.client_exception() as e:
950-
logger.error(f'Unable to delete S3 bucket {self.name}')
951-
logger.error(e)
952-
raise e
953+
with backend_utils.safe_console_status(
954+
f'[bold cyan]Deleting [green]bucket {bucket_name}'):
955+
subprocess.check_output(remove_command.split(' '))
956+
except subprocess.CalledProcessError as e:
957+
logger.error(e.output)
958+
raise exceptions.StorageBucketDeleteError(
959+
f'Failed to delete S3 bucket {bucket_name}.')
953960

954961

955962
class GcsStore(AbstractStore):

sky/exceptions.py

+5
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ class StorageBucketGetError(StorageInitError):
5959
pass
6060

6161

62+
class StorageBucketDeleteError(StorageError):
63+
# Error raised if an attempt to delete an existing bucket fails.
64+
pass
65+
66+
6267
class StorageUploadError(StorageError):
6368
# Error raised when bucket is successfully initialized, but upload fails,
6469
# either due to permissions, ctrl-c, or other reasons.

0 commit comments

Comments
 (0)