From 2d78738f2c37b124767926ee05e66f869e01a63c Mon Sep 17 00:00:00 2001 From: Giacomo Alzetta Date: Wed, 15 Jan 2025 17:52:22 +0100 Subject: [PATCH] Preserve trailing slash on delete Most object storages support having a slash in the object name and rohmu should be able to delete such objects. --- rohmu/object_storage/azure.py | 2 +- rohmu/object_storage/google.py | 2 +- rohmu/object_storage/s3.py | 7 +++++-- rohmu/object_storage/swift.py | 2 +- test/object_storage/test_s3.py | 8 ++++++-- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/rohmu/object_storage/azure.py b/rohmu/object_storage/azure.py index fcc89a15..6a552373 100644 --- a/rohmu/object_storage/azure.py +++ b/rohmu/object_storage/azure.py @@ -302,7 +302,7 @@ def _iter_key(self, *, path: str, with_metadata: bool, deep: bool) -> Iterator[I ) def delete_key(self, key: str) -> None: - path = self.format_key_for_backend(key, remove_slash_prefix=True) + path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=key.endswith("/")) self.log.debug("Deleting key: %r", path) try: blob_client = self.get_blob_service_client().get_blob_client(container=self.container_name, blob=path) diff --git a/rohmu/object_storage/google.py b/rohmu/object_storage/google.py index c6e05043..c79aaa6d 100644 --- a/rohmu/object_storage/google.py +++ b/rohmu/object_storage/google.py @@ -453,7 +453,7 @@ def initial_op(domain: Any) -> HttpRequest: raise NotImplementedError(property_name) def delete_key(self, key: str) -> None: - path = self.format_key_for_backend(key) + path = self.format_key_for_backend(key, trailing_slash=key.endswith("/")) self.log.debug("Deleting key: %r", path) with self._object_client(not_found=path) as clob: # https://googleapis.github.io/google-api-python-client/docs/dyn/storage_v1.objects.html#delete diff --git a/rohmu/object_storage/s3.py b/rohmu/object_storage/s3.py index e19fd693..e5834175 100644 --- a/rohmu/object_storage/s3.py +++ b/rohmu/object_storage/s3.py @@ -332,7 +332,7 @@ def _metadata_for_key(self, key: str) -> Metadata: return response["Metadata"] def delete_key(self, key: str) -> None: - path = self.format_key_for_backend(key, remove_slash_prefix=True) + path = self.format_key_for_backend(key, remove_slash_prefix=True, trailing_slash=key.endswith("/")) self.log.debug("Deleting key: %r", path) self._metadata_for_key(path) # check that key exists self.stats.operation(StorageOperation.delete_key) @@ -342,9 +342,12 @@ def delete_key(self, key: str) -> None: def delete_keys(self, keys: Collection[str]) -> None: self.stats.operation(StorageOperation.delete_key, count=len(keys)) for batch in batched(keys, 1000): # Cannot delete more than 1000 objects at a time + formatted_keys = [ + self.format_key_for_backend(k, remove_slash_prefix=True, trailing_slash=k.endswith("/")) for k in batch + ] self.get_client().delete_objects( Bucket=self.bucket_name, - Delete={"Objects": [{"Key": self.format_key_for_backend(key, remove_slash_prefix=True)} for key in batch]}, + Delete={"Objects": [{"Key": key} for key in formatted_keys]}, ) # Note: `tree_deleted` is not used here because the operation on S3 is not atomic, i.e. # it is possible for a new object to be created after `list_objects` above diff --git a/rohmu/object_storage/swift.py b/rohmu/object_storage/swift.py index 81efa12c..7e9362d0 100644 --- a/rohmu/object_storage/swift.py +++ b/rohmu/object_storage/swift.py @@ -226,7 +226,7 @@ def _delete_object_segments(self, key: str, manifest: str) -> None: self._delete_object_plain(item["name"]) def delete_key(self, key: str) -> None: - path = self.format_key_for_backend(key) + path = self.format_key_for_backend(key, trailing_slash=key.endswith("/")) self.log.debug("Deleting key: %r", path) try: headers = self.conn.head_object(self.container_name, path) diff --git a/test/object_storage/test_s3.py b/test/object_storage/test_s3.py index e39d6fa2..1e935d6a 100644 --- a/test/object_storage/test_s3.py +++ b/test/object_storage/test_s3.py @@ -169,12 +169,16 @@ def test_operations_reporting(infra: S3Infra) -> None: def test_deletion(infra: S3Infra) -> None: - infra.transfer.delete_keys(["2", "3"]) + infra.transfer.delete_keys(["2", "3", "4/"]) infra.s3_client.delete_objects.assert_called_once_with( - Bucket="test-bucket", Delete={"Objects": [{"Key": "test-prefix/2"}, {"Key": "test-prefix/3"}]} + Bucket="test-bucket", + Delete={"Objects": [{"Key": "test-prefix/2"}, {"Key": "test-prefix/3"}, {"Key": "test-prefix/4/"}]}, ) infra.transfer.delete_key("1") infra.s3_client.delete_object.assert_called_once_with(Bucket="test-bucket", Key="test-prefix/1") + infra.s3_client.reset_mock() + infra.transfer.delete_key("2/") + infra.s3_client.delete_object.assert_called_once_with(Bucket="test-bucket", Key="test-prefix/2/") def test_get_contents_to_fileobj_raises_error_on_invalid_byte_range(infra: S3Infra) -> None: