Skip to content

Commit

Permalink
[Core-460] Catalog: Support storage blob delete wait_for_completion (…
Browse files Browse the repository at this point in the history
…#12566)

GitOrigin-RevId: 53d74f103a1324da7d592afc3a3e3f8fd0150e86
  • Loading branch information
stephencpope authored and Descartes Labs Build committed May 8, 2024
1 parent e897388 commit 3fdef19
Show file tree
Hide file tree
Showing 8 changed files with 400 additions and 191 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ The documentation for the latest release can be found at [https://docs.descartes
Changelog
=========

## Unreleased

### Catalog

- The Catalog Storage Blob deletion methods now support waiting for the operation to complete. When a blob is deleted, it is removed from the catalog immediately, and an asynchronous background task is launched to clean up the contents of the blob from the backing storage. If a blob is deleted and a new blob with the identical id is then created and uploaded before this background task completes, the background task may end up deleting the new blob's contents. As of this release, the `Blob` instance and class delete methods return a `BlobDeletionTaskStatus` object whose `wait_for_completion` method can be used to wait until the background task completes and it is safe to create a new blob with the same id. For the `Blob.delete_many` method, pass `wait_for_completion=True` to wait until all of the supplied blobs have been completely deleted. Note that for the `Blob.delete` class method this is a very slight breaking change: it used to return `True` or `False`, and now returns a `BlobDeletionTaskStatus` or `None` instead. These have the same truthiness as the previous return values and hence are very likely to behave identically in practical use.

## [3.0.5] - 2024-03-21

Bugfix only
Expand Down
12 changes: 10 additions & 2 deletions descarteslabs/core/catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .task import TaskState
from .product import (
DeletionTaskStatus,
Product,
ProductCollection,
TaskState,
)
from .band import (
Band,
Expand All @@ -38,7 +38,14 @@
ProcessingStepAttribute,
SpectralBand,
)
from .blob import Blob, BlobCollection, BlobSearch, BlobSummaryResult, StorageType
from .blob import (
Blob,
BlobCollection,
BlobDeletionTaskStatus,
BlobSearch,
BlobSummaryResult,
StorageType,
)
from .image import Image, ImageSearch, ImageSummaryResult
from .image_types import ResampleAlgorithm, DownloadFileFormat
from .image_upload import (
Expand Down Expand Up @@ -87,6 +94,7 @@
"BandType",
"Blob",
"BlobCollection",
"BlobDeletionTaskStatus",
"BlobSearch",
"BlobSummaryResult",
"CatalogClient",
Expand Down
174 changes: 165 additions & 9 deletions descarteslabs/core/catalog/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,16 @@
TypedAttribute,
parse_iso_datetime,
)
from .blob_delete import BlobDelete
from .blob_download import BlobDownload
from .catalog_base import CatalogClient, CatalogObject, check_deleted
from .catalog_base import (
CatalogClient,
CatalogObject,
check_deleted,
check_derived,
UnsavedObjectError,
)
from .search import AggregateDateField, GeoSearch, SummarySearchMixin
from .task import TaskStatus

properties = Properties()

Expand Down Expand Up @@ -911,20 +917,27 @@ def generator(response):
return cls(id=id, client=client)._do_download(dest=dest, range=range)

@classmethod
def delete_many(cls, ids, client=None):
def delete_many(
cls, ids, raise_on_missing=False, wait_for_completion=False, client=None
):
"""Delete many blobs from the Descartes Labs catalog.
Only those blobs that exist and are owned by the user will be deleted.
No errors will be raised for blobs that do not exist or are not owned by
the user. If you need to know, compare the supplied list of ids with the
returned list of delete ids.
No errors will be raised for blobs that do not exist or are visible but
not owned by the user. If you need to know, compare the supplied list of
ids with the returned list of deleted ids.
All blobs to be deleted must belong to the same purchase.
Parameters
----------
ids : list(str)
A list of blob ids to delete.
raise_on_missing : bool, optional
If True, raise an exception if any of the blobs are not found, otherwise ignore
missing blobs. Defaults to False.
wait_for_completion : bool, optional
If True, wait for the deletion to complete before returning. Defaults to False.
client : CatalogClient, optional
A `CatalogClient` instance to use for requests to the Descartes Labs catalog.
The :py:meth:`~descarteslabs.catalog.CatalogClient.get_default_client` will
Expand All @@ -941,11 +954,17 @@ def delete_many(cls, ids, client=None):
:ref:`Spurious exception <network_exceptions>` that can occur during a
network request.
"""
blob_delete = BlobDelete(ids=ids, client=client)
if client is None:
client = CatalogClient.get_default_client()

blob_delete.save()
task_status = BlobDeletionTaskStatus.create(
ids=ids, raise_on_missing=raise_on_missing, client=client
)

return blob_delete.ids
if wait_for_completion:
task_status.wait_for_completion()

return task_status.ids

def _do_download(self, dest=None, range=None):
download = BlobDownload.get(id=self.id, client=self._client)
Expand Down Expand Up @@ -993,10 +1012,147 @@ def _do_download(self, dest=None, range=None):
finally:
r.close()

@classmethod
@check_derived
def delete(cls, id, client=None):
    """Delete the blob with the given `id` from the catalog.

    Parameters
    ----------
    id : str
        The id of the object to be deleted.
    client : CatalogClient, optional
        A `CatalogClient` instance to use for requests to the Descartes Labs
        catalog.  The
        :py:meth:`~descarteslabs.catalog.CatalogClient.get_default_client` will
        be used if not set.

    Returns
    -------
    BlobDeletionTaskStatus
        The status of the deletion task which can be used to wait for
        completion. ``None`` if the object was not found.

    Raises
    ------
    ConflictError
        If the object has related objects (bands, images) that exist.
    ~descarteslabs.exceptions.ClientError or ~descarteslabs.exceptions.ServerError
        :ref:`Spurious exception <network_exceptions>` that can occur during a
        network request.

    Example
    -------
    >>> Blob.delete('my-blob-id')  # doctest: +SKIP
    """
    # Fall back to the shared default client only when none was supplied.
    catalog_client = (
        CatalogClient.get_default_client() if client is None else client
    )

    try:
        # raise_on_missing=True makes a missing blob surface as NotFoundError,
        # which is translated into a ``None`` result below.
        deletion_status = BlobDeletionTaskStatus.create(
            ids=[id], raise_on_missing=True, client=catalog_client
        )
    except NotFoundError:
        deletion_status = None

    return deletion_status

@check_deleted
def _instance_delete(self):
    """Delete this blob from the Descartes Labs catalog.

    After a successful call this instance is flagged as deleted; it should
    no longer be used and any references to it should be released.

    Returns
    -------
    BlobDeletionTaskStatus
        The status of the deletion task which can be used to wait for completion.

    Raises
    ------
    DeletedObjectError
        If this catalog object was already deleted.
    UnsavedObjectError
        If this catalog object is being deleted without having been saved.
    ~descarteslabs.exceptions.ClientError or ~descarteslabs.exceptions.ServerError
        :ref:`Spurious exception <network_exceptions>` that can occur during a
        network request.
    """
    never_saved = self.state == DocumentState.UNSAVED
    if never_saved:
        raise UnsavedObjectError("You cannot delete an unsaved object.")

    # Request deletion of just this blob; raise_on_missing=True is passed so
    # a blob that no longer exists is reported rather than silently ignored.
    deletion_status = BlobDeletionTaskStatus.create(
        ids=[self.id], raise_on_missing=True, client=self._client
    )

    # Only reached when the request did not raise (non-200 will raise an
    # exception), so the instance can now be flagged as deleted.
    self._deleted = True
    return deletion_status


class BlobCollection(Collection):
    """A `Collection` whose item type is `Blob`."""

    _item_type = Blob


# Handle circular references: `BlobCollection` needs `Blob` as its item type,
# so the reverse link can only be attached to `Blob` once both classes exist.
Blob._collection_type = BlobCollection


class BlobDeletionTaskStatus(TaskStatus):
    """The status of an asynchronous blob deletion task.

    Attributes
    ----------
    id : str
        The id of the object for which this task is running.
    status : TaskState
        The state of the task as explained in `TaskState`.
    start_datetime : datetime
        The date and time at which the task started running.
    duration_in_seconds : float
        The duration of the task.
    objects_deleted : int
        The number of objects that were deleted.
    errors : list
        In case the status is ``FAILED`` this will contain a list of errors
        that were encountered. In all other states this will not be set.
    ids : list
        The ids of the objects that were deleted.
    """

    _task_name = "delete task"
    _url = "/storage/delete/{}"

    def __init__(self, objects_deleted=None, ids=None, **kwargs):
        """Initialize the status object.

        Parameters
        ----------
        objects_deleted : int, optional
            The number of objects that were deleted.
        ids : list, optional
            The ids of the objects that were deleted.
        **kwargs
            Remaining keyword arguments are passed through unchanged to
            `TaskStatus`.
        """
        super(BlobDeletionTaskStatus, self).__init__(**kwargs)
        self.objects_deleted = objects_deleted
        self.ids = ids

    @classmethod
    def create(cls, ids, raise_on_missing, client):
        """Request deletion of the given blobs and return the task status.

        Parameters
        ----------
        ids : list(str)
            The ids of the blobs to delete.
        raise_on_missing : bool
            Sent to the service as the ``raise_on_missing`` attribute of the
            deletion request.
        client : CatalogClient
            The client whose session is used to post the request.

        Returns
        -------
        BlobDeletionTaskStatus
            An instance of the class this method was called on, built from
            the response, or ``None`` if the response status code was not 201.
        """
        # TaskStatus objects are not catalog objects so we need to do this manually
        response = client.session.post(
            "/storage/delete",
            json={
                "data": {
                    "attributes": {
                        "ids": ids,
                        "raise_on_missing": raise_on_missing,
                    },
                    "type": "storage_delete",
                }
            },
        )

        if response.status_code != 201:
            return None

        data = response.json()["data"]
        # Build the instance through ``cls`` rather than the hard-coded class
        # name so that subclasses calling ``create`` get their own type back.
        return cls(id=data["id"], _client=client, **data["attributes"])

    def __repr__(self):
        """Return the base representation plus a deleted-object count, if any."""
        text = super(BlobDeletionTaskStatus, self).__repr__()

        # Skipped when the count is ``None`` or zero.
        if self.objects_deleted:
            text += "\n  - {:,} objects deleted".format(self.objects_deleted)

        return text
31 changes: 0 additions & 31 deletions descarteslabs/core/catalog/blob_delete.py

This file was deleted.

Loading

0 comments on commit 3fdef19

Please sign in to comment.