diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index ec2ef2334..a9258525f 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -3,6 +3,7 @@ from large_image_source_dicom.dicomweb_utils import get_dicomweb_metadata from requests.exceptions import HTTPError +from girder.api.rest import setContentDisposition, setResponseHeader from girder.exceptions import ValidationException from girder.models.file import File from girder.models.folder import Folder @@ -11,6 +12,8 @@ DICOMWEB_META_KEY = 'dicomweb_meta' +BUF_SIZE = 65536 + class DICOMwebAssetstoreAdapter(AbstractAssetstoreAdapter): """ @@ -104,20 +107,163 @@ def deleteFile(self, file): def downloadFile(self, file, offset=0, headers=True, endByte=None, contentDisposition=None, extraParameters=None, **kwargs): - # FIXME: do we want to support downloading files? We probably - # wouldn't download them the regular way, but we could instead - # use a dicomweb-client like so: - # instance = client.retrieve_instance( - # study_instance_uid=..., - # series_instance_uid=..., - # sop_instance_uid=..., - # ) - # pydicom.filewriter.write_file('output_name.dcm', instance) - msg = 'Download support not yet implemented for DICOMweb files.' - raise NotImplementedError( - msg, + + if offset != 0 or endByte is not None: + # FIXME: implement range requests + msg = 'Range requests are not yet implemented' + raise NotImplementedError(msg) + + from dicomweb_client.web import _Transaction + + dicom_uids = file['dicom_uids'] + study_uid = dicom_uids['study_uid'] + series_uid = dicom_uids['series_uid'] + instance_uid = dicom_uids['instance_uid'] + + client = _create_dicomweb_client(self.assetstore_meta) + + if headers: + setResponseHeader('Content-Type', file['mimeType']) + setContentDisposition(file['name'], contentDisposition or 'attachment') + + # The filesystem assetstore calls the following function, which sets + # the above and also sets the range and content-length headers: + # `self.setContentHeaders(file, offset, endByte, contentDisposition)` + # However, we can't call that since we don't have a great way of + # determining the DICOM file size without downloading the whole thing. + # FIXME: call that function if we find a way to determine file size. + + # Create the URL + url = client._get_instances_url( + _Transaction.RETRIEVE, + study_uid, + series_uid, + instance_uid, ) + # Build the headers + transfer_syntax = '*' + accept_parts = [ + 'multipart/related', + 'type="application/dicom"', + f'transfer-syntax={transfer_syntax}', + ] + headers = { + 'Accept': '; '.join(accept_parts), + } + + def stream(): + # Perform the request + response = client._http_get(url, headers=headers, stream=True) + for chunk in self._stream_retrieve_instance_response(response): + yield chunk + + return stream + + def _extract_media_type_and_boundary(self, response): + content_type = response.headers['content-type'] + media_type, *ct_info = [ct.strip() for ct in content_type.split(';')] + boundary = None + for item in ct_info: + attr, _, value = item.partition('=') + if attr.lower() == 'boundary': + boundary = value.strip('"').encode() + break + + return media_type, boundary + + def _stream_retrieve_instance_response(self, response): + # The first part of this function was largely copied from dicomweb-client's + # _decode_multipart_message() function. But we can't use that function here + # because it relies on reading the whole DICOM file into memory. We want to + # avoid that and stream in chunks. + + # Split the content-type to find the media type and boundary. + media_type, boundary = self._extract_media_type_and_boundary(response) + if media_type.lower() != 'multipart/related': + msg = f'Unexpected media type: "{media_type}". Expected "multipart/related".' + raise ValueError(msg) + + # Ensure we have the multipart/related boundary. + # The beginning boundary and end boundary look slightly different (in my + # examples, beginning looks like '--{boundary}\r\n', and ending looks like + # '\r\n--{boundary}--'). But we skip over the beginning boundary anyways + # since it is before the message body. An end boundary might look like this: + # \r\n--50d7ccd118978542c422543a7156abfce929e7615bc024e533c85801cd77-- + if boundary is None: + content_type = response.headers['content-type'] + msg = f'Failed to locate boundary in content-type: {content_type}' + raise ValueError(msg) + + # Both dicomweb-client and requests-toolbelt check for + # the ending boundary exactly like so: + ending = b'\r\n--' + boundary + + # Sometimes, there are a few extra bytes after the ending, such + # as '--' and '\r\n'. Imaging Data Commons has '--\r\n' at the end. + # But we don't care about what comes after the ending. As soon as we + # encounter the ending, we are done. + ending_size = len(ending) + + # Make sure the buffer is at least large enough to contain the + # ending_size - 1, so that the ending cannot be split between more than 2 chunks. + buffer_size = max(BUF_SIZE, ending_size - 1) + + with response: + # Create our iterator + iterator = response.iter_content(buffer_size) + + # First, stream until we encounter the first `\r\n\r\n`, + # which denotes the end of the header section. + header_found = False + end_header_delimiter = b'\r\n\r\n' + for chunk in iterator: + if end_header_delimiter in chunk: + idx = chunk.index(end_header_delimiter) + # Save the first section of data. We will yield it later. + prev_chunk = chunk[idx + len(end_header_delimiter):] + header_found = True + break + + if not header_found: + msg = 'Failed to find header in response content' + raise ValueError(msg) + + # Now the header has been finished. Stream the data until + # we encounter the ending boundary or finish the data. + # The "prev_chunk" will start out set to the section right after the header. + for chunk in iterator: + # Ensure the chunk is large enough to contain the ending_size - 1, so + # we can be sure the ending won't be split across more than 2 chunks. + while len(chunk) < ending_size - 1: + try: + chunk += next(iterator) + except StopIteration: + break + + # Check if the ending is split between the previous and current chunks. + if ending in prev_chunk + chunk[:ending_size - 1]: + # We found the ending! Remove the ending boundary and return. + data = prev_chunk + chunk[:ending_size - 1] + yield data.split(ending, maxsplit=1)[0] + return + + if prev_chunk: + yield prev_chunk + + prev_chunk = chunk + + # We did not find the ending while looping. + # Check if it is in the final chunk. + if ending in prev_chunk: + # Found the ending in the final chunk. + yield prev_chunk.split(ending, maxsplit=1)[0] + return + + # We should have encountered the ending earlier and returned + msg = 'Failed to find ending boundary in response content' + raise ValueError(msg) + def importData(self, parent, parentType, params, progress, user, **kwargs): """ Import DICOMweb WSI instances from a DICOMweb server. @@ -155,6 +301,7 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): study_uid_key = dicom_key_to_tag('StudyInstanceUID') series_uid_key = dicom_key_to_tag('SeriesInstanceUID') + instance_uid_key = dicom_key_to_tag('SOPInstanceUID') # We are only searching for WSI datasets. Ignore all others. # FIXME: is this actually working? For the SLIM server at @@ -192,25 +339,33 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): # Set the DICOMweb metadata item['dicomweb_meta'] = get_dicomweb_metadata(client, study_uid, series_uid) - item = Item().save(item) - - # Create a placeholder file with the same name - file = File().createFile( - name=f'{series_uid}.dcm', - creator=user, - item=item, - reuseExisting=True, - assetstore=self.assetstore, - mimeType=None, - size=0, - saveFile=False, - ) - file['dicomweb_meta'] = { + item['dicom_uids'] = { 'study_uid': study_uid, 'series_uid': series_uid, } - file['imported'] = True - File().save(file) + item = Item().save(item) + + instance_results = client.search_for_instances(study_uid, series_uid) + for instance in instance_results: + instance_uid = instance[instance_uid_key]['Value'][0] + + file = File().createFile( + name=f'{instance_uid}.dcm', + creator=user, + item=item, + reuseExisting=True, + assetstore=self.assetstore, + mimeType='application/dicom', + size=None, + saveFile=False, + ) + file['dicom_uids'] = { + 'study_uid': study_uid, + 'series_uid': series_uid, + 'instance_uid': instance_uid, + } + file['imported'] = True + File().save(file) items.append(item) diff --git a/sources/dicom/large_image_source_dicom/girder_source.py b/sources/dicom/large_image_source_dicom/girder_source.py index 557591b33..731e72df8 100644 --- a/sources/dicom/large_image_source_dicom/girder_source.py +++ b/sources/dicom/large_image_source_dicom/girder_source.py @@ -59,15 +59,14 @@ def _getFilesystemLargeImagePath(self): def _getDICOMwebLargeImagePath(self, assetstore): meta = assetstore[DICOMWEB_META_KEY] - file = Item().childFiles(self.item, limit=1)[0] - file_meta = file['dicomweb_meta'] + item_uids = self.item['dicom_uids'] adapter = assetstore_utilities.getAssetstoreAdapter(assetstore) return { 'url': meta['url'], - 'study_uid': file_meta['study_uid'], - 'series_uid': file_meta['series_uid'], + 'study_uid': item_uids['study_uid'], + 'series_uid': item_uids['series_uid'], # The following are optional 'qido_prefix': meta.get('qido_prefix'), 'wado_prefix': meta.get('wado_prefix'), diff --git a/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js b/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js index ddae35380..653a516ab 100644 --- a/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js +++ b/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js @@ -13,9 +13,12 @@ describe('DICOMWeb assetstore', function () { 'Admin', 'Admin', 'adminpassword!')); + it('Create an assetstore and import data', function () { var destinationId; var destinationType; + var itemId; + var fileId; // After importing, we will verify that this item exists const verifyItemName = '1.3.6.1.4.1.5962.99.1.3205815762.381594633.1639588388306.2.0'; @@ -195,7 +198,38 @@ describe('DICOMWeb assetstore', function () { } }).responseJSON.item; - return items.length > 0 && items[0].largeImage !== undefined; + if (items.length === 0 || items[0].largeImage === undefined) { + return false; + } + + // Save the itemId, and the file id + itemId = items[0]['_id']; + fileId = items[0].largeImage.fileId; + return true }, 'Wait for large images to be present'); + + // Verify that we can download the item + waitsFor(function () { + const resp = girder.rest.restRequest({ + url: 'item/' + itemId + '/download', + type: 'GET', + async: false, + }); + + // Should be larger than 10 million bytes + return resp.status === 200 && resp.responseText.length > 10000000; + }, 'Wait to download all DICOM files in the item'); + + // Verify that we can download a single file + waitsFor(function () { + const resp = girder.rest.restRequest({ + url: 'file/' + fileId + '/download', + type: 'GET', + async: false, + }); + + // Should be larger than 500k bytes + return resp.status === 200 && resp.responseText.length > 500000; + }, 'Wait to download a single DICOM file'); }); });