From 0513abea36e28b13341dda31a2540a5f0e201aaa Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Tue, 19 Dec 2023 02:00:14 -0600 Subject: [PATCH 01/12] Separate instances into separate files This also adds download support. Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 85 ++++++++++++------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index ec2ef2334..5c7d2dde0 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -104,19 +104,39 @@ def deleteFile(self, file): def downloadFile(self, file, offset=0, headers=True, endByte=None, contentDisposition=None, extraParameters=None, **kwargs): - # FIXME: do we want to support downloading files? We probably - # wouldn't download them the regular way, but we could instead - # use a dicomweb-client like so: - # instance = client.retrieve_instance( - # study_instance_uid=..., - # series_instance_uid=..., - # sop_instance_uid=..., - # ) - # pydicom.filewriter.write_file('output_name.dcm', instance) - msg = 'Download support not yet implemented for DICOMweb files.' - raise NotImplementedError( - msg, - ) + + dicomweb_meta = file['dicomweb_meta'] + study_uid = dicomweb_meta['study_uid'] + series_uid = dicomweb_meta['series_uid'] + instance_uid = dicomweb_meta['instance_uid'] + + client = _create_dicomweb_client(self.assetstore_meta) + + def stream(): + from dicomweb_client.web import _Transaction + + url = client._get_instances_url( + _Transaction.RETRIEVE, + study_uid, + series_uid, + instance_uid + ) + + transfer_syntax = '*' + accept_parts = [ + 'multipart/related', + 'type="application/dicom"', + f'transfer-syntax={transfer_syntax}', + ] + headers = { + 'Accept': '; '.join(accept_parts), + } + + response = client._http_get(url, headers=headers) + for part in client._decode_multipart_message(response, False): + yield part + + return stream def importData(self, parent, parentType, params, progress, user, **kwargs): """ @@ -155,6 +175,7 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): study_uid_key = dicom_key_to_tag('StudyInstanceUID') series_uid_key = dicom_key_to_tag('SeriesInstanceUID') + instance_uid_key = dicom_key_to_tag('SOPInstanceUID') # We are only searching for WSI datasets. Ignore all others. # FIXME: is this actually working? For the SLIM server at @@ -194,23 +215,27 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): item['dicomweb_meta'] = get_dicomweb_metadata(client, study_uid, series_uid) item = Item().save(item) - # Create a placeholder file with the same name - file = File().createFile( - name=f'{series_uid}.dcm', - creator=user, - item=item, - reuseExisting=True, - assetstore=self.assetstore, - mimeType=None, - size=0, - saveFile=False, - ) - file['dicomweb_meta'] = { - 'study_uid': study_uid, - 'series_uid': series_uid, - } - file['imported'] = True - File().save(file) + instance_results = client.search_for_instances(study_uid, series_uid) + for instance in instance_results: + instance_uid = instance[instance_uid_key]['Value'][0] + + file = File().createFile( + name=f'{instance_uid}.dcm', + creator=user, + item=item, + reuseExisting=True, + assetstore=self.assetstore, + mimeType=None, + size=0, + saveFile=False, + ) + file['dicomweb_meta'] = { + 'study_uid': study_uid, + 'series_uid': series_uid, + 'instance_uid': instance_uid, + } + file['imported'] = True + File().save(file) items.append(item) From bf41cdecbdec05c9fdf1b30a204ccb8a0c82819e Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Mon, 8 Jan 2024 12:45:18 -0600 Subject: [PATCH 02/12] Attempt a range request for file downloads This attempts a range request for file downloads, even though for every case we've seen, the DICOMweb server does not honor it... Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index 5c7d2dde0..b666cb214 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -132,6 +132,12 @@ def stream(): 'Accept': '; '.join(accept_parts), } + if offset != 0 or endByte is not None: + # Attempt to make a range request (although all DICOMweb + # servers we have seen do not honor it) + end_str = '' if endByte is None else endByte + headers['Range'] = f'bytes={offset}-{end_str}' + response = client._http_get(url, headers=headers) for part in client._decode_multipart_message(response, False): yield part From a37f84c921435af2b79fd947dca7e03ffc7f7f48 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Mon, 8 Jan 2024 14:49:26 -0600 Subject: [PATCH 03/12] Use 'dicom_uids' key for UIDs This is more descriptive of what it contains compared to 'dicomweb_meta'. Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 14 +++++++++----- .../large_image_source_dicom/girder_source.py | 7 +++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index b666cb214..8330f9e61 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -105,10 +105,10 @@ def deleteFile(self, file): def downloadFile(self, file, offset=0, headers=True, endByte=None, contentDisposition=None, extraParameters=None, **kwargs): - dicomweb_meta = file['dicomweb_meta'] - study_uid = dicomweb_meta['study_uid'] - series_uid = dicomweb_meta['series_uid'] - instance_uid = dicomweb_meta['instance_uid'] + dicom_uids = file['dicom_uids'] + study_uid = dicom_uids['study_uid'] + series_uid = dicom_uids['series_uid'] + instance_uid = dicom_uids['instance_uid'] client = _create_dicomweb_client(self.assetstore_meta) @@ -219,6 +219,10 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): # Set the DICOMweb metadata item['dicomweb_meta'] = get_dicomweb_metadata(client, study_uid, series_uid) + item['dicom_uids'] = { + 'study_uid': study_uid, + 'series_uid': series_uid, + } item = Item().save(item) instance_results = client.search_for_instances(study_uid, series_uid) @@ -235,7 +239,7 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): size=0, saveFile=False, ) - file['dicomweb_meta'] = { + file['dicom_uids'] = { 'study_uid': study_uid, 'series_uid': series_uid, 'instance_uid': instance_uid, diff --git a/sources/dicom/large_image_source_dicom/girder_source.py b/sources/dicom/large_image_source_dicom/girder_source.py index 557591b33..731e72df8 100644 --- a/sources/dicom/large_image_source_dicom/girder_source.py +++ b/sources/dicom/large_image_source_dicom/girder_source.py @@ -59,15 +59,14 @@ def _getFilesystemLargeImagePath(self): def _getDICOMwebLargeImagePath(self, assetstore): meta = assetstore[DICOMWEB_META_KEY] - file = Item().childFiles(self.item, limit=1)[0] - file_meta = file['dicomweb_meta'] + item_uids = self.item['dicom_uids'] adapter = assetstore_utilities.getAssetstoreAdapter(assetstore) return { 'url': meta['url'], - 'study_uid': file_meta['study_uid'], - 'series_uid': file_meta['series_uid'], + 'study_uid': item_uids['study_uid'], + 'series_uid': item_uids['series_uid'], # The following are optional 'qido_prefix': meta.get('qido_prefix'), 'wado_prefix': meta.get('wado_prefix'), From a754b90e0d3a425f07fb05ed0d09d9404267f203 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Tue, 9 Jan 2024 16:02:33 -0600 Subject: [PATCH 04/12] Add downloading the item to the tests Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 9 ++++++--- .../web_client_specs/dicomWebSpec.js | 19 ++++++++++++++++++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index 8330f9e61..5ca75d391 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -105,6 +105,8 @@ def deleteFile(self, file): def downloadFile(self, file, offset=0, headers=True, endByte=None, contentDisposition=None, extraParameters=None, **kwargs): + from dicomweb_client.web import _Transaction + dicom_uids = file['dicom_uids'] study_uid = dicom_uids['study_uid'] series_uid = dicom_uids['series_uid'] @@ -113,8 +115,7 @@ def downloadFile(self, file, offset=0, headers=True, endByte=None, client = _create_dicomweb_client(self.assetstore_meta) def stream(): - from dicomweb_client.web import _Transaction - + # Create the URL url = client._get_instances_url( _Transaction.RETRIEVE, study_uid, @@ -122,6 +123,7 @@ def stream(): instance_uid ) + # Build the headers transfer_syntax = '*' accept_parts = [ 'multipart/related', @@ -138,6 +140,7 @@ def stream(): end_str = '' if endByte is None else endByte headers['Range'] = f'bytes={offset}-{end_str}' + # Perform the request response = client._http_get(url, headers=headers) for part in client._decode_multipart_message(response, False): yield part @@ -235,7 +238,7 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): item=item, reuseExisting=True, assetstore=self.assetstore, - mimeType=None, + mimeType='application/dicom', size=0, saveFile=False, ) diff --git a/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js b/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js index ddae35380..d0d4c9deb 100644 --- a/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js +++ b/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js @@ -16,6 +16,7 @@ describe('DICOMWeb assetstore', function () { it('Create an assetstore and import data', function () { var destinationId; var destinationType; + var itemId; // After importing, we will verify that this item exists const verifyItemName = '1.3.6.1.4.1.5962.99.1.3205815762.381594633.1639588388306.2.0'; @@ -195,7 +196,23 @@ describe('DICOMWeb assetstore', function () { } }).responseJSON.item; - return items.length > 0 && items[0].largeImage !== undefined; + if (items.length === 0 || items[0].largeImage === undefined) { + return false; + } + + // Save the itemId + itemId = items[0]['_id']; + return true }, 'Wait for large images to be present'); + + // Verify that we can download the item + waitsFor(function () { + const resp = girder.rest.restRequest({ + url: 'item/' + itemId + '/download', + type: 'GET', + async: false, + }); + return resp.status === 200; + }, 'Wait to download the DICOM files'); }); }); From 14f9c2e7745153cdf0c4cf826b26a41d4e3acf4c Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Tue, 9 Jan 2024 16:07:57 -0600 Subject: [PATCH 05/12] Add trailing comma Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index 5ca75d391..6587ada38 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -120,7 +120,7 @@ def stream(): _Transaction.RETRIEVE, study_uid, series_uid, - instance_uid + instance_uid, ) # Build the headers From 164428d799ba21cce929ae1d348e687588491e79 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Wed, 10 Jan 2024 14:26:23 -0600 Subject: [PATCH 06/12] Fix mimetype when downloading files This also re-organizes a little code and adds FIXMEs for the range requests parts. Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 67 ++++++++++++------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index 6587ada38..121e45195 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -3,6 +3,7 @@ from large_image_source_dicom.dicomweb_utils import get_dicomweb_metadata from requests.exceptions import HTTPError +from girder.api.rest import setContentDisposition, setResponseHeader from girder.exceptions import ValidationException from girder.models.file import File from girder.models.folder import Folder @@ -105,6 +106,11 @@ def deleteFile(self, file): def downloadFile(self, file, offset=0, headers=True, endByte=None, contentDisposition=None, extraParameters=None, **kwargs): + if offset != 0 or endByte is not None: + # FIXME: implement range requests + msg = 'Range requests are not yet implemented' + raise NotImplementedError(msg) + from dicomweb_client.web import _Transaction dicom_uids = file['dicom_uids'] @@ -114,35 +120,48 @@ def downloadFile(self, file, offset=0, headers=True, endByte=None, client = _create_dicomweb_client(self.assetstore_meta) - def stream(): - # Create the URL - url = client._get_instances_url( - _Transaction.RETRIEVE, - study_uid, - series_uid, - instance_uid, - ) - - # Build the headers - transfer_syntax = '*' - accept_parts = [ - 'multipart/related', - 'type="application/dicom"', - f'transfer-syntax={transfer_syntax}', - ] - headers = { - 'Accept': '; '.join(accept_parts), - } + if headers: + setResponseHeader('Content-Type', file['mimeType']) + setContentDisposition(file['name'], contentDisposition or 'attachment') + + # The filesystem assetstore calls the following function, which sets + # the above and also sets the range and content-length headers: + # `self.setContentHeaders(file, offset, endByte, contentDisposition)` + # However, we can't call that since we don't have a great way of + # determining the DICOM file size without downloading the whole thing. + # FIXME: call that function if we find a way to determine file size. + + # Create the URL + url = client._get_instances_url( + _Transaction.RETRIEVE, + study_uid, + series_uid, + instance_uid, + ) + + # Build the headers + transfer_syntax = '*' + accept_parts = [ + 'multipart/related', + 'type="application/dicom"', + f'transfer-syntax={transfer_syntax}', + ] + headers = { + 'Accept': '; '.join(accept_parts), + } - if offset != 0 or endByte is not None: - # Attempt to make a range request (although all DICOMweb - # servers we have seen do not honor it) - end_str = '' if endByte is None else endByte - headers['Range'] = f'bytes={offset}-{end_str}' + # FIXME: add this back in when we support range requests + # if offset != 0 or endByte is not None: + # # Attempt to make a range request (although all DICOMweb + # # servers we have seen do not honor it) + # end_str = '' if endByte is None else endByte + # headers['Range'] = f'bytes={offset}-{end_str}' + def stream(): # Perform the request response = client._http_get(url, headers=headers) for part in client._decode_multipart_message(response, False): + # This function produces a single, whole DICOM file yield part return stream From ca7186bef74e33c83318bd3d5d1818e7dcd32e6a Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Fri, 12 Jan 2024 01:52:23 -0600 Subject: [PATCH 07/12] Stream data when performing DICOM downloads This prevents loading all of the data into memory, which is what we were doing before. It is somewhat complicated, but that is because the DICOMweb specification requires it to be multipart/related with boundaries. We have to be sure we remove those boundaries and stream only the DICOM file contents. Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 113 ++++++++++++++++-- 1 file changed, 102 insertions(+), 11 deletions(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index 121e45195..c0d16bce3 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -12,6 +12,8 @@ DICOMWEB_META_KEY = 'dicomweb_meta' +BUF_SIZE = 65536 + class DICOMwebAssetstoreAdapter(AbstractAssetstoreAdapter): """ @@ -150,22 +152,111 @@ def downloadFile(self, file, offset=0, headers=True, endByte=None, 'Accept': '; '.join(accept_parts), } - # FIXME: add this back in when we support range requests - # if offset != 0 or endByte is not None: - # # Attempt to make a range request (although all DICOMweb - # # servers we have seen do not honor it) - # end_str = '' if endByte is None else endByte - # headers['Range'] = f'bytes={offset}-{end_str}' - def stream(): # Perform the request - response = client._http_get(url, headers=headers) - for part in client._decode_multipart_message(response, False): - # This function produces a single, whole DICOM file - yield part + response = client._http_get(url, headers=headers, stream=True) + for chunk in self._stream_retrieve_instance_response(response): + yield chunk return stream + def _stream_retrieve_instance_response(self, response): + # The first part of this function was largely copied from dicomweb-client's + # _decode_multipart_message() function. But we can't use that function here + # because it relies on reading the whole DICOM file into memory. We want to + # avoid that and stream in chunks. + + # Split the content type and verify that it is multipart/related. + content_type = response.headers['content-type'] + media_type, *ct_info = [ct.strip() for ct in content_type.split(';')] + if media_type.lower() != 'multipart/related': + msg = f'Unexpected media type: "{media_type}". Expected "multipart/related".' + raise ValueError(msg) + + # Find the boundary for the multipart/related message. + # The beginning boundary and end boundary look slightly different (in my + # examples, beginning looks like '--{boundary}\r\n', and ending looks like + # '\r\n--{boundary}--'). But we skip over the beginning boundary anyways + # since it is before the message body. An end boundary might look like this: + # \r\n--50d7ccd118978542c422543a7156abfce929e7615bc024e533c85801cd77-- + boundary = None + for item in ct_info: + attr, _, value = item.partition('=') + if attr.lower() == 'boundary': + boundary = value.strip('"').encode() + break + + if boundary is None: + msg = f'Failed to locate boundary in content-type: {content_type}' + raise ValueError(msg) + + # Both dicomweb-client and requests-toolbelt check for + # the ending boundary exactly like so: + ending = b'\r\n--' + boundary + + # Sometimes, there are a few extra bytes after the ending, such + # as '--' and '\r\n'. Imaging Data Commons has '--\r\n' at the end. + # We will make this number large enough to capture possible extra bytes. + max_ending_size = len(ending) + 6 + + # Make sure the buffer is at least large enough to contain the + # full ending, so we can check neighboring chunks for split endings. + buffer_size = max(BUF_SIZE, max_ending_size) + + with response: + # Create our iterator + iterator = response.iter_content(buffer_size) + + # First, stream until we encounter the first `\r\n\r\n`, + # which denotes the end of the header section. + header_found = False + for chunk in iterator: + if b'\r\n\r\n' in chunk: + idx = chunk.index(b'\r\n\r\n') + # Yield the first section of data + yield chunk[idx + 4:] + header_found = True + break + + if not header_found: + msg = 'Failed to find header in response content' + raise ValueError(msg) + + # Now the header has been finished. Stream the data until + # we encounter the ending boundary or finish the data. + prev_chunk = b'' + for chunk in iterator: + # Ensure the chunk is large enough to contain the whole ending, so + # we can be sure the ending won't be split across 3 or more chunks. + while len(chunk) < max_ending_size: + try: + chunk += next(iterator) + except StopIteration: + break + + # Check if the ending is split between two different chunks. + if ending in prev_chunk + chunk[:max_ending_size]: + # We found the ending! Remove the ending boundary and return. + data = prev_chunk + chunk[:max_ending_size] + yield data.split(ending, maxsplit=1)[0] + return + + if prev_chunk: + yield prev_chunk + + prev_chunk = chunk + + # We did not find the ending while looping. + # Check if it is in the final chunk. + if ending in prev_chunk: + # Found the ending in the final chunk. + yield prev_chunk.split(ending, maxsplit=1)[0] + return + + # We should have encountered the ending earlier and returned + msg = 'Failed to find ending boundary in response content' + raise ValueError(msg) + def importData(self, parent, parentType, params, progress, user, **kwargs): """ Import DICOMweb WSI instances from a DICOMweb server. From 60c79fa91a01e3a93bbc550d5b51cf86c0b17c6f Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Fri, 12 Jan 2024 10:54:01 -0600 Subject: [PATCH 08/12] Add function to fix flake8 complexity issue Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index c0d16bce3..6908ca9c2 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -160,33 +160,38 @@ def stream(): return stream + def _extract_media_type_and_boundary(self, response): + content_type = response.headers['content-type'] + media_type, *ct_info = [ct.strip() for ct in content_type.split(';')] + boundary = None + for item in ct_info: + attr, _, value = item.partition('=') + if attr.lower() == 'boundary': + boundary = value.strip('"').encode() + break + + return media_type, boundary + def _stream_retrieve_instance_response(self, response): # The first part of this function was largely copied from dicomweb-client's # _decode_multipart_message() function. But we can't use that function here # because it relies on reading the whole DICOM file into memory. We want to # avoid that and stream in chunks. - # Split the content type and verify that it is multipart/related. - content_type = response.headers['content-type'] - media_type, *ct_info = [ct.strip() for ct in content_type.split(';')] + # Split the content-type to find the media type and boundary. + media_type, boundary = self._extract_media_type_and_boundary(response) if media_type.lower() != 'multipart/related': msg = f'Unexpected media type: "{media_type}". Expected "multipart/related".' raise ValueError(msg) - # Find the boundary for the multipart/related message. + # Ensure we have the multipart/related boundary. # The beginning boundary and end boundary look slightly different (in my # examples, beginning looks like '--{boundary}\r\n', and ending looks like # '\r\n--{boundary}--'). But we skip over the beginning boundary anyways # since it is before the message body. An end boundary might look like this: # \r\n--50d7ccd118978542c422543a7156abfce929e7615bc024e533c85801cd77-- - boundary = None - for item in ct_info: - attr, _, value = item.partition('=') - if attr.lower() == 'boundary': - boundary = value.strip('"').encode() - break - if boundary is None: + content_type = response.headers['content-type'] msg = f'Failed to locate boundary in content-type: {content_type}' raise ValueError(msg) From ea3aa54f9fe7874d3c9f0ed316341ac5364a3b35 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Mon, 15 Jan 2024 09:55:08 -0600 Subject: [PATCH 09/12] Wait to yield section after the header Although I don't think it is likely (unless the chunk size is large), it is possible that the section right after the header might contain part of the ending. Thus, we should wait and check it, just as we do for all other chunks, before yielding it. Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index 6908ca9c2..a0261b45d 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -218,8 +218,8 @@ def _stream_retrieve_instance_response(self, response): for chunk in iterator: if b'\r\n\r\n' in chunk: idx = chunk.index(b'\r\n\r\n') - # Yield the first section of data - yield chunk[idx + 4:] + # Save the first section of data. We will yield it later. + prev_chunk = chunk[idx + 4:] header_found = True break @@ -229,7 +229,7 @@ def _stream_retrieve_instance_response(self, response): # Now the header has been finished. Stream the data until # we encounter the ending boundary or finish the data. - prev_chunk = b'' + # The "prev_chunk" will start out set to the section right after the header. for chunk in iterator: # Ensure the chunk is large enough to contain the whole ending, so # we can be sure the ending won't be split across 3 or more chunks. From 1f3714e9851d915f417125ba1fecf13c34c9d081 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Mon, 15 Jan 2024 11:47:06 -0600 Subject: [PATCH 10/12] Add test to download a single DICOM file We also now verify that each download contains a certain amount of bytes. Signed-off-by: Patrick Avery --- .../web_client_specs/dicomWebSpec.js | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js b/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js index d0d4c9deb..653a516ab 100644 --- a/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js +++ b/sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js @@ -13,10 +13,12 @@ describe('DICOMWeb assetstore', function () { 'Admin', 'Admin', 'adminpassword!')); + it('Create an assetstore and import data', function () { var destinationId; var destinationType; var itemId; + var fileId; // After importing, we will verify that this item exists const verifyItemName = '1.3.6.1.4.1.5962.99.1.3205815762.381594633.1639588388306.2.0'; @@ -200,8 +202,9 @@ describe('DICOMWeb assetstore', function () { return false; } - // Save the itemId + // Save the itemId, and the file id itemId = items[0]['_id']; + fileId = items[0].largeImage.fileId; return true }, 'Wait for large images to be present'); @@ -212,7 +215,21 @@ describe('DICOMWeb assetstore', function () { type: 'GET', async: false, }); - return resp.status === 200; - }, 'Wait to download the DICOM files'); + + // Should be larger than 10 million bytes + return resp.status === 200 && resp.responseText.length > 10000000; + }, 'Wait to download all DICOM files in the item'); + + // Verify that we can download a single file + waitsFor(function () { + const resp = girder.rest.restRequest({ + url: 'file/' + fileId + '/download', + type: 'GET', + async: false, + }); + + // Should be larger than 500k bytes + return resp.status === 200 && resp.responseText.length > 500000; + }, 'Wait to download a single DICOM file'); }); }); From cfe39477bfeb346fcedcc72b978dafac9b05d188 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Tue, 16 Jan 2024 20:42:29 -0600 Subject: [PATCH 11/12] Reduce the ending check size to be ending_size - 1 If the ending is split between two chunks, then at most, the second chunk will contain ending_size - 1 bytes of the ending. So we should just check for this. We also don't need to add extra bytes to check for in the ending, because we don't care what comes after the ending. Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index a0261b45d..39cff5c53 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -201,12 +201,13 @@ def _stream_retrieve_instance_response(self, response): # Sometimes, there are a few extra bytes after the ending, such # as '--' and '\r\n'. Imaging Data Commons has '--\r\n' at the end. - # We will make this number large enough to capture possible extra bytes. - max_ending_size = len(ending) + 6 + # But we don't care about what comes after the ending. As soon as we + # encounter the ending, we are done. + ending_size = len(ending) # Make sure the buffer is at least large enough to contain the - # full ending, so we can check neighboring chunks for split endings. - buffer_size = max(BUF_SIZE, max_ending_size) + # ending_size - 1, so that the ending cannot be split between more than 2 chunks. + buffer_size = max(BUF_SIZE, ending_size - 1) with response: # Create our iterator @@ -215,11 +216,12 @@ def _stream_retrieve_instance_response(self, response): # First, stream until we encounter the first `\r\n\r\n`, # which denotes the end of the header section. header_found = False + end_header_delimiter = b'\r\n\r\n' for chunk in iterator: - if b'\r\n\r\n' in chunk: - idx = chunk.index(b'\r\n\r\n') + if end_header_delimiter in chunk: + idx = chunk.index(end_header_delimiter) # Save the first section of data. We will yield it later. - prev_chunk = chunk[idx + 4:] + prev_chunk = chunk[idx + len(end_header_delimiter):] header_found = True break @@ -231,18 +233,18 @@ def _stream_retrieve_instance_response(self, response): # we encounter the ending boundary or finish the data. # The "prev_chunk" will start out set to the section right after the header. for chunk in iterator: - # Ensure the chunk is large enough to contain the whole ending, so - # we can be sure the ending won't be split across 3 or more chunks. - while len(chunk) < max_ending_size: + # Ensure the chunk is large enough to contain the ending_size - 1, so + # we can be sure the ending won't be split across more than 2 chunks. + while len(chunk) < ending_size - 1: try: chunk += next(iterator) except StopIteration: break - # Check if the ending is split between two different chunks. - if ending in prev_chunk + chunk[:max_ending_size]: + # Check if the ending is split between the previous and current chunks. + if ending in prev_chunk + chunk[:ending_size - 1]: # We found the ending! Remove the ending boundary and return. - data = prev_chunk + chunk[:max_ending_size] + data = prev_chunk + chunk[:ending_size - 1] yield data.split(ending, maxsplit=1)[0] return From 15476f6d7a836d3d6a4e344d0302fcdfaa685a01 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Mon, 22 Jan 2024 11:37:07 -0600 Subject: [PATCH 12/12] Set file size to be "None" This prevents a misleading file size of 0 bytes being displayed in girder. This is working without issues so far. Signed-off-by: Patrick Avery --- .../assetstore/dicomweb_assetstore_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index 39cff5c53..a9258525f 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -356,7 +356,7 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): reuseExisting=True, assetstore=self.assetstore, mimeType='application/dicom', - size=0, + size=None, saveFile=False, ) file['dicom_uids'] = {