Skip to content

Commit

Permalink
Merge pull request #1429 from psavery/dicomweb-separate-files
Browse files Browse the repository at this point in the history
Add support for downloading DICOMweb files
  • Loading branch information
manthey authored Jan 22, 2024
2 parents ae6470a + 15476f6 commit 247f711
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from large_image_source_dicom.dicomweb_utils import get_dicomweb_metadata
from requests.exceptions import HTTPError

from girder.api.rest import setContentDisposition, setResponseHeader
from girder.exceptions import ValidationException
from girder.models.file import File
from girder.models.folder import Folder
Expand All @@ -11,6 +12,8 @@

# Key under which an assetstore document stores its DICOMweb settings.
# NOTE(review): the large_image girder source reads `url`, `qido_prefix` and
# `wado_prefix` from assetstore[DICOMWEB_META_KEY] -- confirm the full schema.
DICOMWEB_META_KEY = 'dicomweb_meta'

# Preferred chunk size, in bytes, requested from the HTTP response when
# streaming a DICOM instance back to the caller.
BUF_SIZE = 65536


class DICOMwebAssetstoreAdapter(AbstractAssetstoreAdapter):
"""
Expand Down Expand Up @@ -104,20 +107,163 @@ def deleteFile(self, file):

def downloadFile(self, file, offset=0, headers=True, endByte=None,
                 contentDisposition=None, extraParameters=None, **kwargs):
    """
    Stream a single DICOM instance from the DICOMweb server.

    :param file: the file document. It must carry a ``dicom_uids`` dict with
        ``study_uid``, ``series_uid`` and ``instance_uid`` entries.
    :param offset: byte offset to start at. Only ``0`` is supported.
    :param headers: if truthy, set the Content-Type and Content-Disposition
        response headers.
    :param endByte: end of the requested byte range. Only ``None`` is
        supported.
    :param contentDisposition: content disposition to report; defaults to
        ``'attachment'`` when falsy.
    :param extraParameters: accepted for interface compatibility; unused.
    :returns: a zero-argument generator function that yields the DICOM file
        content in chunks.
    :raises NotImplementedError: if a byte range is requested.
    """
    if offset != 0 or endByte is not None:
        # FIXME: implement range requests
        msg = 'Range requests are not yet implemented'
        raise NotImplementedError(msg)

    from dicomweb_client.web import _Transaction

    dicom_uids = file['dicom_uids']
    study_uid = dicom_uids['study_uid']
    series_uid = dicom_uids['series_uid']
    instance_uid = dicom_uids['instance_uid']

    client = _create_dicomweb_client(self.assetstore_meta)

    if headers:
        setResponseHeader('Content-Type', file['mimeType'])
        setContentDisposition(file['name'], contentDisposition or 'attachment')

        # The filesystem assetstore calls the following function, which sets
        # the above and also sets the range and content-length headers:
        # `self.setContentHeaders(file, offset, endByte, contentDisposition)`
        # However, we can't call that since we don't have a great way of
        # determining the DICOM file size without downloading the whole thing.
        # FIXME: call that function if we find a way to determine file size.

    # Create the URL
    url = client._get_instances_url(
        _Transaction.RETRIEVE,
        study_uid,
        series_uid,
        instance_uid,
    )

    # Build the request headers. These are kept in their own name so that the
    # `headers` flag passed by the caller is not shadowed.
    transfer_syntax = '*'
    accept_parts = [
        'multipart/related',
        'type="application/dicom"',
        f'transfer-syntax={transfer_syntax}',
    ]
    request_headers = {
        'Accept': '; '.join(accept_parts),
    }

    def stream():
        # The request is only performed once the caller starts iterating.
        response = client._http_get(url, headers=request_headers, stream=True)
        yield from self._stream_retrieve_instance_response(response)

    return stream

def _extract_media_type_and_boundary(self, response):
content_type = response.headers['content-type']
media_type, *ct_info = [ct.strip() for ct in content_type.split(';')]
boundary = None
for item in ct_info:
attr, _, value = item.partition('=')
if attr.lower() == 'boundary':
boundary = value.strip('"').encode()
break

return media_type, boundary

def _stream_retrieve_instance_response(self, response):
    """
    Yield the body of a multipart/related retrieve-instance response.

    This is modeled on dicomweb-client's _decode_multipart_message(), but
    that function reads the whole DICOM file into memory. Here the response
    is streamed in chunks instead.

    :param response: a streaming HTTP response that is a context manager and
        provides ``headers`` and ``iter_content(chunk_size)``.
    :returns: a generator of bytes objects holding the content found between
        the end of the first part header and the ending boundary.
    :raises ValueError: if the media type is not multipart/related, the
        boundary is missing from the content-type, the part header is never
        terminated, or the ending boundary is never encountered.
    """
    # Split the content-type to find the media type and boundary.
    media_type, boundary = self._extract_media_type_and_boundary(response)
    if media_type.lower() != 'multipart/related':
        msg = f'Unexpected media type: "{media_type}". Expected "multipart/related".'
        raise ValueError(msg)

    # Ensure we have the multipart/related boundary.
    if boundary is None:
        content_type = response.headers['content-type']
        msg = f'Failed to locate boundary in content-type: {content_type}'
        raise ValueError(msg)

    # Both dicomweb-client and requests-toolbelt check for the ending
    # boundary exactly like so. Servers may send a few extra bytes after it
    # (such as '--' and '\r\n'), but as soon as the ending is seen we are
    # done, so whatever follows is ignored.
    ending = b'\r\n--' + boundary

    # The longest suffix of the buffered data that could still turn out to
    # be the start of the ending once more data arrives.
    tail_size = len(ending) - 1
    buffer_size = max(BUF_SIZE, tail_size)

    # The first `\r\n\r\n` denotes the end of the part's header section.
    end_header_delimiter = b'\r\n\r\n'

    with response:
        iterator = response.iter_content(buffer_size)

        # Skip everything up to and including the header terminator. The
        # transport may hand back chunks of any size, so keep the last few
        # bytes of each chunk in case the terminator straddles two chunks.
        pending = b''
        header_found = False
        for chunk in iterator:
            pending += chunk
            idx = pending.find(end_header_delimiter)
            if idx != -1:
                pending = pending[idx + len(end_header_delimiter):]
                header_found = True
                break
            pending = pending[-(len(end_header_delimiter) - 1):]

        if not header_found:
            msg = 'Failed to find header in response content'
            raise ValueError(msg)

        # Stream the body. `pending` always holds data that has not been
        # yielded yet; its last `tail_size` bytes are held back until we
        # know they are not the beginning of the ending boundary.
        while True:
            idx = pending.find(ending)
            if idx != -1:
                # We found the ending! Emit what precedes it and stop.
                if idx:
                    yield pending[:idx]
                return

            if len(pending) > tail_size:
                yield pending[:-tail_size]
                pending = pending[-tail_size:]

            # Use a sentinel rather than truthiness so that an empty chunk
            # from the transport does not end the stream prematurely.
            chunk = next(iterator, None)
            if chunk is None:
                break
            pending += chunk

        # We should have encountered the ending earlier and returned
        msg = 'Failed to find ending boundary in response content'
        raise ValueError(msg)

def importData(self, parent, parentType, params, progress, user, **kwargs):
"""
Import DICOMweb WSI instances from a DICOMweb server.
Expand Down Expand Up @@ -155,6 +301,7 @@ def importData(self, parent, parentType, params, progress, user, **kwargs):

study_uid_key = dicom_key_to_tag('StudyInstanceUID')
series_uid_key = dicom_key_to_tag('SeriesInstanceUID')
instance_uid_key = dicom_key_to_tag('SOPInstanceUID')

# We are only searching for WSI datasets. Ignore all others.
# FIXME: is this actually working? For the SLIM server at
Expand Down Expand Up @@ -192,25 +339,33 @@ def importData(self, parent, parentType, params, progress, user, **kwargs):

# Set the DICOMweb metadata
item['dicomweb_meta'] = get_dicomweb_metadata(client, study_uid, series_uid)
item = Item().save(item)

# Create a placeholder file with the same name
file = File().createFile(
name=f'{series_uid}.dcm',
creator=user,
item=item,
reuseExisting=True,
assetstore=self.assetstore,
mimeType=None,
size=0,
saveFile=False,
)
file['dicomweb_meta'] = {
item['dicom_uids'] = {
'study_uid': study_uid,
'series_uid': series_uid,
}
file['imported'] = True
File().save(file)
item = Item().save(item)

instance_results = client.search_for_instances(study_uid, series_uid)
for instance in instance_results:
instance_uid = instance[instance_uid_key]['Value'][0]

file = File().createFile(
name=f'{instance_uid}.dcm',
creator=user,
item=item,
reuseExisting=True,
assetstore=self.assetstore,
mimeType='application/dicom',
size=None,
saveFile=False,
)
file['dicom_uids'] = {
'study_uid': study_uid,
'series_uid': series_uid,
'instance_uid': instance_uid,
}
file['imported'] = True
File().save(file)

items.append(item)

Expand Down
7 changes: 3 additions & 4 deletions sources/dicom/large_image_source_dicom/girder_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,14 @@ def _getFilesystemLargeImagePath(self):

def _getDICOMwebLargeImagePath(self, assetstore):
meta = assetstore[DICOMWEB_META_KEY]
file = Item().childFiles(self.item, limit=1)[0]
file_meta = file['dicomweb_meta']
item_uids = self.item['dicom_uids']

adapter = assetstore_utilities.getAssetstoreAdapter(assetstore)

return {
'url': meta['url'],
'study_uid': file_meta['study_uid'],
'series_uid': file_meta['series_uid'],
'study_uid': item_uids['study_uid'],
'series_uid': item_uids['series_uid'],
# The following are optional
'qido_prefix': meta.get('qido_prefix'),
'wado_prefix': meta.get('wado_prefix'),
Expand Down
36 changes: 35 additions & 1 deletion sources/dicom/test_dicom/web_client_specs/dicomWebSpec.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@ describe('DICOMWeb assetstore', function () {
'Admin',
'Admin',
'adminpassword!'));

it('Create an assetstore and import data', function () {
var destinationId;
var destinationType;
var itemId;
var fileId;

// After importing, we will verify that this item exists
const verifyItemName = '1.3.6.1.4.1.5962.99.1.3205815762.381594633.1639588388306.2.0';
Expand Down Expand Up @@ -195,7 +198,38 @@ describe('DICOMWeb assetstore', function () {
}
}).responseJSON.item;

return items.length > 0 && items[0].largeImage !== undefined;
if (items.length === 0 || items[0].largeImage === undefined) {
return false;
}

// Save the itemId, and the file id
itemId = items[0]['_id'];
fileId = items[0].largeImage.fileId;
return true
}, 'Wait for large images to be present');

// Verify that we can download the item
waitsFor(function () {
const resp = girder.rest.restRequest({
url: 'item/' + itemId + '/download',
type: 'GET',
async: false,
});

// Should be larger than 10 million bytes
return resp.status === 200 && resp.responseText.length > 10000000;
}, 'Wait to download all DICOM files in the item');

// Verify that we can download a single file
waitsFor(function () {
const resp = girder.rest.restRequest({
url: 'file/' + fileId + '/download',
type: 'GET',
async: false,
});

// Should be larger than 500k bytes
return resp.status === 200 && resp.responseText.length > 500000;
}, 'Wait to download a single DICOM file');
});
});

0 comments on commit 247f711

Please sign in to comment.