Skip to content

Commit

Permalink
URLEncode keys
Browse files Browse the repository at this point in the history
Botocore's xml parser does not handle control chars properly,
so we need to urlencode the keys in the response so that we're able
to handle them appropriately.

Fixes #675.
  • Loading branch information
jamesls committed Mar 13, 2014
1 parent b5f5b92 commit 2a345e0
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 2 deletions.
5 changes: 3 additions & 2 deletions awscli/customizations/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from six.moves import queue
from dateutil.parser import parse
from dateutil.tz import tzlocal
from botocore.compat import unquote_str

from awscli.customizations.s3.constants import MAX_PARTS
from awscli.customizations.s3.constants import MAX_SINGLE_UPLOAD_SIZE
Expand Down Expand Up @@ -298,14 +299,14 @@ def __init__(self, operation, endpoint, date_parser=_date_parser):
self._date_parser = date_parser

def list_objects(self, bucket, prefix=None):
    """Yield ``(source_path, size, last_update)`` for each object listed.

    :param bucket: Name of the bucket to list.
    :param prefix: Optional key prefix; only sent with the request
        when it is not ``None``.
    :returns: A generator of ``(source_path, size, last_update)``
        tuples, where ``source_path`` is ``bucket + '/' + key`` with
        the key URL-decoded, ``size`` is the entry's ``'Size'`` value
        and ``last_update`` is the entry's ``'LastModified'`` value
        passed through the configured date parser.
    """
    # Botocore's xml parser does not handle control chars properly, so
    # the keys are requested URL-encoded and decoded again below.
    kwargs = {'bucket': bucket, 'encoding_type': 'url'}
    if prefix is not None:
        kwargs['prefix'] = prefix
    pages = self._operation.paginate(self._endpoint, **kwargs)
    for response, page in pages:
        # Tolerate a page that has no 'Contents' entry (e.g. nothing
        # matched) rather than failing with a KeyError.
        for content in page.get('Contents', []):
            source_path = bucket + '/' + unquote_str(content['Key'])
            size = content['Size']
            last_update = self._date_parser(content['LastModified'])
            yield source_path, size, last_update
Expand Down
27 changes: 27 additions & 0 deletions tests/unit/customizations/s3/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,33 @@ def test_list_objects(self):
self.assertEqual(objects, [('foo/a', 1, now), ('foo/b', 2, now),
('foo/c', 3, now)])

def test_urlencoded_keys(self):
    # In order to work around control chars being in key names,
    # we force the urlencoding of the key names and we decode
    # them before yielding them. For example, note the %0D
    # (an encoded carriage return) in the 'bar%0D.txt' key:
    # NOTE(review): assumes the fixture's date_parser returns
    # mock.sentinel.now; the setup is outside this view.
    now = mock.sentinel.now
    self.operation.paginate.return_value = [
        (None, {'Contents': [
            {'LastModified': '2014-02-27T04:20:38.000Z',
             'Key': 'bar%0D.txt', 'Size': 1}]}),
    ]
    lister = BucketLister(self.operation, self.endpoint, self.date_parser)
    objects = list(lister.list_objects(bucket='foo'))
    # And note how it's been converted to '\r'.
    self.assertEqual(objects, [('foo/bar\r.txt', 1, now)])

def test_urlencoded_with_unicode_keys(self):
    # A urlencoded key may also carry percent-encoded UTF-8 bytes;
    # these must come back decoded as a single unicode character.
    # NOTE(review): assumes the fixture's date_parser returns
    # mock.sentinel.now; the setup is outside this view.
    now = mock.sentinel.now
    self.operation.paginate.return_value = [
        (None, {'Contents': [
            {'LastModified': '2014-02-27T04:20:38.000Z',
             'Key': '%E2%9C%93', 'Size': 1}]}),
    ]
    lister = BucketLister(self.operation, self.endpoint, self.date_parser)
    objects = list(lister.list_objects(bucket='foo'))
    # And note how '%E2%9C%93' has been decoded to the check mark
    # character U+2713.
    self.assertEqual(objects, [(u'foo/\u2713', 1, now)])

# Run this module's unit tests when it is executed directly as a script.
if __name__ == "__main__":
unittest.main()

0 comments on commit 2a345e0

Please sign in to comment.