From a388fb24aa6e9f007b9e41ed2ab943428c5758d0 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Wed, 12 Mar 2014 15:02:12 -0700 Subject: [PATCH] URLEncode keys Botocore's xml parser does not handle control chars properly, so we need to urlencode the keys in the response so that we're able to handle them appropriately. Fixes #675. --- awscli/customizations/s3/utils.py | 5 +++-- tests/unit/customizations/s3/test_utils.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py index 15c91a72d83d9..69f9d3b3af02a 100644 --- a/awscli/customizations/s3/utils.py +++ b/awscli/customizations/s3/utils.py @@ -23,6 +23,7 @@ from six.moves import queue from dateutil.parser import parse from dateutil.tz import tzlocal +from botocore.compat import unquote from awscli.customizations.s3.constants import MAX_PARTS from awscli.customizations.s3.constants import MAX_SINGLE_UPLOAD_SIZE @@ -298,14 +299,14 @@ def __init__(self, operation, endpoint, date_parser=_date_parser): self._date_parser = date_parser def list_objects(self, bucket, prefix=None): - kwargs = {'bucket': bucket} + kwargs = {'bucket': bucket, 'encoding_type': 'url'} if prefix is not None: kwargs['prefix'] = prefix pages = self._operation.paginate(self._endpoint, **kwargs) for response, page in pages: contents = page['Contents'] for content in contents: - source_path = bucket + '/' + content['Key'] + source_path = bucket + '/' + unquote(content['Key']) size = content['Size'] last_update = self._date_parser(content['LastModified']) yield source_path, size, last_update diff --git a/tests/unit/customizations/s3/test_utils.py b/tests/unit/customizations/s3/test_utils.py index 0d409972a0d51..f956e9732b325 100644 --- a/tests/unit/customizations/s3/test_utils.py +++ b/tests/unit/customizations/s3/test_utils.py @@ -218,6 +218,22 @@ def test_list_objects(self): self.assertEqual(objects, [('foo/a', 1, now), ('foo/b', 2, now), 
('foo/c', 3, now)]) + def test_urlencoded_keys(self): + # In order to work around control chars being in key names, + # we force the urlencoding of the key names and we decode + # them before yielding them. For example, note the %0D + # in bar%0D.txt: + now = mock.sentinel.now + self.operation.paginate.return_value = [ + (None, {'Contents': [ + {'LastModified': '2014-02-27T04:20:38.000Z', + 'Key': 'bar%0D.txt', 'Size': 1}]}), + ] + lister = BucketLister(self.operation, self.endpoint, self.date_parser) + objects = list(lister.list_objects(bucket='foo')) + # And note how it's been converted to '\r'. + self.assertEqual(objects, [('foo/bar\r.txt', 1, now)]) + if __name__ == "__main__": unittest.main()