From a388fb24aa6e9f007b9e41ed2ab943428c5758d0 Mon Sep 17 00:00:00 2001
From: James Saryerwinnie <js@jamesls.com>
Date: Wed, 12 Mar 2014 15:02:12 -0700
Subject: [PATCH] URLEncode keys

Botocore's XML parser does not handle control characters in key names
properly, so we ask S3 to URL-encode the keys in the list response
(encoding_type=url) and unquote them ourselves before yielding them.

Fixes #675.
---
 awscli/customizations/s3/utils.py          |  5 +++--
 tests/unit/customizations/s3/test_utils.py | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py
index 15c91a72d83d9..69f9d3b3af02a 100644
--- a/awscli/customizations/s3/utils.py
+++ b/awscli/customizations/s3/utils.py
@@ -23,6 +23,7 @@
 from six.moves import queue
 from dateutil.parser import parse
 from dateutil.tz import tzlocal
+from botocore.compat import unquote
 
 from awscli.customizations.s3.constants import MAX_PARTS
 from awscli.customizations.s3.constants import MAX_SINGLE_UPLOAD_SIZE
@@ -298,14 +299,14 @@ def __init__(self, operation, endpoint, date_parser=_date_parser):
         self._date_parser = date_parser
 
     def list_objects(self, bucket, prefix=None):
-        kwargs = {'bucket': bucket}
+        kwargs = {'bucket': bucket, 'encoding_type': 'url'}
         if prefix is not None:
             kwargs['prefix'] = prefix
         pages = self._operation.paginate(self._endpoint, **kwargs)
         for response, page in pages:
             contents = page['Contents']
             for content in contents:
-                source_path = bucket + '/' + content['Key']
+                source_path = bucket + '/' + unquote(content['Key'])
                 size = content['Size']
                 last_update = self._date_parser(content['LastModified'])
                 yield source_path, size, last_update
diff --git a/tests/unit/customizations/s3/test_utils.py b/tests/unit/customizations/s3/test_utils.py
index 0d409972a0d51..f956e9732b325 100644
--- a/tests/unit/customizations/s3/test_utils.py
+++ b/tests/unit/customizations/s3/test_utils.py
@@ -218,6 +218,22 @@ def test_list_objects(self):
         self.assertEqual(objects, [('foo/a', 1, now), ('foo/b', 2, now),
                                    ('foo/c', 3, now)])
 
+    def test_urlencoded_keys(self):
+        # In order to work around control chars being in key names,
+        # we force the urlencoding of the key names and we decode
+        # them before yielding them.  For example, note the %0D
+        # in bar%0D.txt:
+        now = mock.sentinel.now
+        self.operation.paginate.return_value = [
+            (None, {'Contents': [
+                {'LastModified': '2014-02-27T04:20:38.000Z',
+                 'Key': 'bar%0D.txt', 'Size': 1}]}),
+        ]
+        lister = BucketLister(self.operation, self.endpoint, self.date_parser)
+        objects = list(lister.list_objects(bucket='foo'))
+        # And note how it's been converted to '\r'.
+        self.assertEqual(objects, [('foo/bar\r.txt', 1, now)])
+
 
 if __name__ == "__main__":
     unittest.main()