Split out object listing into separate object
Lets us vary the implementation from the filegenerator.
jamesls committed Mar 12, 2014
1 parent 64d0196 commit 9c66304
Showing 4 changed files with 63 additions and 37 deletions.
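The point of the refactor is that listing becomes an object that can be constructed, and replaced, independently of the file generator. A minimal usage sketch (not part of this commit; the helper name print_bucket is ours, and operation/endpoint are the botocore ListObjects operation and endpoint objects the file generator already holds):

    from awscli.customizations.s3.utils import BucketLister

    def print_bucket(operation, endpoint, bucket, prefix=None):
        # BucketLister yields (source_path, size, last_update) tuples,
        # with source_path in "bucket/key" form.
        lister = BucketLister(operation, endpoint)
        for source_path, size, last_update in lister.list_objects(
                bucket=bucket, prefix=prefix):
            print(source_path, size, last_update)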
40 changes: 17 additions & 23 deletions awscli/customizations/s3/filegenerator.py
@@ -12,14 +12,14 @@
# language governing permissions and limitations under the License.
import os
import sys
import datetime

import six
from dateutil.parser import parse
from dateutil.tz import tzlocal

from awscli.customizations.s3.fileinfo import FileInfo
from awscli.customizations.s3.utils import find_bucket_key, get_file_stat
from awscli.customizations.s3.utils import BucketLister
from awscli.errorhandler import ClientError


@@ -162,28 +162,22 @@ def list_objects(self, s3_path, dir_op):
yield self._list_single_object(s3_path)
else:
operation = self._service.get_operation('ListObjects')
iterator = operation.paginate(self._endpoint, bucket=bucket,
prefix=prefix)
for html_response, response_data in iterator:
contents = response_data['Contents']
for content in contents:
src_path = bucket + '/' + content['Key']
size = content['Size']
last_update = parse(content['LastModified'])
last_update = last_update.astimezone(tzlocal())
if size == 0 and src_path.endswith('/'):
if self.operation_name == 'delete':
# This is to filter out manually created folders
# in S3. They have a size zero and would be
# undesirably downloaded. Local directories
# are automatically created when they do not
# exist locally. But user should be able to
# delete them.
yield src_path, size, last_update
elif not dir_op and s3_path != src_path:
pass
else:
yield src_path, size, last_update
lister = BucketLister(operation, self._endpoint)
for key in lister.list_objects(bucket=bucket, prefix=prefix):
source_path, size, last_update = key
if size == 0 and source_path.endswith('/'):
if self.operation_name == 'delete':
# This is to filter out manually created folders
# in S3. They have a size zero and would be
# undesirably downloaded. Local directories
# are automatically created when they do not
# exist locally. But user should be able to
# delete them.
yield source_path, size, last_update
elif not dir_op and s3_path != source_path:
pass
else:
yield source_path, size, last_update

def _list_single_object(self, s3_path):
# When we know we're dealing with a single object, we can avoid
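Since the file generator now delegates to BucketLister, a test (or an alternative front end) can vary the listing implementation without stubbing paginated ListObjects responses. A sketch of that payoff, not part of this commit, using the same mock library the test suite already uses:

    import mock

    def fake_listing(bucket, prefix=None):
        # Stand-in results in the shape BucketLister yields:
        # (source_path, size, last_update).
        yield bucket + '/key-1', 10, None
        yield bucket + '/key-2', 20, None

    patcher = mock.patch(
        'awscli.customizations.s3.filegenerator.BucketLister')
    lister_cls = patcher.start()
    lister_cls.return_value.list_objects.side_effect = fake_listing
    # Any FileGenerator created after this point iterates over the fake
    # keys instead of calling ListObjects against a real endpoint.
    patcher.stop()  # undo the patch when done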
26 changes: 26 additions & 0 deletions awscli/customizations/s3/utils.py
@@ -21,6 +21,7 @@

from six import PY3
from six.moves import queue
from dateutil.parser import parse
from dateutil.tz import tzlocal

from awscli.customizations.s3.constants import MAX_PARTS
@@ -285,6 +286,31 @@ def __iter__(self):
return iter([])


def _date_parser(date_string):
return parse(date_string).astimezone(tzlocal())


class BucketLister(object):
"""List keys in a bucket."""
def __init__(self, operation, endpoint, date_parser=_date_parser):
self._operation = operation
self._endpoint = endpoint
self._date_parser = date_parser

def list_objects(self, bucket, prefix=None):
kwargs = {'bucket': bucket}
if prefix is not None:
kwargs['prefix'] = prefix
pages = self._operation.paginate(self._endpoint, **kwargs)
for response, page in pages:
contents = page['Contents']
for content in contents:
source_path = bucket + '/' + content['Key']
size = content['Size']
last_update = self._date_parser(content['LastModified'])
yield source_path, size, last_update


IORequest = namedtuple('IORequest', ['filename', 'offset', 'data'])
# Used to signal that IO for the filename is finished, and that
# any associated resources may be cleaned up.
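The date_parser hook is the injectable piece: the default _date_parser converts each LastModified string to the local timezone, but a caller can swap in another policy. A hypothetical example, not part of this commit, that keeps timestamps in UTC instead:

    from dateutil.parser import parse
    from dateutil.tz import tzutc

    from awscli.customizations.s3.utils import BucketLister

    def utc_parser(date_string):
        # Same parsing as the default, but normalized to UTC rather
        # than the machine's local timezone.
        return parse(date_string).astimezone(tzutc())

    def make_utc_lister(operation, endpoint):
        # Identical construction to the filegenerator's, with the
        # parser swapped via the new keyword argument.
        return BucketLister(operation, endpoint, date_parser=utc_parser)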
10 changes: 0 additions & 10 deletions awscli/utils.py
@@ -79,13 +79,3 @@ def _find_quote_char_in_part(part):
elif single_quote < double_quote:
quote_char = "'"
return quote_char


class BucketLister(object):
"""List keys in a bucket."""
def __init__(self, operation, endpoint):
self._operation = operation
self._endpoint = endpoint

def list_objects(self, bucket_name, prefix):
pass
24 changes: 20 additions & 4 deletions tests/unit/customizations/s3/test_utils.py
@@ -4,13 +4,13 @@
import shutil
import ntpath

from six.moves import queue
import mock

from awscli.customizations.s3.utils import find_bucket_key, find_chunksize
from awscli.customizations.s3.utils import ReadFileChunk
from awscli.customizations.s3.utils import relative_path
from awscli.customizations.s3.utils import StablePriorityQueue
from awscli.customizations.s3.utils import BucketLister
from awscli.customizations.s3.constants import MAX_SINGLE_UPLOAD_SIZE


@@ -193,14 +193,30 @@ def test_priority_attr_is_missing(self):
self.assertIs(q.get(), a)


class TestBucketList(object):
class TestBucketList(unittest.TestCase):
def setUp(self):
self.operation = mock.Mock()
self.endpoint = mock.sentinel.endpoint
self.date_parser = mock.Mock()
self.date_parser.return_value = mock.sentinel.now

def test_list_objects(self):
lister = BucketLister(self.operation, self.endpoint)
lister.list_objects(
now = mock.sentinel.now
self.operation.paginate.return_value = [
(None, {'Contents': [
{'LastModified': '2014-02-27T04:20:38.000Z',
'Key': 'a', 'Size': 1},
{'LastModified': '2014-02-27T04:20:38.000Z',
'Key': 'b', 'Size': 2},]}),
(None, {'Contents': [
{'LastModified': '2014-02-27T04:20:38.000Z',
'Key': 'c', 'Size': 3},
]}),
]
lister = BucketLister(self.operation, self.endpoint, self.date_parser)
objects = list(lister.list_objects(bucket='foo'))
self.assertEqual(objects, [('foo/a', 1, now), ('foo/b', 2, now),
('foo/c', 3, now)])


if __name__ == "__main__":
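A note on the test's design: injecting self.date_parser (which returns mock.sentinel.now) lets the assertion check, by identity, that whatever the parser returns is passed through untouched, without exercising real date parsing, and it keeps the test independent of the machine's timezone.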
