From 447bfb84ceaa203366a347231de6eb8ae0e26717 Mon Sep 17 00:00:00 2001 From: Daniel Lindsley Date: Mon, 17 Feb 2014 11:58:58 -0800 Subject: [PATCH 1/3] Updated pagination for jmespath ``result_key``s. --- botocore/exceptions.py | 5 ++++ botocore/paginate.py | 32 ++++++++++++++++++------- botocore/utils.py | 34 +++++++++++++++++++++++++++ tests/unit/test_paginate.py | 15 +++++++++--- tests/unit/test_utils.py | 47 +++++++++++++++++++++++++++++++++++++ 5 files changed, 121 insertions(+), 12 deletions(-) diff --git a/botocore/exceptions.py b/botocore/exceptions.py index f629d597ec..90d0a3abb6 100644 --- a/botocore/exceptions.py +++ b/botocore/exceptions.py @@ -219,3 +219,8 @@ class IncompleteReadError(BotoCoreError): """HTTP response did not return expected number of bytes.""" fmt = ('{actual_bytes} read, but total bytes ' 'expected is {expected_bytes}.') + + +class InvalidExpressionError(BotoCoreError): + """Expression is either invalid or too complex.""" + fmt = 'Invalid expression {expression}: Only dotted lookups are supported.' diff --git a/botocore/paginate.py b/botocore/paginate.py index 1a68ff5540..8e0d026eb8 100644 --- a/botocore/paginate.py +++ b/botocore/paginate.py @@ -26,6 +26,7 @@ import jmespath from botocore.exceptions import PaginationError +from botocore.utils import exp_set class Paginator(object): @@ -66,6 +67,7 @@ def _get_result_keys(self, config): if result_key is not None: if not isinstance(result_key, list): result_key = [result_key] + result_key = [jmespath.compile(rk) for rk in result_key] return result_key def paginate(self, endpoint, **kwargs): @@ -145,7 +147,10 @@ def __iter__(self): starting_truncation = self._handle_first_request( parsed, primary_result_key, starting_truncation) first_request = False - num_current_response = len(parsed.get(primary_result_key, [])) + current_response = primary_result_key.search(parsed) + if current_response is None: + current_response = [] + num_current_response = len(current_response) truncate_amount = 0 if self._max_items is not None: truncate_amount = (total_items + num_current_response) \ @@ -196,23 +201,29 @@ def _handle_first_request(self, parsed, primary_result_key, # First we need to slice into the array and only return # the truncated amount. starting_truncation = self._parse_starting_token()[1] - parsed[primary_result_key] = parsed[ - primary_result_key][starting_truncation:] + all_data = primary_result_key.search(parsed) + exp_set( + parsed, + primary_result_key.expression, + all_data[starting_truncation:] + ) # We also need to truncate any secondary result keys # because they were not truncated in the previous last # response. for token in self.result_keys: if token == primary_result_key: continue - parsed[token] = [] + exp_set(parsed, token.expression, []) return starting_truncation def _truncate_response(self, parsed, primary_result_key, truncate_amount, starting_truncation, next_token): - original = parsed.get(primary_result_key, []) + original = primary_result_key.search(parsed) + if original is None: + original = [] amount_to_keep = len(original) - truncate_amount truncated = original[:amount_to_keep] - parsed[primary_result_key] = truncated + exp_set(parsed, primary_result_key.expression, truncated) # The issue here is that even though we know how much we've truncated # we need to account for this globally including any starting # left truncation. For example: @@ -246,11 +257,11 @@ def build_full_result(self): response = {} key_names = [i.result_key for i in iterators] for key in key_names: - response[key] = [] + exp_set(response, key.expression, []) for vals in zip_longest(*iterators): for k, val in zip(key_names, vals): if val is not None: - response[k].append(val) + response[k.expression].append(val) if self.resume_token is not None: response['NextToken'] = self.resume_token return response @@ -283,5 +294,8 @@ def __init__(self, pages_iterator, result_key): def __iter__(self): for _, page in self._pages_iterator: - for result in page.get(self.result_key, []): + results = self.result_key.search(page) + if results is None: + results = [] + for result in results: yield result diff --git a/botocore/utils.py b/botocore/utils.py index 73058128dc..ec619a81b0 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -11,6 +11,8 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +from .exceptions import InvalidExpressionError + def normalize_url_path(path): if not path: @@ -57,3 +59,35 @@ def remove_dot_segments(url): output.append(url[:next_slash]) url = url[next_slash:] return ''.join(output) + + +def exp_set(source, expression, value, is_first=True): + # This takes a (limited) jmespath-like expression & can set a value based + # on it. + # Limitations: + # * Only handles dotted lookups + # * No offsets/wildcards/slices/etc. + if is_first: + for invalid in ['[', ']', '*']: + if invalid in expression: + raise InvalidExpressionError(expression=expression) + + exp_bits = expression.split('.') + current_key = exp_bits[0] + remainder = '.'.join(exp_bits[1:]) + + if not current_key: + raise InvalidExpressionError(expression=expression) + + if len(exp_bits) > 1: + if not current_key in source: + # We've got something in the expression that's not present in the + # source (new key). If there's any more bits, we'll set the key with + # an empty dictionary. + source[current_key] = {} + + return exp_set(source[current_key], remainder, value, is_first=False) + + # If we're down to a single key, set it. + source[current_key] = value + return True diff --git a/tests/unit/test_paginate.py b/tests/unit/test_paginate.py index b74a4f7fe8..680d2f3465 100644 --- a/tests/unit/test_paginate.py +++ b/tests/unit/test_paginate.py @@ -31,7 +31,10 @@ def setUp(self): self.paginator = Paginator(self.operation) def test_result_key_available(self): - self.assertEqual(self.paginator.result_keys, ['Foo']) + self.assertEqual( + [rk.expression for rk in self.paginator.result_keys], + ['Foo'] + ) def test_no_next_token(self): response = {'not_the_next_token': 'foobar'} @@ -506,11 +509,17 @@ def test_resume_with_multiple_input_keys(self): [mock.call(None, InMarker1='m1', InMarker2='m2'),]) def test_result_key_exposed_on_paginator(self): - self.assertEqual(self.paginator.result_keys, ['Users', 'Groups']) + self.assertEqual( + [rk.expression for rk in self.paginator.result_keys], + ['Users', 'Groups'] + ) def test_result_key_exposed_on_page_iterator(self): pages = self.paginator.paginate(None, max_items=3) - self.assertEqual(pages.result_keys, ['Users', 'Groups']) + self.assertEqual( + [rk.expression for rk in pages.result_keys], + ['Users', 'Groups'] + ) if __name__ == '__main__': diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 360f5d6512..5ecab96efe 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -14,8 +14,10 @@ import unittest from botocore import xform_name +from botocore.exceptions import InvalidExpressionError from botocore.utils import remove_dot_segments from botocore.utils import normalize_url_path +from botocore.utils import exp_set class TestURINormalization(unittest.TestCase): @@ -69,5 +71,50 @@ def test_special_cases(self): self.assertEqual(xform_name('CreateStorediSCSIVolume', '-'), 'create-stored-iscsi-volume') +class TestExpSet(unittest.TestCase): + def setUp(self): + super(TestExpSet, self).setUp() + self.data = { + 'Response': { + 'Thing': { + 'Id': 1, + 'Name': 'Thing #1', + } + }, + 'Marker': 'some-token' + } + + def test_single_depth_existing(self): + self.assertTrue(exp_set(self.data, 'Marker', 'new-token')) + self.assertEqual(self.data['Marker'], 'new-token') + + def test_single_depth_new(self): + self.assertFalse('Limit' in self.data) + self.assertTrue(exp_set(self.data, 'Limit', 100)) + self.assertEqual(self.data['Limit'], 100) + + def test_multiple_depth_existing(self): + self.assertTrue(exp_set(self.data, 'Response.Thing.Name', 'New Name')) + self.assertEqual(self.data['Response']['Thing']['Name'], 'New Name') + + def test_multiple_depth_new(self): + self.assertFalse('Brand' in self.data) + self.assertTrue(exp_set(self.data, 'Brand.New', {'abc': 123})) + self.assertEqual(self.data['Brand']['New']['abc'], 123) + + def test_invalid_exp(self): + with self.assertRaises(InvalidExpressionError): + exp_set(self.data, 'Response.*.Name', 'new-token') + + with self.assertRaises(InvalidExpressionError): + exp_set(self.data, 'Response.Things[0]', 'new-token') + + with self.assertRaises(InvalidExpressionError): + exp_set(self.data, '', 'new-token') + + with self.assertRaises(InvalidExpressionError): + exp_set(self.data, '.', 'new-token') + + if __name__ == '__main__': unittest.main() From 68e791d49e66c681a3d1af620771e8941212514b Mon Sep 17 00:00:00 2001 From: Daniel Lindsley Date: Mon, 17 Feb 2014 14:21:47 -0800 Subject: [PATCH 2/3] Pagination changes from feedback. --- botocore/paginate.py | 14 ++++++---- botocore/utils.py | 32 +++++++++++++++------- tests/unit/test_utils.py | 57 +++++++++++++++++++++++++--------------- 3 files changed, 67 insertions(+), 36 deletions(-) diff --git a/botocore/paginate.py b/botocore/paginate.py index 8e0d026eb8..055758f9f5 100644 --- a/botocore/paginate.py +++ b/botocore/paginate.py @@ -26,7 +26,7 @@ import jmespath from botocore.exceptions import PaginationError -from botocore.utils import exp_set +from botocore.utils import set_value_from_jmespath class Paginator(object): @@ -202,7 +202,7 @@ def _handle_first_request(self, parsed, primary_result_key, # the truncated amount. starting_truncation = self._parse_starting_token()[1] all_data = primary_result_key.search(parsed) - exp_set( + set_value_from_jmespath( parsed, primary_result_key.expression, all_data[starting_truncation:] @@ -213,7 +213,7 @@ def _handle_first_request(self, parsed, primary_result_key, for token in self.result_keys: if token == primary_result_key: continue - exp_set(parsed, token.expression, []) + set_value_from_jmespath(parsed, token.expression, []) return starting_truncation def _truncate_response(self, parsed, primary_result_key, truncate_amount, @@ -223,7 +223,11 @@ def _truncate_response(self, parsed, primary_result_key, truncate_amount, original = [] amount_to_keep = len(original) - truncate_amount truncated = original[:amount_to_keep] - exp_set(parsed, primary_result_key.expression, truncated) + set_value_from_jmespath( + parsed, + primary_result_key.expression, + truncated + ) # The issue here is that even though we know how much we've truncated # we need to account for this globally including any starting # left truncation. For example: @@ -257,7 +261,7 @@ def build_full_result(self): response = {} key_names = [i.result_key for i in iterators] for key in key_names: - exp_set(response, key.expression, []) + set_value_from_jmespath(response, key.expression, []) for vals in zip_longest(*iterators): for k, val in zip(key_names, vals): if val is not None: diff --git a/botocore/utils.py b/botocore/utils.py index ec619a81b0..7fd2222f6d 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -61,33 +61,45 @@ def remove_dot_segments(url): return ''.join(output) -def exp_set(source, expression, value, is_first=True): +def validate_jmespath_for_set(expression): + # Validates a limited jmespath expression to determine if we can set a value + # based on it. Only works with dotted paths. + if not expression or expression == '.': + raise InvalidExpressionError(expression=expression) + + for invalid in ['[', ']', '*']: + if invalid in expression: + raise InvalidExpressionError(expression=expression) + + +def set_value_from_jmespath(source, expression, value, is_first=True): # This takes a (limited) jmespath-like expression & can set a value based # on it. # Limitations: # * Only handles dotted lookups # * No offsets/wildcards/slices/etc. if is_first: - for invalid in ['[', ']', '*']: - if invalid in expression: - raise InvalidExpressionError(expression=expression) + validate_jmespath_for_set(expression) - exp_bits = expression.split('.') - current_key = exp_bits[0] - remainder = '.'.join(exp_bits[1:]) + bits = expression.split('.', 1) + current_key, remainder = bits[0], bits[1] if len(bits) > 1 else '' if not current_key: raise InvalidExpressionError(expression=expression) - if len(exp_bits) > 1: + if remainder: if not current_key in source: # We've got something in the expression that's not present in the # source (new key). If there's any more bits, we'll set the key with # an empty dictionary. source[current_key] = {} - return exp_set(source[current_key], remainder, value, is_first=False) + return set_value_from_jmespath( + source[current_key], + remainder, + value, + is_first=False + ) # If we're down to a single key, set it. source[current_key] = value - return True diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 5ecab96efe..d568c043e4 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -11,13 +11,14 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -import unittest +from tests import unittest from botocore import xform_name from botocore.exceptions import InvalidExpressionError from botocore.utils import remove_dot_segments from botocore.utils import normalize_url_path -from botocore.utils import exp_set +from botocore.utils import validate_jmespath_for_set +from botocore.utils import set_value_from_jmespath class TestURINormalization(unittest.TestCase): @@ -71,9 +72,36 @@ def test_special_cases(self): self.assertEqual(xform_name('CreateStorediSCSIVolume', '-'), 'create-stored-iscsi-volume') -class TestExpSet(unittest.TestCase): +class TestValidateJMESPathForSet(unittest.TestCase): def setUp(self): - super(TestExpSet, self).setUp() + super(TestValidateJMESPathForSet, self).setUp() + self.data = { + 'Response': { + 'Thing': { + 'Id': 1, + 'Name': 'Thing #1', + } + }, + 'Marker': 'some-token' + } + + def test_invalid_exp(self): + with self.assertRaises(InvalidExpressionError): + validate_jmespath_for_set('Response.*.Name') + + with self.assertRaises(InvalidExpressionError): + validate_jmespath_for_set('Response.Things[0]') + + with self.assertRaises(InvalidExpressionError): + validate_jmespath_for_set('') + + with self.assertRaises(InvalidExpressionError): + validate_jmespath_for_set('.') + + +class TestSetValueFromJMESPath(unittest.TestCase): + def setUp(self): + super(TestSetValueFromJMESPath, self).setUp() self.data = { 'Response': { 'Thing': { @@ -85,36 +113,23 @@ def setUp(self): } def test_single_depth_existing(self): - self.assertTrue(exp_set(self.data, 'Marker', 'new-token')) + set_value_from_jmespath(self.data, 'Marker', 'new-token') self.assertEqual(self.data['Marker'], 'new-token') def test_single_depth_new(self): self.assertFalse('Limit' in self.data) - self.assertTrue(exp_set(self.data, 'Limit', 100)) + set_value_from_jmespath(self.data, 'Limit', 100) self.assertEqual(self.data['Limit'], 100) def test_multiple_depth_existing(self): - self.assertTrue(exp_set(self.data, 'Response.Thing.Name', 'New Name')) + set_value_from_jmespath(self.data, 'Response.Thing.Name', 'New Name') self.assertEqual(self.data['Response']['Thing']['Name'], 'New Name') def test_multiple_depth_new(self): self.assertFalse('Brand' in self.data) - self.assertTrue(exp_set(self.data, 'Brand.New', {'abc': 123})) + set_value_from_jmespath(self.data, 'Brand.New', {'abc': 123}) self.assertEqual(self.data['Brand']['New']['abc'], 123) - def test_invalid_exp(self): - with self.assertRaises(InvalidExpressionError): - exp_set(self.data, 'Response.*.Name', 'new-token') - - with self.assertRaises(InvalidExpressionError): - exp_set(self.data, 'Response.Things[0]', 'new-token') - - with self.assertRaises(InvalidExpressionError): - exp_set(self.data, '', 'new-token') - - with self.assertRaises(InvalidExpressionError): - exp_set(self.data, '.', 'new-token') - if __name__ == '__main__': unittest.main() From cc7c00df1589d51fc3aa2c2b8f77ea306860ad17 Mon Sep 17 00:00:00 2001 From: Daniel Lindsley Date: Mon, 17 Feb 2014 16:48:36 -0800 Subject: [PATCH 3/3] Updated pagination tests to add expressions. --- botocore/paginate.py | 1 + tests/unit/test_paginate.py | 40 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/botocore/paginate.py b/botocore/paginate.py index 055758f9f5..50a739a72d 100644 --- a/botocore/paginate.py +++ b/botocore/paginate.py @@ -265,6 +265,7 @@ def build_full_result(self): for vals in zip_longest(*iterators): for k, val in zip(key_names, vals): if val is not None: + response.setdefault(k.expression, []) response[k.expression].append(val) if self.resume_token is not None: response['NextToken'] = self.resume_token diff --git a/tests/unit/test_paginate.py b/tests/unit/test_paginate.py index 680d2f3465..c0367d61ff 100644 --- a/tests/unit/test_paginate.py +++ b/tests/unit/test_paginate.py @@ -522,5 +522,45 @@ def test_result_key_exposed_on_page_iterator(self): ) +class TestExpressionKeyIterators(unittest.TestCase): + def setUp(self): + self.operation = mock.Mock() + # This is something like what we'd see in RDS. + self.paginate_config = { + "py_input_token": "Marker", + "output_token": "Marker", + "limit_key": "MaxRecords", + "result_key": "EngineDefaults.Parameters" + } + self.operation.pagination = self.paginate_config + self.paginator = Paginator(self.operation) + self.responses = [ + (None, { + "EngineDefaults": {"Parameters": ["One", "Two"] + }, "Marker": "m1"}), + (None, { + "EngineDefaults": {"Parameters": ["Three", "Four"] + }, "Marker": "m2"}), + (None, {"EngineDefaults": {"Parameters": ["Five"]}}), + ] + + def test_result_key_iters(self): + self.operation.call.side_effect = self.responses + pages = self.paginator.paginate(None) + iterators = pages.result_key_iters() + self.assertEqual(len(iterators), 1) + self.assertEqual(list(iterators[0]), + ['One', 'Two', 'Three', 'Four', 'Five']) + + def test_build_full_result_with_single_key(self): + self.operation.call.side_effect = self.responses + pages = self.paginator.paginate(None) + complete = pages.build_full_result() + self.assertEqual(complete, { + 'EngineDefaults': {'Parameters': []}, + 'EngineDefaults.Parameters': ['One', 'Two', 'Three', 'Four', 'Five'] + }) + + if __name__ == '__main__': unittest.main()